[
  {
    "path": ".gitignore",
    "content": "*.pyc\n*.swp\n*.so\n*.jpg\n*.html\n\\#*\\#\n.\\#*\n*~\n*.h5\nsrc/expt_outputs/*\nsrc/data/*\n\n# Custom stuff\nwebpages/002_VisAtt2/hmdb_frames\nwebpages/002_VisAtt2/linAtt\nwebpages/002_VisAtt2/poseAtt\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright (c) 2017 Rohit Girdhar and Deva Ramanan.\nAll rights reserved.\n\nThis code is copyrighted by the authors and Carnegie Mellon University,\nand is for non-commercial research purposes only. Please contact the authors and\nCarnegie Mellon University if you are interested in licensing for\ncommercial purposes.\n\n                                  Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"{}\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright {yyyy} {name of copyright owner}\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# Attentional Pooling for Action Recognition\n\n[[project page](https://rohitgirdhar.github.io/AttentionalPoolingAction/)] [[paper](https://arxiv.org/abs/1711.01467)]\n\nIf this code helps with your work/research, please consider citing\n\nRohit Girdhar and Deva Ramanan. **Attentional Pooling for Action Recognition**. Advances in Neural Information Processing Systems (NIPS), 2017.\n\n```txt\n@inproceedings{Girdhar_17b_AttentionalPoolingAction,\n    title = {Attentional Pooling for Action Recognition},\n    author = {Girdhar, Rohit and Ramanan, Deva},\n    booktitle = {NIPS},\n    year = 2017\n}\n```\n\n## Pre-requisites\n\nThis code was trained and tested with\n\n1. CentOS 6.5\n2. Python 2.7\n3. TensorFlow 1.1.0-rc2 ([6a1825e2](https://github.com/tensorflow/tensorflow/tree/6a1825e2369d2537e15dc585705c53c4b763f3f6))\n\n## Getting started\n\nClone the code and create some directories for outputs\n\n```bash\n$ git clone --recursive https://github.com/rohitgirdhar/AttentionalPoolingAction.git\n$ export ROOT=`pwd`/AttentionalPoolingAction\n$ cd $ROOT/src/\n$ mkdir -p expt_outputs data\n$ # compile some custom ops\n$ cd custom_ops; make; cd ..\n```\n\n## Data setup\n\nYou can download the `tfrecord` files for MPII I used from\n[here](https://cmu.box.com/shared/static/xb7esevyl6uzmra2eehnkbt2ud7awld9.tar)\nand uncompress on to a fast local disk.\nIf you want to create your own tfrecords, you can use the following steps, which is\nwhat I used to create the linked tfrecord files\n\nConvert the MPII data into tfrecords. The system also can read from individual JPEG files,\nbut that needs a slightly different initial setup.\n\nFirst download the MPII [images](http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1.tar.gz)\nand [annotations](http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_u12_2.zip),\nand un-compress the files.\n\n```bash\n$ cd $ROOT/utils/dataset_utils\n$ # Set the paths for MPII images and annotations file in gen_tfrecord_mpii.py\n$ python gen_tfrecord_mpii.py  # Will generate the tfrecord files\n```\n\n### Keypoint labels for other datasets\n\nWhile MPII dataset comes with pose labels, I also experiment with HMDB-51 and HICO, pose for which was computed using an initial version of [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). I provide the extracted keypoints here: [HMDB51](https://cmu.box.com/shared/static/gt8lhpafu7zwexf1wdwwmsufoktg94rg.tar) and [HICO](https://cmu.box.com/shared/static/42xizpt0w3almdgwczjxawvc1pvpesoa.tar).\n\n## Testing pre-trained models\n\nFirst download and unzip the\n[pretrained models](https://cmu.box.com/shared/static/s72scgtjj3lm60hsufi25rfjs2dk3a7i.zip)\nto a `$ROOT/src/pretrained_models/`.\nThe models can be run by\n\n```bash\n# Baseline model (no attention)\n$ python eval.py --cfg ../experiments/001_MPII_ResNet_pretrained.yaml\n# With attention\n$ python eval.py --cfg ../experiments/002_MPII_ResNet_withAttention_pretrained.yaml\n# With pose regularized attention\n$ python eval.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention_pretrained.yaml\n```\n\n### Expected performance on MPII Validation set\n\n| Method  | mAP | Accuracy |\n|--------|-----|------|\n| Baseline (no attention) | 26.2 | 33.5 |\n| With attention | 30.3 | 37.2 |\n| With pose regularized attention | 30.6 | 37.8 |\n\n## Training\n\nTrain an attentional pooled model on MPII dataset, using `python train.py --cfg <path to YAML file>`.\n\n```bash\n$ cd $ROOT/src\n$ python train.py --cfg ../experiments/002_MPII_ResNet_withAttention.yaml\n# To train the model with pose regularized attention, use the following config\n$ python train.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention.yaml\n# To train the baseline without attention, use the following config\n$ python train.py --cfg ../experiments/001_MPII_ResNet.yaml\n```\n\n## Testing and evaluation\n\nTest the model trained above on the validation set, using `python eval.py --cfg <path to YAML file>`.\n\n```bash\n$ python eval.py --cfg ../experiments/002_MPII_ResNet_withAttention.yaml\n# To evaluate the model with pose regularized attention\n$ python eval.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention.yaml\n# To evaluate the model without attention\n$ python eval.py --cfg ../experiments/001_MPII_ResNet.yaml\n```\n\nThe performance of these models should be similar to the above\nreleased pre-trained models.\n\n## Train + test on the final test set\n\nThis is for getting the final number on MPII test set.\n\n```bash\n# Train on the train + val set\n$ python train.py --cfg ../experiments/004_MPII_ResNet_withAttention_train+val.yaml\n# Test on the test set\n$ python eval.py --cfg ../experiments/004_MPII_ResNet_withAttention_train+val.yaml --save\n# Convert the output into the MAT files as expected by MPII authors (requires matlab/octave)\n$ cd ../utils;\n$ bash convert_mpii_result_for_eval.sh ../src/expt_outputs/004_MPII_ResNet_withAttention_train+val.yaml/<filename.h5>\n# Now the generated mat file can be emailed to MPII authors for test evaluation\n```\n"
  },
  {
    "path": "experiments/001_MPII_ResNet.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: ''\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n"
  },
  {
    "path": "experiments/001_MPII_ResNet_pretrained.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: ''\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n  CHECKPOINT_PATH: pretrained_models/mpii_baseline/model.ckpt-12000\n"
  },
  {
    "path": "experiments/002_MPII_ResNet_withAttention.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nNET:\n  USE_POSE_PRELOGITS_BASED_ATTENTION: True\n  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: ''\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n"
  },
  {
    "path": "experiments/002_MPII_ResNet_withAttention_pretrained.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nNET:\n  USE_POSE_PRELOGITS_BASED_ATTENTION: True\n  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: ''\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n  CHECKPOINT_PATH: pretrained_models/mpii_attention/model.ckpt-12000\n"
  },
  {
    "path": "experiments/003_MPII_ResNet_withPoseAttention.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nHEATMAP_MARKER_WD_RATIO: 0.05\nNET:\n  USE_POSE_PRELOGITS_BASED_ATTENTION: True\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: 'l2'\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n"
  },
  {
    "path": "experiments/003_MPII_ResNet_withPoseAttention_pretrained.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nHEATMAP_MARKER_WD_RATIO: 0.05\nNET:\n  USE_POSE_PRELOGITS_BASED_ATTENTION: True\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: 'l2'\nTEST:\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n  CHECKPOINT_PATH: pretrained_models/mpii_poseAttention/model.ckpt-12000\n"
  },
  {
    "path": "experiments/004_MPII_ResNet_withAttention_train+val.yaml",
    "content": "GPUS: '0,1,2,3'\nNUM_READERS: 4\nNUM_PREPROCESSING_THREADS: 12\nMODEL_NAME: 'resnet_v1_101'\nNET:\n  USE_POSE_PRELOGITS_BASED_ATTENTION: True\n  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True\nTRAIN:\n  ITER_SIZE: 2\n  LEARNING_RATE: 0.001\n  BATCH_SIZE: 16\n  FINAL_POSE_HMAP_SIDE: 15\n  LEARNING_RATE_DECAY_RATE: 0.33\n  NUM_STEPS_PER_DECAY: 5000\n  MAX_NUMBER_OF_STEPS: 12000\n  LOSS_FN_ACTION: softmax-xentropy\n  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt\n  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits\n  LOSS_FN_ACTION: 'softmax-xentropy'\n  LOSS_FN_POSE: ''\n  DATASET_SPLIT_NAME: 'trainval'\nTEST:\n  DATASET_SPLIT_NAME: 'test'\n  EVAL_METRIC: mAP\n  BATCH_SIZE: 1\n"
  },
  {
    "path": "models/.github/ISSUE_TEMPLATE.md",
    "content": "## Please let us know which model this issue is about (specify the top-level directory)\n"
  },
  {
    "path": "models/.gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*,cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# IPython Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# dotenv\n.env\n\n# virtualenv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n\n# Rope project settings\n.ropeproject\n\n# editor\n*.swp\n"
  },
  {
    "path": "models/.gitmodules",
    "content": "[submodule \"tensorflow\"]\n\tpath = syntaxnet/tensorflow\n\turl = https://github.com/tensorflow/tensorflow.git\n"
  },
  {
    "path": "models/LICENSE",
    "content": "Copyright 2016 The TensorFlow Authors.  All rights reserved.\n\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2016, The Authors.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "models/slim/__init__.py",
    "content": "\n"
  },
  {
    "path": "models/slim/datasets/__init__.py",
    "content": "\n"
  },
  {
    "path": "models/slim/datasets/cifar10.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides data for the Cifar10 dataset.\n\nThe dataset scripts used to create the dataset can be found at:\ntensorflow/models/slim/data/create_cifar10_dataset.py\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\nslim = tf.contrib.slim\n\n_FILE_PATTERN = 'cifar10_%s.tfrecord'\n\nSPLITS_TO_SIZES = {'train': 50000, 'test': 10000}\n\n_NUM_CLASSES = 10\n\n_ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A [32 x 32 x 3] color image.',\n    'label': 'A single integer between 0 and 9',\n}\n\n\ndef get_split(split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Gets a dataset tuple with instructions for reading cifar10.\n\n  Args:\n    split_name: A train/test split name.\n    dataset_dir: The base directory of the dataset sources.\n    file_pattern: The file pattern to use when matching the dataset sources.\n      It is assumed that the pattern contains a '%s' string so that the split\n      name can be inserted.\n    reader: The TensorFlow reader type.\n\n  Returns:\n    A `Dataset` namedtuple.\n\n  Raises:\n    ValueError: if `split_name` is not a valid train/test split.\n  \"\"\"\n  if split_name not in SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' % split_name)\n\n  if not file_pattern:\n    file_pattern = _FILE_PATTERN\n  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if not reader:\n    reader = tf.TFRecordReader\n\n  keys_to_features = {\n      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),\n      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),\n      'image/class/label': tf.FixedLenFeature(\n          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),\n  }\n\n  items_to_handlers = {\n      'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),\n      'label': slim.tfexample_decoder.Tensor('image/class/label'),\n  }\n\n  decoder = slim.tfexample_decoder.TFExampleDecoder(\n      keys_to_features, items_to_handlers)\n\n  labels_to_names = None\n  if dataset_utils.has_labels(dataset_dir):\n    labels_to_names = dataset_utils.read_label_file(dataset_dir)\n\n  return slim.dataset.Dataset(\n      data_sources=file_pattern,\n      reader=reader,\n      decoder=decoder,\n      num_samples=SPLITS_TO_SIZES[split_name],\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      num_classes=_NUM_CLASSES,\n      labels_to_names=labels_to_names)\n"
  },
  {
    "path": "models/slim/datasets/dataset_factory.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"A factory-pattern class which returns classification image/label pairs.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets import cifar10\nfrom datasets import flowers\nfrom datasets import imagenet\nfrom datasets import mnist\n\ndatasets_map = {\n    'cifar10': cifar10,\n    'flowers': flowers,\n    'imagenet': imagenet,\n    'mnist': mnist,\n}\n\n\ndef get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Given a dataset name and a split_name returns a Dataset.\n\n  Args:\n    name: String, the name of the dataset.\n    split_name: A train/test split name.\n    dataset_dir: The directory where the dataset files are stored.\n    file_pattern: The file pattern to use for matching the dataset source files.\n    reader: The subclass of tf.ReaderBase. If left as `None`, then the default\n      reader defined by each dataset is used.\n\n  Returns:\n    A `Dataset` class.\n\n  Raises:\n    ValueError: If the dataset `name` is unknown.\n  \"\"\"\n  if name not in datasets_map:\n    raise ValueError('Name of dataset unknown %s' % name)\n  return datasets_map[name].get_split(\n      split_name,\n      dataset_dir,\n      file_pattern,\n      reader)\n"
  },
  {
    "path": "models/slim/datasets/dataset_utils.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains utilities for downloading and converting datasets.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport sys\nimport tarfile\n\nfrom six.moves import urllib\nimport tensorflow as tf\n\nLABELS_FILENAME = 'labels.txt'\n\n\ndef int64_feature(values):\n  \"\"\"Returns a TF-Feature of int64s.\n\n  Args:\n    values: A scalar or list of values.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  if not isinstance(values, (tuple, list)):\n    values = [values]\n  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))\n\n\ndef bytes_feature(values):\n  \"\"\"Returns a TF-Feature of bytes.\n\n  Args:\n    values: A string.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))\n\n\ndef image_to_tfexample(image_data, image_format, height, width, class_id):\n  return tf.train.Example(features=tf.train.Features(feature={\n      'image/encoded': bytes_feature(image_data),\n      'image/format': bytes_feature(image_format),\n      'image/class/label': int64_feature(class_id),\n      'image/height': int64_feature(height),\n      'image/width': int64_feature(width),\n  }))\n\n\ndef download_and_uncompress_tarball(tarball_url, dataset_dir):\n  \"\"\"Downloads the `tarball_url` and uncompresses it locally.\n\n  Args:\n    tarball_url: The URL of a tarball file.\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  filename = tarball_url.split('/')[-1]\n  filepath = os.path.join(dataset_dir, filename)\n\n  def _progress(count, block_size, total_size):\n    sys.stdout.write('\\r>> Downloading %s %.1f%%' % (\n        filename, float(count * block_size) / float(total_size) * 100.0))\n    sys.stdout.flush()\n  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)\n  print()\n  statinfo = os.stat(filepath)\n  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')\n  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)\n\n\ndef write_label_file(labels_to_class_names, dataset_dir,\n                     filename=LABELS_FILENAME):\n  \"\"\"Writes a file with the list of class names.\n\n  Args:\n    labels_to_class_names: A map of (integer) labels to class names.\n    dataset_dir: The directory in which the labels file should be written.\n    filename: The filename where the class names are written.\n  \"\"\"\n  labels_filename = os.path.join(dataset_dir, filename)\n  with tf.gfile.Open(labels_filename, 'w') as f:\n    for label in labels_to_class_names:\n      class_name = labels_to_class_names[label]\n      f.write('%d:%s\\n' % (label, class_name))\n\n\ndef has_labels(dataset_dir, filename=LABELS_FILENAME):\n  \"\"\"Specifies whether or not the dataset directory contains a label map file.\n\n  Args:\n    dataset_dir: The directory in which the labels file is found.\n    filename: The filename where the class names are written.\n\n  Returns:\n    `True` if the labels file exists and `False` otherwise.\n  \"\"\"\n  return tf.gfile.Exists(os.path.join(dataset_dir, filename))\n\n\ndef read_label_file(dataset_dir, filename=LABELS_FILENAME):\n  \"\"\"Reads the labels file and returns a mapping from ID to class name.\n\n  Args:\n    dataset_dir: The directory in which the labels file is found.\n    filename: The filename where the class names are written.\n\n  Returns:\n    A map from a label (integer) to class name.\n  \"\"\"\n  labels_filename = os.path.join(dataset_dir, filename)\n  with tf.gfile.Open(labels_filename, 'r') as f:\n    lines = f.read().decode()\n  lines = lines.split('\\n')\n  lines = filter(None, lines)\n\n  labels_to_class_names = {}\n  for line in lines:\n    index = line.index(':')\n    labels_to_class_names[int(line[:index])] = line[index+1:]\n  return labels_to_class_names\n"
  },
  {
    "path": "models/slim/datasets/download_and_convert_cifar10.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nr\"\"\"Downloads and converts cifar10 data to TFRecords of TF-Example protos.\n\nThis module downloads the cifar10 data, uncompresses it, reads the files\nthat make up the cifar10 data and creates two TFRecord datasets: one for train\nand one for test. Each TFRecord dataset is comprised of a set of TF-Example\nprotocol buffers, each of which contain a single image and label.\n\nThe script should take several minutes to run.\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport cPickle\nimport os\nimport sys\nimport tarfile\n\nimport numpy as np\nfrom six.moves import urllib\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\n# The URL where the CIFAR data can be downloaded.\n_DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'\n\n# The number of training files.\n_NUM_TRAIN_FILES = 5\n\n# The height and width of each image.\n_IMAGE_SIZE = 32\n\n# The names of the classes.\n_CLASS_NAMES = [\n    'airplane',\n    'automobile',\n    'bird',\n    'cat',\n    'deer',\n    'dog',\n    'frog',\n    'horse',\n    'ship',\n    'truck',\n]\n\n\ndef _add_to_tfrecord(filename, tfrecord_writer, offset=0):\n  \"\"\"Loads data from the cifar10 pickle files and writes files to a TFRecord.\n\n  Args:\n    filename: The filename of the cifar10 pickle file.\n    tfrecord_writer: The TFRecord writer to use for writing.\n    offset: An offset into the absolute number of images previously written.\n\n  Returns:\n    The new offset.\n  \"\"\"\n  with tf.gfile.Open(filename, 'r') as f:\n    data = cPickle.load(f)\n\n  images = data['data']\n  num_images = images.shape[0]\n\n  images = images.reshape((num_images, 3, 32, 32))\n  labels = data['labels']\n\n  with tf.Graph().as_default():\n    image_placeholder = tf.placeholder(dtype=tf.uint8)\n    encoded_image = tf.image.encode_png(image_placeholder)\n\n    with tf.Session('') as sess:\n\n      for j in range(num_images):\n        sys.stdout.write('\\r>> Reading file [%s] image %d/%d' % (\n            filename, offset + j + 1, offset + num_images))\n        sys.stdout.flush()\n\n        image = np.squeeze(images[j]).transpose((1, 2, 0))\n        label = labels[j]\n\n        png_string = sess.run(encoded_image,\n                              feed_dict={image_placeholder: image})\n\n        example = dataset_utils.image_to_tfexample(\n            png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, label)\n        tfrecord_writer.write(example.SerializeToString())\n\n  return offset + num_images\n\n\ndef _get_output_filename(dataset_dir, split_name):\n  \"\"\"Creates the output filename.\n\n  Args:\n    dataset_dir: The dataset directory where the dataset is stored.\n    split_name: The name of the train/test split.\n\n  Returns:\n    An absolute file path.\n  \"\"\"\n  return '%s/cifar10_%s.tfrecord' % (dataset_dir, split_name)\n\n\ndef _download_and_uncompress_dataset(dataset_dir):\n  \"\"\"Downloads cifar10 and uncompresses it locally.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  filename = _DATA_URL.split('/')[-1]\n  filepath = os.path.join(dataset_dir, filename)\n\n  if not os.path.exists(filepath):\n    def _progress(count, block_size, total_size):\n      sys.stdout.write('\\r>> Downloading %s %.1f%%' % (\n          filename, float(count * block_size) / float(total_size) * 100.0))\n      sys.stdout.flush()\n    filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)\n    print()\n    statinfo = os.stat(filepath)\n    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')\n    tarfile.open(filepath, 'r:gz').extractall(dataset_dir)\n\n\ndef _clean_up_temporary_files(dataset_dir):\n  \"\"\"Removes temporary files used to create the dataset.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  filename = _DATA_URL.split('/')[-1]\n  filepath = os.path.join(dataset_dir, filename)\n  tf.gfile.Remove(filepath)\n\n  tmp_dir = os.path.join(dataset_dir, 'cifar-10-batches-py')\n  tf.gfile.DeleteRecursively(tmp_dir)\n\n\ndef run(dataset_dir):\n  \"\"\"Runs the download and conversion operation.\n\n  Args:\n    dataset_dir: The dataset directory where the dataset is stored.\n  \"\"\"\n  if not tf.gfile.Exists(dataset_dir):\n    tf.gfile.MakeDirs(dataset_dir)\n\n  training_filename = _get_output_filename(dataset_dir, 'train')\n  testing_filename = _get_output_filename(dataset_dir, 'test')\n\n  if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):\n    print('Dataset files already exist. Exiting without re-creating them.')\n    return\n\n  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)\n\n  # First, process the training data:\n  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:\n    offset = 0\n    for i in range(_NUM_TRAIN_FILES):\n      filename = os.path.join(dataset_dir,\n                              'cifar-10-batches-py',\n                              'data_batch_%d' % (i + 1))  # 1-indexed.\n      offset = _add_to_tfrecord(filename, tfrecord_writer, offset)\n\n  # Next, process the testing data:\n  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:\n    filename = os.path.join(dataset_dir,\n                            'cifar-10-batches-py',\n                            'test_batch')\n    _add_to_tfrecord(filename, tfrecord_writer)\n\n  # Finally, write the labels file:\n  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))\n  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)\n\n  _clean_up_temporary_files(dataset_dir)\n  print('\\nFinished converting the Cifar10 dataset!')\n"
  },
  {
    "path": "models/slim/datasets/download_and_convert_flowers.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nr\"\"\"Downloads and converts Flowers data to TFRecords of TF-Example protos.\n\nThis module downloads the Flowers data, uncompresses it, reads the files\nthat make up the Flowers data and creates two TFRecord datasets: one for train\nand one for test. Each TFRecord dataset is comprised of a set of TF-Example\nprotocol buffers, each of which contain a single image and label.\n\nThe script should take about a minute to run.\n\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\nimport os\nimport random\nimport sys\n\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\n# The URL where the Flowers data can be downloaded.\n_DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'\n\n# The number of images in the validation set.\n_NUM_VALIDATION = 350\n\n# Seed for repeatability.\n_RANDOM_SEED = 0\n\n# The number of shards per dataset split.\n_NUM_SHARDS = 5\n\n\nclass ImageReader(object):\n  \"\"\"Helper class that provides TensorFlow image coding utilities.\"\"\"\n\n  def __init__(self):\n    # Initializes function that decodes RGB JPEG data.\n    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)\n    self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)\n\n  def read_image_dims(self, sess, image_data):\n    image = self.decode_jpeg(sess, image_data)\n    return image.shape[0], image.shape[1]\n\n  def decode_jpeg(self, sess, image_data):\n    image = sess.run(self._decode_jpeg,\n                     feed_dict={self._decode_jpeg_data: image_data})\n    assert len(image.shape) == 3\n    assert image.shape[2] == 3\n    return image\n\n\ndef _get_filenames_and_classes(dataset_dir):\n  \"\"\"Returns a list of filenames and inferred class names.\n\n  Args:\n    dataset_dir: A directory containing a set of subdirectories representing\n      class names. Each subdirectory should contain PNG or JPG encoded images.\n\n  Returns:\n    A list of image file paths, relative to `dataset_dir` and the list of\n    subdirectories, representing class names.\n  \"\"\"\n  flower_root = os.path.join(dataset_dir, 'flower_photos')\n  directories = []\n  class_names = []\n  for filename in os.listdir(flower_root):\n    path = os.path.join(flower_root, filename)\n    if os.path.isdir(path):\n      directories.append(path)\n      class_names.append(filename)\n\n  photo_filenames = []\n  for directory in directories:\n    for filename in os.listdir(directory):\n      path = os.path.join(directory, filename)\n      photo_filenames.append(path)\n\n  return photo_filenames, sorted(class_names)\n\n\ndef _get_dataset_filename(dataset_dir, split_name, shard_id):\n  output_filename = 'flowers_%s_%05d-of-%05d.tfrecord' % (\n      split_name, shard_id, _NUM_SHARDS)\n  return os.path.join(dataset_dir, output_filename)\n\n\ndef _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):\n  \"\"\"Converts the given filenames to a TFRecord dataset.\n\n  Args:\n    split_name: The name of the dataset, either 'train' or 'validation'.\n    filenames: A list of absolute paths to png or jpg images.\n    class_names_to_ids: A dictionary from class names (strings) to ids\n      (integers).\n    dataset_dir: The directory where the converted datasets are stored.\n  \"\"\"\n  assert split_name in ['train', 'validation']\n\n  num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))\n\n  with tf.Graph().as_default():\n    image_reader = ImageReader()\n\n    with tf.Session('') as sess:\n\n      for shard_id in range(_NUM_SHARDS):\n        output_filename = _get_dataset_filename(\n            dataset_dir, split_name, shard_id)\n\n        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:\n          start_ndx = shard_id * num_per_shard\n          end_ndx = min((shard_id+1) * num_per_shard, len(filenames))\n          for i in range(start_ndx, end_ndx):\n            sys.stdout.write('\\r>> Converting image %d/%d shard %d' % (\n                i+1, len(filenames), shard_id))\n            sys.stdout.flush()\n\n            # Read the filename:\n            image_data = tf.gfile.FastGFile(filenames[i], 'r').read()\n            height, width = image_reader.read_image_dims(sess, image_data)\n\n            class_name = os.path.basename(os.path.dirname(filenames[i]))\n            class_id = class_names_to_ids[class_name]\n\n            example = dataset_utils.image_to_tfexample(\n                image_data, 'jpg', height, width, class_id)\n            tfrecord_writer.write(example.SerializeToString())\n\n  sys.stdout.write('\\n')\n  sys.stdout.flush()\n\n\ndef _clean_up_temporary_files(dataset_dir):\n  \"\"\"Removes temporary files used to create the dataset.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  filename = _DATA_URL.split('/')[-1]\n  filepath = os.path.join(dataset_dir, filename)\n  tf.gfile.Remove(filepath)\n\n  tmp_dir = os.path.join(dataset_dir, 'flower_photos')\n  tf.gfile.DeleteRecursively(tmp_dir)\n\n\ndef _dataset_exists(dataset_dir):\n  for split_name in ['train', 'validation']:\n    for shard_id in range(_NUM_SHARDS):\n      output_filename = _get_dataset_filename(\n          dataset_dir, split_name, shard_id)\n      if not tf.gfile.Exists(output_filename):\n        return False\n  return True\n\n\ndef run(dataset_dir):\n  \"\"\"Runs the download and conversion operation.\n\n  Args:\n    dataset_dir: The dataset directory where the dataset is stored.\n  \"\"\"\n  if not tf.gfile.Exists(dataset_dir):\n    tf.gfile.MakeDirs(dataset_dir)\n\n  if _dataset_exists(dataset_dir):\n    print('Dataset files already exist. Exiting without re-creating them.')\n    return\n\n  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)\n  photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)\n  class_names_to_ids = dict(zip(class_names, range(len(class_names))))\n\n  # Divide into train and test:\n  random.seed(_RANDOM_SEED)\n  random.shuffle(photo_filenames)\n  training_filenames = photo_filenames[_NUM_VALIDATION:]\n  validation_filenames = photo_filenames[:_NUM_VALIDATION]\n\n  # First, convert the training and validation sets.\n  _convert_dataset('train', training_filenames, class_names_to_ids,\n                   dataset_dir)\n  _convert_dataset('validation', validation_filenames, class_names_to_ids,\n                   dataset_dir)\n\n  # Finally, write the labels file:\n  labels_to_class_names = dict(zip(range(len(class_names)), class_names))\n  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)\n\n  _clean_up_temporary_files(dataset_dir)\n  print('\\nFinished converting the Flowers dataset!')\n\n"
  },
  {
    "path": "models/slim/datasets/download_and_convert_mnist.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\nr\"\"\"Downloads and converts MNIST data to TFRecords of TF-Example protos.\n\nThis module downloads the MNIST data, uncompresses it, reads the files\nthat make up the MNIST data and creates two TFRecord datasets: one for train\nand one for test. Each TFRecord dataset is comprised of a set of TF-Example\nprotocol buffers, each of which contain a single image and label.\n\nThe script should take about a minute to run.\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport gzip\nimport os\nimport sys\n\nimport numpy as np\nfrom six.moves import urllib\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\n# The URLs where the MNIST data can be downloaded.\n_DATA_URL = 'http://yann.lecun.com/exdb/mnist/'\n_TRAIN_DATA_FILENAME = 'train-images-idx3-ubyte.gz'\n_TRAIN_LABELS_FILENAME = 'train-labels-idx1-ubyte.gz'\n_TEST_DATA_FILENAME = 't10k-images-idx3-ubyte.gz'\n_TEST_LABELS_FILENAME = 't10k-labels-idx1-ubyte.gz'\n\n_IMAGE_SIZE = 28\n_NUM_CHANNELS = 1\n\n# The names of the classes.\n_CLASS_NAMES = [\n    'zero',\n    'one',\n    'two',\n    'three',\n    'four',\n    'five',\n    'six',\n    'seven',\n    'eight',\n    'nine',\n]\n\n\ndef _extract_images(filename, num_images):\n  \"\"\"Extract the images into a numpy array.\n\n  Args:\n    filename: The path to an MNIST images file.\n    num_images: The number of images in the file.\n\n  Returns:\n    A numpy array of shape [number_of_images, height, width, channels].\n  \"\"\"\n  print('Extracting images from: ', filename)\n  with gzip.open(filename) as bytestream:\n    bytestream.read(16)\n    buf = bytestream.read(\n        _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)\n    data = np.frombuffer(buf, dtype=np.uint8)\n    data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)\n  return data\n\n\ndef _extract_labels(filename, num_labels):\n  \"\"\"Extract the labels into a vector of int64 label IDs.\n\n  Args:\n    filename: The path to an MNIST labels file.\n    num_labels: The number of labels in the file.\n\n  Returns:\n    A numpy array of shape [number_of_labels]\n  \"\"\"\n  print('Extracting labels from: ', filename)\n  with gzip.open(filename) as bytestream:\n    bytestream.read(8)\n    buf = bytestream.read(1 * num_labels)\n    labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)\n  return labels\n\n\ndef _add_to_tfrecord(data_filename, labels_filename, num_images,\n                     tfrecord_writer):\n  \"\"\"Loads data from the binary MNIST files and writes files to a TFRecord.\n\n  Args:\n    data_filename: The filename of the MNIST images.\n    labels_filename: The filename of the MNIST labels.\n    num_images: The number of images in the dataset.\n    tfrecord_writer: The TFRecord writer to use for writing.\n  \"\"\"\n  images = _extract_images(data_filename, num_images)\n  labels = _extract_labels(labels_filename, num_images)\n\n  shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)\n  with tf.Graph().as_default():\n    image = tf.placeholder(dtype=tf.uint8, shape=shape)\n    encoded_png = tf.image.encode_png(image)\n\n    with tf.Session('') as sess:\n      for j in range(num_images):\n        sys.stdout.write('\\r>> Converting image %d/%d' % (j + 1, num_images))\n        sys.stdout.flush()\n\n        png_string = sess.run(encoded_png, feed_dict={image: images[j]})\n\n        example = dataset_utils.image_to_tfexample(\n            png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, labels[j])\n        tfrecord_writer.write(example.SerializeToString())\n\n\ndef _get_output_filename(dataset_dir, split_name):\n  \"\"\"Creates the output filename.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n    split_name: The name of the train/test split.\n\n  Returns:\n    An absolute file path.\n  \"\"\"\n  return '%s/mnist_%s.tfrecord' % (dataset_dir, split_name)\n\n\ndef _download_dataset(dataset_dir):\n  \"\"\"Downloads MNIST locally.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  for filename in [_TRAIN_DATA_FILENAME,\n                   _TRAIN_LABELS_FILENAME,\n                   _TEST_DATA_FILENAME,\n                   _TEST_LABELS_FILENAME]:\n    filepath = os.path.join(dataset_dir, filename)\n\n    if not os.path.exists(filepath):\n      print('Downloading file %s...' % filename)\n      def _progress(count, block_size, total_size):\n        sys.stdout.write('\\r>> Downloading %.1f%%' % (\n            float(count * block_size) / float(total_size) * 100.0))\n        sys.stdout.flush()\n      filepath, _ = urllib.request.urlretrieve(_DATA_URL + filename,\n                                               filepath,\n                                               _progress)\n      print()\n      with tf.gfile.GFile(filepath) as f:\n        size = f.Size()\n      print('Successfully downloaded', filename, size, 'bytes.')\n\n\ndef _clean_up_temporary_files(dataset_dir):\n  \"\"\"Removes temporary files used to create the dataset.\n\n  Args:\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  for filename in [_TRAIN_DATA_FILENAME,\n                   _TRAIN_LABELS_FILENAME,\n                   _TEST_DATA_FILENAME,\n                   _TEST_LABELS_FILENAME]:\n    filepath = os.path.join(dataset_dir, filename)\n    tf.gfile.Remove(filepath)\n\n\ndef run(dataset_dir):\n  \"\"\"Runs the download and conversion operation.\n\n  Args:\n    dataset_dir: The dataset directory where the dataset is stored.\n  \"\"\"\n  if not tf.gfile.Exists(dataset_dir):\n    tf.gfile.MakeDirs(dataset_dir)\n\n  training_filename = _get_output_filename(dataset_dir, 'train')\n  testing_filename = _get_output_filename(dataset_dir, 'test')\n\n  if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):\n    print('Dataset files already exist. Exiting without re-creating them.')\n    return\n\n  _download_dataset(dataset_dir)\n\n  # First, process the training data:\n  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:\n    data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME)\n    labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME)\n    _add_to_tfrecord(data_filename, labels_filename, 60000, tfrecord_writer)\n\n  # Next, process the testing data:\n  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:\n    data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME)\n    labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME)\n    _add_to_tfrecord(data_filename, labels_filename, 10000, tfrecord_writer)\n\n  # Finally, write the labels file:\n  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))\n  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)\n\n  _clean_up_temporary_files(dataset_dir)\n  print('\\nFinished converting the MNIST dataset!')\n"
  },
  {
    "path": "models/slim/datasets/flowers.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides data for the flowers dataset.\n\nThe dataset scripts used to create the dataset can be found at:\ntensorflow/models/slim/datasets/download_and_convert_flowers.py\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\nslim = tf.contrib.slim\n\n_FILE_PATTERN = 'flowers_%s_*.tfrecord'\n\nSPLITS_TO_SIZES = {'train': 3320, 'validation': 350}\n\n_NUM_CLASSES = 5\n\n_ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A color image of varying size.',\n    'label': 'A single integer between 0 and 4',\n}\n\n\ndef get_split(split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Gets a dataset tuple with instructions for reading flowers.\n\n  Args:\n    split_name: A train/validation split name.\n    dataset_dir: The base directory of the dataset sources.\n    file_pattern: The file pattern to use when matching the dataset sources.\n      It is assumed that the pattern contains a '%s' string so that the split\n      name can be inserted.\n    reader: The TensorFlow reader type.\n\n  Returns:\n    A `Dataset` namedtuple.\n\n  Raises:\n    ValueError: if `split_name` is not a valid train/validation split.\n  
\"\"\"\n  if split_name not in SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' % split_name)\n\n  if not file_pattern:\n    file_pattern = _FILE_PATTERN\n  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if reader is None:\n    reader = tf.TFRecordReader\n\n  keys_to_features = {\n      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),\n      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),\n      'image/class/label': tf.FixedLenFeature(\n          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),\n  }\n\n  items_to_handlers = {\n      'image': slim.tfexample_decoder.Image(),\n      'label': slim.tfexample_decoder.Tensor('image/class/label'),\n  }\n\n  decoder = slim.tfexample_decoder.TFExampleDecoder(\n      keys_to_features, items_to_handlers)\n\n  labels_to_names = None\n  if dataset_utils.has_labels(dataset_dir):\n    labels_to_names = dataset_utils.read_label_file(dataset_dir)\n\n  return slim.dataset.Dataset(\n      data_sources=file_pattern,\n      reader=reader,\n      decoder=decoder,\n      num_samples=SPLITS_TO_SIZES[split_name],\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      num_classes=_NUM_CLASSES,\n      labels_to_names=labels_to_names)\n"
  },
  {
    "path": "models/slim/datasets/imagenet.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.\n\nSome images have one or more bounding boxes associated with the label of the\nimage. See details here: http://image-net.org/download-bboxes\n\nImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use\n\"WordNet ID\" (wnid), which is a concatenation of POS ( i.e. part of speech )\nand SYNSET OFFSET of WordNet. 
For more information, please refer to the\nWordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].\n\n\"There are bounding boxes for over 3000 popular synsets available.\nFor each synset, there are on average 150 images with bounding boxes.\"\n\nWARNING: Don't use for object detection, in this case all the bounding boxes\nof the image belong to just one class.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nfrom six.moves import urllib\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\nslim = tf.contrib.slim\n\n# TODO(nsilberman): Add tfrecord file type once the script is updated.\n_FILE_PATTERN = '%s-*'\n\n_SPLITS_TO_SIZES = {\n    'train': 1281167,\n    'validation': 50000,\n}\n\n_ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A color image of varying height and width.',\n    'label': 'The label id of the image, integer between 0 and 999',\n    'label_text': 'The text of the label.',\n    'object/bbox': 'A list of bounding boxes.',\n    'object/label': 'A list of labels, one per each object.',\n}\n\n_NUM_CLASSES = 1001\n\n\ndef create_readable_names_for_imagenet_labels():\n  \"\"\"Create a dict mapping label id to human readable string.\n\n  Returns:\n      labels_to_names: dictionary where keys are integers from to 1000\n      and values are human-readable names.\n\n  We retrieve a synset file, which contains a list of valid synset labels used\n  by ILSVRC competition. There is one synset one per line, eg.\n          #   n01440764\n          #   n01443537\n  We also retrieve a synset_to_human_file, which contains a mapping from synsets\n  to human-readable names for every synset in Imagenet. 
These are stored in a\n  tsv format, as follows:\n          #   n02119247    black fox\n          #   n02119359    silver fox\n  We assign each synset (in alphabetical order) an integer, starting from 1\n  (since 0 is reserved for the background class).\n\n  Code is based on\n  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463\n  \"\"\"\n\n  # pylint: disable=g-line-too-long\n  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/'\n  synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)\n  synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)\n\n  filename, _ = urllib.request.urlretrieve(synset_url)\n  synset_list = [s.strip() for s in open(filename).readlines()]\n  num_synsets_in_ilsvrc = len(synset_list)\n  assert num_synsets_in_ilsvrc == 1000\n\n  filename, _ = urllib.request.urlretrieve(synset_to_human_url)\n  synset_to_human_list = open(filename).readlines()\n  num_synsets_in_all_imagenet = len(synset_to_human_list)\n  assert num_synsets_in_all_imagenet == 21842\n\n  synset_to_human = {}\n  for s in synset_to_human_list:\n    parts = s.strip().split('\\t')\n    assert len(parts) == 2\n    synset = parts[0]\n    human = parts[1]\n    synset_to_human[synset] = human\n\n  label_index = 1\n  labels_to_names = {0: 'background'}\n  for synset in synset_list:\n    name = synset_to_human[synset]\n    labels_to_names[label_index] = name\n    label_index += 1\n\n  return labels_to_names\n\n\ndef get_split(split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Gets a dataset tuple with instructions for reading ImageNet.\n\n  Args:\n    split_name: A train/test split name.\n    dataset_dir: The base directory of the dataset sources.\n    file_pattern: The file pattern to use when matching the dataset sources.\n      It is assumed that the pattern contains a '%s' string so that the split\n      name can be inserted.\n    reader: The 
TensorFlow reader type.\n\n  Returns:\n    A `Dataset` namedtuple.\n\n  Raises:\n    ValueError: if `split_name` is not a valid train/test split.\n  \"\"\"\n  if split_name not in _SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' % split_name)\n\n  if not file_pattern:\n    file_pattern = _FILE_PATTERN\n  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if reader is None:\n    reader = tf.TFRecordReader\n\n  keys_to_features = {\n      'image/encoded': tf.FixedLenFeature(\n          (), tf.string, default_value=''),\n      'image/format': tf.FixedLenFeature(\n          (), tf.string, default_value='jpeg'),\n      'image/class/label': tf.FixedLenFeature(\n          [], dtype=tf.int64, default_value=-1),\n      'image/class/text': tf.FixedLenFeature(\n          [], dtype=tf.string, default_value=''),\n      'image/object/bbox/xmin': tf.VarLenFeature(\n          dtype=tf.float32),\n      'image/object/bbox/ymin': tf.VarLenFeature(\n          dtype=tf.float32),\n      'image/object/bbox/xmax': tf.VarLenFeature(\n          dtype=tf.float32),\n      'image/object/bbox/ymax': tf.VarLenFeature(\n          dtype=tf.float32),\n      'image/object/class/label': tf.VarLenFeature(\n          dtype=tf.int64),\n  }\n\n  items_to_handlers = {\n      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),\n      'label': slim.tfexample_decoder.Tensor('image/class/label'),\n      'label_text': slim.tfexample_decoder.Tensor('image/class/text'),\n      'object/bbox': slim.tfexample_decoder.BoundingBox(\n          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),\n      'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),\n  }\n\n  decoder = slim.tfexample_decoder.TFExampleDecoder(\n      keys_to_features, items_to_handlers)\n\n  labels_to_names = None\n  if dataset_utils.has_labels(dataset_dir):\n    
labels_to_names = dataset_utils.read_label_file(dataset_dir)\n  else:\n    labels_to_names = create_readable_names_for_imagenet_labels()\n    dataset_utils.write_label_file(labels_to_names, dataset_dir)\n\n  return slim.dataset.Dataset(\n      data_sources=file_pattern,\n      reader=reader,\n      decoder=decoder,\n      num_samples=_SPLITS_TO_SIZES[split_name],\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      num_classes=_NUM_CLASSES,\n      labels_to_names=labels_to_names)\n"
  },
  {
    "path": "models/slim/datasets/mnist.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides data for the MNIST dataset.\n\nThe dataset scripts used to create the dataset can be found at:\ntensorflow/models/slim/data/create_mnist_dataset.py\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport tensorflow as tf\n\nfrom datasets import dataset_utils\n\nslim = tf.contrib.slim\n\n_FILE_PATTERN = 'mnist_%s.tfrecord'\n\n_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000}\n\n_NUM_CLASSES = 10\n\n_ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A [28 x 28 x 1] grayscale image.',\n    'label': 'A single integer between 0 and 9',\n}\n\n\ndef get_split(split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Gets a dataset tuple with instructions for reading MNIST.\n\n  Args:\n    split_name: A train/test split name.\n    dataset_dir: The base directory of the dataset sources.\n    file_pattern: The file pattern to use when matching the dataset sources.\n      It is assumed that the pattern contains a '%s' string so that the split\n      name can be inserted.\n    reader: The TensorFlow reader type.\n\n  Returns:\n    A `Dataset` namedtuple.\n\n  Raises:\n    ValueError: if `split_name` is not a valid train/test split.\n  \"\"\"\n  if split_name not in 
_SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' % split_name)\n\n  if not file_pattern:\n    file_pattern = _FILE_PATTERN\n  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if reader is None:\n    reader = tf.TFRecordReader\n\n  keys_to_features = {\n      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),\n      'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),\n      'image/class/label': tf.FixedLenFeature(\n          [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),\n  }\n\n  items_to_handlers = {\n      'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1),\n      'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]),\n  }\n\n  decoder = slim.tfexample_decoder.TFExampleDecoder(\n      keys_to_features, items_to_handlers)\n\n  labels_to_names = None\n  if dataset_utils.has_labels(dataset_dir):\n    labels_to_names = dataset_utils.read_label_file(dataset_dir)\n\n  return slim.dataset.Dataset(\n      data_sources=file_pattern,\n      reader=reader,\n      decoder=decoder,\n      num_samples=_SPLITS_TO_SIZES[split_name],\n      num_classes=_NUM_CLASSES,\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      labels_to_names=labels_to_names)\n"
  },
  {
    "path": "models/slim/deployment/__init__.py",
    "content": "\n"
  },
  {
    "path": "models/slim/deployment/model_deploy.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Deploy Slim models across multiple clones and replicas.\n\n# TODO(sguada) docstring paragraph by (a) motivating the need for the file and\n# (b) defining clones.\n\n# TODO(sguada) describe the high-level components of model deployment.\n# E.g. \"each model deployment is composed of several parts: a DeploymentConfig,\n# which captures A, B and C, an input_fn which loads data.. 
etc\n\nTo easily train a model on multiple GPUs or across multiple machines this\nmodule provides a set of helper functions: `create_clones`,\n`optimize_clones` and `deploy`.\n\nUsage:\n\n  g = tf.Graph()\n\n  # Set up DeploymentConfig\n  config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)\n\n  # Create the global step on the device storing the variables.\n  with tf.device(config.variables_device()):\n    global_step = slim.create_global_step()\n\n  # Define the inputs\n  with tf.device(config.inputs_device()):\n    images, labels = LoadData(...)\n    inputs_queue = slim.data.prefetch_queue((images, labels))\n\n  # Define the optimizer.\n  with tf.device(config.optimizer_device()):\n    optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)\n\n  # Define the model including the loss.\n  def model_fn(inputs_queue):\n    images, labels = inputs_queue.dequeue()\n    predictions = CreateNetwork(images)\n    slim.losses.log_loss(predictions, labels)\n\n  model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],\n                                 optimizer=optimizer)\n\n  # Run training.\n  slim.learning.train(model_dp.train_op, my_log_dir,\n                      summary_op=model_dp.summary_op)\n\nThe Clone namedtuple holds together the values associated with each call to\nmodel_fn:\n  * outputs: The return values of the calls to `model_fn()`.\n  * scope: The scope used to create the clone.\n  * device: The device used to create the clone.\n\nDeployedModel namedtuple, holds together the values needed to train multiple\nclones:\n  * train_op: An operation that run the optimizer training op and include\n    all the update ops created by `model_fn`. 
Present only if an optimizer\n    was specified.\n  * summary_op: An operation that run the summaries created by `model_fn`\n    and process_gradients.\n  * total_loss: A `Tensor` that contains the sum of all losses created by\n    `model_fn` plus the regularization losses.\n  * clones: List of `Clone` tuples returned by `create_clones()`.\n\nDeploymentConfig parameters:\n  * num_clones: Number of model clones to deploy in each replica.\n  * clone_on_cpu: True if clones should be placed on CPU.\n  * replica_id: Integer.  Index of the replica for which the model is\n      deployed.  Usually 0 for the chief replica.\n  * num_replicas: Number of replicas to use.\n  * num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.\n  * worker_job_name: A name for the worker job.\n  * ps_job_name: A name for the parameter server job.\n\nTODO(sguada):\n  - describe side effect to the graph.\n  - what happens to summaries and update_ops.\n  - which graph collections are altered.\n  - write a tutorial on how to use this.\n  - analyze the possibility of calling deploy more than once.\n\n\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\n\nimport tensorflow as tf\n\nfrom tensorflow.python.ops import control_flow_ops\n\nslim = tf.contrib.slim\n\n\n__all__ = ['create_clones',\n           'deploy',\n           'optimize_clones',\n           'DeployedModel',\n           'DeploymentConfig',\n           'Clone',\n          ]\n\n\n# Namedtuple used to represent a clone during deployment.\nClone = collections.namedtuple('Clone',\n                               ['outputs',  # Whatever model_fn() returned.\n                                'scope',  # The scope used to create it.\n                                'device',  # The device used to create.\n                               ])\n\n# Namedtuple used to represent a DeployedModel, returned by deploy().\nDeployedModel = 
collections.namedtuple('DeployedModel',\n                                       ['train_op',  # The `train_op`\n                                        'summary_op',  # The `summary_op`\n                                        'total_loss',  # The loss `Tensor`\n                                        'clones',  # A list of `Clones` tuples.\n                                       ])\n\n# Default parameters for DeploymentConfig\n_deployment_params = {'num_clones': 1,\n                      'clone_on_cpu': False,\n                      'replica_id': 0,\n                      'num_replicas': 1,\n                      'num_ps_tasks': 0,\n                      'worker_job_name': 'worker',\n                      'ps_job_name': 'ps'}\n\n\ndef create_clones(config, model_fn, args=None, kwargs=None):\n  \"\"\"Creates multiple clones according to config using a `model_fn`.\n\n  The returned values of `model_fn(*args, **kwargs)` are collected along with\n  the scope and device used to created it in a namedtuple\n  `Clone(outputs, scope, device)`\n\n  Note: it is assumed that any loss created by `model_fn` is collected at\n  the tf.GraphKeys.LOSSES collection.\n\n  To recover the losses, summaries or update_ops created by the clone use:\n  ```python\n    losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)\n    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)\n    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)\n  ```\n\n  The deployment options are specified by the config object and support\n  deploying one or several clones on different GPUs and one or several replicas\n  of such clones.\n\n  The argument `model_fn` is called `config.num_clones` times to create the\n  model clones as `model_fn(*args, **kwargs)`.\n\n  If `config` specifies deployment on multiple replicas then the default\n  tensorflow device is set appropriatly for each call to `model_fn` and for the\n  slim variable creation functions: model and global variables 
will be created\n  on the `ps` device, the clone operations will be on the `worker` device.\n\n  Args:\n    config: A DeploymentConfig object.\n    model_fn: A callable. Called as `model_fn(*args, **kwargs)`\n    args: Optional list of arguments to pass to `model_fn`.\n    kwargs: Optional list of keyword arguments to pass to `model_fn`.\n\n  Returns:\n    A list of namedtuples `Clone`.\n  \"\"\"\n  clones = []\n  args = args or []\n  kwargs = kwargs or {}\n  with slim.arg_scope([slim.model_variable, slim.variable],\n                      device=config.variables_device()):\n    # Create clones.\n    for i in range(0, config.num_clones):\n      with tf.name_scope(config.clone_scope(i)) as clone_scope:\n        clone_device = config.clone_device(i)\n        with tf.device(clone_device):\n          with tf.variable_scope(tf.get_variable_scope(),\n                                 reuse=True if i > 0 else None):\n            outputs = model_fn(*args, **kwargs)\n          clones.append(Clone(outputs, clone_scope, clone_device))\n  return clones\n\n\ndef _gather_clone_loss(clone, num_clones, regularization_losses):\n  \"\"\"Gather the loss for a single clone.\n\n  Args:\n    clone: A Clone namedtuple.\n    num_clones: The number of clones being deployed.\n    regularization_losses: Possibly empty list of regularization_losses\n      to add to the clone losses.\n\n  Returns:\n    A tensor for the total loss for the clone.  
Can be None.\n  \"\"\"\n  # The return value.\n  sum_loss = None\n  # Individual components of the loss that will need summaries.\n  clone_loss = None\n  regularization_loss = None\n  # Compute and aggregate losses on the clone device.\n  with tf.device(clone.device):\n    all_losses = []\n    clone_losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)\n    if clone_losses:\n      clone_loss = tf.add_n(clone_losses, name='clone_loss')\n      if num_clones > 1:\n        clone_loss = tf.div(clone_loss, 1.0 * num_clones,\n                            name='scaled_clone_loss')\n      all_losses.append(clone_loss)\n    if regularization_losses:\n      regularization_loss = tf.add_n(regularization_losses,\n                                     name='regularization_loss')\n      all_losses.append(regularization_loss)\n    if all_losses:\n      sum_loss = tf.add_n(all_losses)\n  # Add the summaries out of the clone device block.\n  if clone_loss is not None:\n    tf.summary.scalar(clone.scope + '/clone_loss', clone_loss)\n  if regularization_loss is not None:\n    tf.summary.scalar('regularization_loss', regularization_loss)\n  return sum_loss\n\n\ndef _optimize_clone(optimizer, clone, num_clones, regularization_losses,\n                    **kwargs):\n  \"\"\"Compute losses and gradients for a single clone.\n\n  Args:\n    optimizer: A tf.Optimizer  object.\n    clone: A Clone namedtuple.\n    num_clones: The number of clones being deployed.\n    regularization_losses: Possibly empty list of regularization_losses\n      to add to the clone losses.\n    **kwargs: Dict of kwarg to pass to compute_gradients().\n\n  Returns:\n    A tuple (clone_loss, clone_grads_and_vars).\n      - clone_loss: A tensor for the total loss for the clone.  
Can be None.\n      - clone_grads_and_vars: List of (gradient, variable) for the clone.\n        Can be empty.\n  \"\"\"\n  sum_loss = _gather_clone_loss(clone, num_clones, regularization_losses)\n  clone_grad = None\n  if sum_loss is not None:\n    with tf.device(clone.device):\n      clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)\n  return sum_loss, clone_grad\n\n\ndef optimize_clones(clones, optimizer,\n                    regularization_losses=None,\n                    clip_gradients=-1.0,\n                    **kwargs):\n  \"\"\"Compute clone losses and gradients for the given list of `Clones`.\n\n  Note: The regularization_losses are added to the first clone losses.\n\n  Args:\n   clones: List of `Clones` created by `create_clones()`.\n   optimizer: An `Optimizer` object.\n   regularization_losses: Optional list of regularization losses. If None it\n     will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to\n     exclude them.\n   **kwargs: Optional list of keyword arguments to pass to `compute_gradients`.\n\n  Returns:\n   A tuple (total_loss, grads_and_vars).\n     - total_loss: A Tensor containing the average of the clone losses including\n       the regularization loss.\n     - grads_and_vars: A List of tuples (gradient, variable) containing the sum\n       of the gradients for each variable.\n\n  \"\"\"\n  grads_and_vars = []\n  clones_losses = []\n  num_clones = len(clones)\n  if regularization_losses is None:\n    regularization_losses = tf.get_collection(\n        tf.GraphKeys.REGULARIZATION_LOSSES)\n  for clone in clones:\n    with tf.name_scope(clone.scope):\n      clone_loss, clone_grad = _optimize_clone(\n          optimizer, clone, num_clones, regularization_losses, **kwargs)\n      if clip_gradients > 0:\n        tf.logging.info('Clipping gradient by norm {}'.format(clip_gradients))\n        clone_grad = slim.learning.clip_gradient_norms(\n          clone_grad, clip_gradients)\n      if clone_loss is not None:\n   
     clones_losses.append(clone_loss)\n        grads_and_vars.append(clone_grad)\n      # Only use regularization_losses for the first clone\n      regularization_losses = None\n  # Compute the total_loss summing all the clones_losses.\n  total_loss = tf.add_n(clones_losses, name='total_loss')\n  # Sum the gradients accross clones.\n  grads_and_vars = _sum_clones_gradients(grads_and_vars)\n  return total_loss, grads_and_vars\n\n\ndef deploy(config,\n           model_fn,\n           args=None,\n           kwargs=None,\n           optimizer=None,\n           summarize_gradients=False):\n  \"\"\"Deploys a Slim-constructed model across multiple clones.\n\n  The deployment options are specified by the config object and support\n  deploying one or several clones on different GPUs and one or several replicas\n  of such clones.\n\n  The argument `model_fn` is called `config.num_clones` times to create the\n  model clones as `model_fn(*args, **kwargs)`.\n\n  The optional argument `optimizer` is an `Optimizer` object.  If not `None`,\n  the deployed model is configured for training with that optimizer.\n\n  If `config` specifies deployment on multiple replicas then the default\n  tensorflow device is set appropriatly for each call to `model_fn` and for the\n  slim variable creation functions: model and global variables will be created\n  on the `ps` device, the clone operations will be on the `worker` device.\n\n  Args:\n    config: A `DeploymentConfig` object.\n    model_fn: A callable. Called as `model_fn(*args, **kwargs)`\n    args: Optional list of arguments to pass to `model_fn`.\n    kwargs: Optional list of keyword arguments to pass to `model_fn`.\n    optimizer: Optional `Optimizer` object.  
If passed the model is deployed\n      for training with that optimizer.\n    summarize_gradients: Whether or not add summaries to the gradients.\n\n  Returns:\n    A `DeployedModel` namedtuple.\n\n  \"\"\"\n  # Gather initial summaries.\n  summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))\n\n  # Create Clones.\n  clones = create_clones(config, model_fn, args, kwargs)\n  first_clone = clones[0]\n\n  # Gather update_ops from the first clone. These contain, for example,\n  # the updates for the batch_norm variables created by model_fn.\n  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)\n\n  train_op = None\n  total_loss = None\n  with tf.device(config.optimizer_device()):\n    if optimizer:\n      # Place the global step on the device storing the variables.\n      with tf.device(config.variables_device()):\n        global_step = slim.get_or_create_global_step()\n\n      # Compute the gradients for the clones.\n      total_loss, clones_gradients = optimize_clones(clones, optimizer)\n\n      if clones_gradients:\n        if summarize_gradients:\n          # Add summaries to the gradients.\n          summaries |= set(_add_gradients_summaries(clones_gradients))\n\n        # Create gradient updates.\n        grad_updates = optimizer.apply_gradients(clones_gradients,\n                                                 global_step=global_step)\n        update_ops.append(grad_updates)\n\n        update_op = tf.group(*update_ops)\n        train_op = control_flow_ops.with_dependencies([update_op], total_loss,\n                                                      name='train_op')\n    else:\n      clones_losses = []\n      regularization_losses = tf.get_collection(\n          tf.GraphKeys.REGULARIZATION_LOSSES)\n      for clone in clones:\n        with tf.name_scope(clone.scope):\n          clone_loss = _gather_clone_loss(clone, len(clones),\n                                          regularization_losses)\n          if clone_loss is not 
None:\n            clones_losses.append(clone_loss)\n          # Only use regularization_losses for the first clone\n          regularization_losses = None\n      if clones_losses:\n        total_loss = tf.add_n(clones_losses, name='total_loss')\n\n    # Add the summaries from the first clone. These contain the summaries\n    # created by model_fn and either optimize_clones() or _gather_clone_loss().\n    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,\n                                       first_clone.scope))\n\n    if total_loss is not None:\n      # Add total_loss to summary.\n      summaries.add(tf.summary.scalar('total_loss', total_loss))\n\n    if summaries:\n      # Merge all summaries together.\n      summary_op = tf.merge_summary(list(summaries), name='summary_op')\n    else:\n      summary_op = None\n\n  return DeployedModel(train_op, summary_op, total_loss, clones)\n\n\ndef _sum_clones_gradients(clone_grads):\n  \"\"\"Calculate the sum gradient for each shared variable across all clones.\n\n  This function assumes that the clone_grads has been scaled appropriately by\n  1 / num_clones.\n\n  Args:\n    clone_grads: A List of List of tuples (gradient, variable), one list per\n    `Clone`.\n\n  Returns:\n     List of tuples of (gradient, variable) where the gradient has been summed\n     across all clones.\n  \"\"\"\n  sum_grads = []\n  for grad_and_vars in zip(*clone_grads):\n    # Note that each grad_and_vars looks like the following:\n    #   ((grad_var0_clone0, var0), ... 
(grad_varN_cloneN, varN))\n    grads = []\n    var = grad_and_vars[0][1]\n    for g, v in grad_and_vars:\n      assert v == var\n      if g is not None:\n        grads.append(g)\n    if grads:\n      if len(grads) > 1:\n        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')\n      else:\n        sum_grad = grads[0]\n      sum_grads.append((sum_grad, var))\n  return sum_grads\n\n\ndef _add_gradients_summaries(grads_and_vars):\n  \"\"\"Add histogram summaries to gradients.\n\n  Note: The summaries are also added to the SUMMARIES collection.\n\n  Args:\n    grads_and_vars: A list of gradient to variable pairs (tuples).\n\n  Returns:\n    The _list_ of the added summaries for grads_and_vars.\n  \"\"\"\n  summaries = []\n  for grad, var in grads_and_vars:\n    if grad is not None:\n      if isinstance(grad, tf.IndexedSlices):\n        grad_values = grad.values\n      else:\n        grad_values = grad\n      summaries.append(tf.histogram_summary(var.op.name + ':gradient',\n                                            grad_values))\n      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',\n                                            tf.global_norm([grad_values])))\n    else:\n      tf.logging.info('Var %s has no gradient', var.op.name)\n  return summaries\n\n\nclass DeploymentConfig(object):\n  \"\"\"Configuration for deploying a model with `deploy()`.\n\n  You can pass an instance of this class to `deploy()` to specify exactly\n  how to deploy the model to build.  
If you do not pass one, an instance built\n  from the default deployment_hparams will be used.\n  \"\"\"\n\n  def __init__(self,\n               num_clones=1,\n               clone_on_cpu=False,\n               replica_id=0,\n               num_replicas=1,\n               num_ps_tasks=0,\n               worker_job_name='worker',\n               ps_job_name='ps'):\n    \"\"\"Create a DeploymentConfig.\n\n    The config describes how to deploy a model across multiple clones and\n    replicas.  The model will be replicated `num_clones` times in each replica.\n    If `clone_on_cpu` is True, each clone will placed on CPU.\n\n    If `num_replicas` is 1, the model is deployed via a single process.  In that\n    case `worker_device`, `num_ps_tasks`, and `ps_device` are ignored.\n\n    If `num_replicas` is greater than 1, then `worker_device` and `ps_device`\n    must specify TensorFlow devices for the `worker` and `ps` jobs and\n    `num_ps_tasks` must be positive.\n\n    Args:\n      num_clones: Number of model clones to deploy in each replica.\n      clone_on_cpu: If True clones would be placed on CPU.\n      replica_id: Integer.  Index of the replica for which the model is\n        deployed.  Usually 0 for the chief replica.\n      num_replicas: Number of replicas to use.\n      num_ps_tasks: Number of tasks for the `ps` job. 
0 to not use replicas.\n      worker_job_name: A name for the worker job.\n      ps_job_name: A name for the parameter server job.\n\n    Raises:\n      ValueError: If the arguments are invalid.\n    \"\"\"\n    if num_replicas > 1:\n      if num_ps_tasks < 1:\n        raise ValueError('When using replicas num_ps_tasks must be positive')\n    if num_replicas > 1 or num_ps_tasks > 0:\n      if not worker_job_name:\n        raise ValueError('Must specify worker_job_name when using replicas')\n      if not ps_job_name:\n        raise ValueError('Must specify ps_job_name when using parameter server')\n    if replica_id >= num_replicas:\n      raise ValueError('replica_id must be less than num_replicas')\n    self._num_clones = num_clones\n    self._clone_on_cpu = clone_on_cpu\n    self._replica_id = replica_id\n    self._num_replicas = num_replicas\n    self._num_ps_tasks = num_ps_tasks\n    self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else ''\n    self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else ''\n\n  @property\n  def num_clones(self):\n    return self._num_clones\n\n  @property\n  def clone_on_cpu(self):\n    return self._clone_on_cpu\n\n  @property\n  def replica_id(self):\n    return self._replica_id\n\n  @property\n  def num_replicas(self):\n    return self._num_replicas\n\n  @property\n  def num_ps_tasks(self):\n    return self._num_ps_tasks\n\n  @property\n  def ps_device(self):\n    return self._ps_device\n\n  @property\n  def worker_device(self):\n    return self._worker_device\n\n  def caching_device(self):\n    \"\"\"Returns the device to use for caching variables.\n\n    Variables are cached on the worker CPU when using replicas.\n\n    Returns:\n      A device string or None if the variables do not need to be cached.\n    \"\"\"\n    if self._num_ps_tasks > 0:\n      return lambda op: op.device\n    else:\n      return None\n\n  def clone_device(self, clone_index):\n    \"\"\"Device used to create the clone and all 
the ops inside the clone.\n\n    Args:\n      clone_index: Int, representing the clone_index.\n\n    Returns:\n      A value suitable for `tf.device()`.\n\n    Raises:\n      ValueError: if `clone_index` is greater or equal to the number of clones\".\n    \"\"\"\n    if clone_index >= self._num_clones:\n      raise ValueError('clone_index must be less than num_clones')\n    device = ''\n    if self._num_ps_tasks > 0:\n      device += self._worker_device\n    if self._clone_on_cpu:\n      device += '/cpu:0'\n    else:\n      if self._num_clones > 1:\n        device += '/gpu:%d' % clone_index\n    return device\n\n  def clone_scope(self, clone_index):\n    \"\"\"Name scope to create the clone.\n\n    Args:\n      clone_index: Int, representing the clone_index.\n\n    Returns:\n      A name_scope suitable for `tf.name_scope()`.\n\n    Raises:\n      ValueError: if `clone_index` is greater or equal to the number of clones\".\n    \"\"\"\n    if clone_index >= self._num_clones:\n      raise ValueError('clone_index must be less than num_clones')\n    scope = ''\n    if self._num_clones > 1:\n      scope = 'clone_%d' % clone_index\n    return scope\n\n  def optimizer_device(self):\n    \"\"\"Device to use with the optimizer.\n\n    Returns:\n      A value suitable for `tf.device()`.\n    \"\"\"\n    if self._num_ps_tasks > 0 or self._num_clones > 0:\n      return self._worker_device + '/cpu:0'\n    else:\n      return ''\n\n  def inputs_device(self):\n    \"\"\"Device to use to build the inputs.\n\n    Returns:\n      A value suitable for `tf.device()`.\n    \"\"\"\n    device = ''\n    if self._num_ps_tasks > 0:\n      device += self._worker_device\n    device += '/cpu:0'\n    return device\n\n  def variables_device(self):\n    \"\"\"Returns the device to use for variables created inside the clone.\n\n    Returns:\n      A value suitable for `tf.device()`.\n    \"\"\"\n    device = ''\n    if self._num_ps_tasks > 0:\n      device += self._ps_device\n    device += 
'/cpu:0'\n\n    class _PSDeviceChooser(object):\n      \"\"\"Slim device chooser for variables when using PS.\"\"\"\n\n      def __init__(self, device, tasks):\n        self._device = device\n        self._tasks = tasks\n        self._task = 0\n\n      def choose(self, op):\n        if op.device:\n          return op.device\n        node_def = op if isinstance(op, tf.NodeDef) else op.node_def\n        if node_def.op == 'Variable':\n          t = self._task\n          self._task = (self._task + 1) % self._tasks\n          d = '%s/task:%d' % (self._device, t)\n          return d\n        else:\n          return op.device\n\n    if not self._num_ps_tasks:\n      return device\n    else:\n      chooser = _PSDeviceChooser(device, self._num_ps_tasks)\n      return chooser.choose\n"
  },
  {
    "path": "models/slim/deployment/model_deploy_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for model_deploy.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom deployment import model_deploy\n\nslim = tf.contrib.slim\n\n\nclass DeploymentConfigTest(tf.test.TestCase):\n\n  def testDefaults(self):\n    deploy_config = model_deploy.DeploymentConfig()\n\n    self.assertEqual(slim.get_variables(), [])\n    self.assertEqual(deploy_config.caching_device(), None)\n    self.assertDeviceEqual(deploy_config.clone_device(0), '')\n    self.assertEqual(deploy_config.clone_scope(0), '')\n    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')\n    self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')\n\n  def testCPUonly(self):\n    deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True)\n\n    self.assertEqual(deploy_config.caching_device(), None)\n    self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0')\n    self.assertEqual(deploy_config.clone_scope(0), '')\n    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')\n    
self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')\n\n  def testMultiGPU(self):\n    deploy_config = model_deploy.DeploymentConfig(num_clones=2)\n\n    self.assertEqual(deploy_config.caching_device(), None)\n    self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')\n    self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1')\n    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')\n    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')\n    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')\n    self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')\n\n  def testPS(self):\n    deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)\n\n    self.assertDeviceEqual(deploy_config.clone_device(0),\n                           '/job:worker')\n    self.assertEqual(deploy_config.clone_scope(0), '')\n    self.assertDeviceEqual(deploy_config.optimizer_device(),\n                           '/job:worker/device:CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(),\n                           '/job:worker/device:CPU:0')\n    with tf.device(deploy_config.variables_device()):\n      a = tf.Variable(0)\n      b = tf.Variable(0)\n      c = tf.no_op()\n      d = slim.variable('a', [],\n                        caching_device=deploy_config.caching_device())\n    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')\n    self.assertDeviceEqual(a.device, a.value().device)\n    self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')\n    self.assertDeviceEqual(b.device, b.value().device)\n    self.assertDeviceEqual(c.device, '')\n    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')\n    self.assertDeviceEqual(d.value().device, '')\n\n  def testMultiGPUPS(self):\n    deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)\n\n    
self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')\n    self.assertDeviceEqual(deploy_config.clone_device(0),\n                           '/job:worker/device:GPU:0')\n    self.assertDeviceEqual(deploy_config.clone_device(1),\n                           '/job:worker/device:GPU:1')\n    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')\n    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')\n    self.assertDeviceEqual(deploy_config.optimizer_device(),\n                           '/job:worker/device:CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(),\n                           '/job:worker/device:CPU:0')\n\n  def testReplicasPS(self):\n    deploy_config = model_deploy.DeploymentConfig(num_replicas=2,\n                                                  num_ps_tasks=2)\n\n    self.assertDeviceEqual(deploy_config.clone_device(0),\n                           '/job:worker')\n    self.assertEqual(deploy_config.clone_scope(0), '')\n    self.assertDeviceEqual(deploy_config.optimizer_device(),\n                           '/job:worker/device:CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(),\n                           '/job:worker/device:CPU:0')\n\n  def testReplicasMultiGPUPS(self):\n    deploy_config = model_deploy.DeploymentConfig(num_replicas=2,\n                                                  num_clones=2,\n                                                  num_ps_tasks=2)\n    self.assertDeviceEqual(deploy_config.clone_device(0),\n                           '/job:worker/device:GPU:0')\n    self.assertDeviceEqual(deploy_config.clone_device(1),\n                           '/job:worker/device:GPU:1')\n    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')\n    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')\n    self.assertDeviceEqual(deploy_config.optimizer_device(),\n                           '/job:worker/device:CPU:0')\n    self.assertDeviceEqual(deploy_config.inputs_device(),\n                
           '/job:worker/device:CPU:0')\n\n  def testVariablesPS(self):\n    deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2)\n\n    with tf.device(deploy_config.variables_device()):\n      a = tf.Variable(0)\n      b = tf.Variable(0)\n      c = tf.no_op()\n      d = slim.variable('a', [],\n                        caching_device=deploy_config.caching_device())\n\n    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')\n    self.assertDeviceEqual(a.device, a.value().device)\n    self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')\n    self.assertDeviceEqual(b.device, b.value().device)\n    self.assertDeviceEqual(c.device, '')\n    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')\n    self.assertDeviceEqual(d.value().device, '')\n\n\ndef LogisticClassifier(inputs, labels, scope=None, reuse=None):\n  with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels],\n                         reuse=reuse):\n    predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid,\n                                       scope='fully_connected')\n    slim.losses.log_loss(predictions, labels)\n    return predictions\n\n\ndef BatchNormClassifier(inputs, labels, scope=None, reuse=None):\n  with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels],\n                         reuse=reuse):\n    inputs = slim.batch_norm(inputs, decay=0.1)\n    predictions = slim.fully_connected(inputs, 1,\n                                       activation_fn=tf.sigmoid,\n                                       scope='fully_connected')\n    slim.losses.log_loss(predictions, labels)\n    return predictions\n\n\nclass CreatecloneTest(tf.test.TestCase):\n\n  def setUp(self):\n    # Create an easy training set:\n    np.random.seed(0)\n\n    self._inputs = np.zeros((16, 4))\n    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)\n    self._logdir = self.get_temp_dir()\n\n    for i in range(16):\n      j = int(2 * 
self._labels[i] + np.random.randint(0, 2))\n      self._inputs[i, j] = 1\n\n  def testCreateLogisticClassifier(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = LogisticClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      clone = clones[0]\n      self.assertEqual(len(slim.get_variables()), 2)\n      for v in slim.get_variables():\n        self.assertDeviceEqual(v.device, 'CPU:0')\n        self.assertDeviceEqual(v.value().device, 'CPU:0')\n      self.assertEqual(clone.outputs.op.name,\n                       'LogisticClassifier/fully_connected/Sigmoid')\n      self.assertEqual(clone.scope, '')\n      self.assertDeviceEqual(clone.device, '')\n      self.assertEqual(len(slim.losses.get_losses()), 1)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(update_ops, [])\n\n  def testCreateSingleclone(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      clone = clones[0]\n      self.assertEqual(len(slim.get_variables()), 5)\n      for v in slim.get_variables():\n        self.assertDeviceEqual(v.device, 'CPU:0')\n        self.assertDeviceEqual(v.value().device, 'CPU:0')\n      self.assertEqual(clone.outputs.op.name,\n          
             'BatchNormClassifier/fully_connected/Sigmoid')\n      self.assertEqual(clone.scope, '')\n      self.assertDeviceEqual(clone.device, '')\n      self.assertEqual(len(slim.losses.get_losses()), 1)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), 2)\n\n  def testCreateMulticlone(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      num_clones = 4\n      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(slim.get_variables()), 5)\n      for v in slim.get_variables():\n        self.assertDeviceEqual(v.device, 'CPU:0')\n        self.assertDeviceEqual(v.value().device, 'CPU:0')\n      self.assertEqual(len(clones), num_clones)\n      for i, clone in enumerate(clones):\n        self.assertEqual(\n            clone.outputs.op.name,\n            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)\n        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)\n        self.assertEqual(len(update_ops), 2)\n        self.assertEqual(clone.scope, 'clone_%d/' % i)\n        self.assertDeviceEqual(clone.device, 'GPU:%d' % i)\n\n  def testCreateOnecloneWithPS(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1,\n                                                    num_ps_tasks=1)\n\n  
    self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(clones), 1)\n      clone = clones[0]\n      self.assertEqual(clone.outputs.op.name,\n                       'BatchNormClassifier/fully_connected/Sigmoid')\n      self.assertDeviceEqual(clone.device, '/job:worker')\n      self.assertEqual(clone.scope, '')\n      self.assertEqual(len(slim.get_variables()), 5)\n      for v in slim.get_variables():\n        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')\n        self.assertDeviceEqual(v.device, v.value().device)\n\n  def testCreateMulticloneWithPS(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=2,\n                                                    num_ps_tasks=2)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(slim.get_variables()), 5)\n      for i, v in enumerate(slim.get_variables()):\n        t = i % 2\n        self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)\n        self.assertDeviceEqual(v.device, v.value().device)\n      self.assertEqual(len(clones), 2)\n      for i, clone in enumerate(clones):\n        self.assertEqual(\n            clone.outputs.op.name,\n            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)\n        self.assertEqual(clone.scope, 'clone_%d/' % i)\n        self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)\n\n\nclass OptimizeclonesTest(tf.test.TestCase):\n\n  def setUp(self):\n    # Create an easy training set:\n    np.random.seed(0)\n\n    self._inputs = 
np.zeros((16, 4))\n    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)\n    self._logdir = self.get_temp_dir()\n\n    for i in range(16):\n      j = int(2 * self._labels[i] + np.random.randint(0, 2))\n      self._inputs[i, j] = 1\n\n  def testCreateLogisticClassifier(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = LogisticClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(slim.get_variables()), 2)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(update_ops, [])\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,\n                                                                optimizer)\n      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))\n      self.assertEqual(total_loss.op.name, 'total_loss')\n      for g, v in grads_and_vars:\n        self.assertDeviceEqual(g.device, '')\n        self.assertDeviceEqual(v.device, 'CPU:0')\n\n  def testCreateSingleclone(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(slim.get_variables()), 
5)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), 2)\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,\n                                                                optimizer)\n      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))\n      self.assertEqual(total_loss.op.name, 'total_loss')\n      for g, v in grads_and_vars:\n        self.assertDeviceEqual(g.device, '')\n        self.assertDeviceEqual(v.device, 'CPU:0')\n\n  def testCreateMulticlone(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      clone_args = (tf_inputs, tf_labels)\n      num_clones = 4\n      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)\n      self.assertEqual(len(slim.get_variables()), 5)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), num_clones * 2)\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,\n                                                                optimizer)\n      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))\n      self.assertEqual(total_loss.op.name, 'total_loss')\n      for g, v in grads_and_vars:\n        self.assertDeviceEqual(g.device, '')\n        self.assertDeviceEqual(v.device, 'CPU:0')\n\n  def testCreateMulticloneCPU(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      
tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      model_args = (tf_inputs, tf_labels)\n      num_clones = 4\n      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones,\n                                                    clone_on_cpu=True)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)\n      self.assertEqual(len(slim.get_variables()), 5)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), num_clones * 2)\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,\n                                                                optimizer)\n      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))\n      self.assertEqual(total_loss.op.name, 'total_loss')\n      for g, v in grads_and_vars:\n        self.assertDeviceEqual(g.device, '')\n        self.assertDeviceEqual(v.device, 'CPU:0')\n\n  def testCreateOnecloneWithPS(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      model_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=1,\n                                                    num_ps_tasks=1)\n\n      self.assertEqual(slim.get_variables(), [])\n      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)\n      self.assertEqual(len(slim.get_variables()), 5)\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), 2)\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n      total_loss, grads_and_vars = 
model_deploy.optimize_clones(clones,\n                                                                optimizer)\n      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))\n      self.assertEqual(total_loss.op.name, 'total_loss')\n      for g, v in grads_and_vars:\n        self.assertDeviceEqual(g.device, '/job:worker')\n        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')\n\n\nclass DeployTest(tf.test.TestCase):\n\n  def setUp(self):\n    # Create an easy training set:\n    np.random.seed(0)\n\n    self._inputs = np.zeros((16, 4))\n    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)\n    self._logdir = self.get_temp_dir()\n\n    for i in range(16):\n      j = int(2 * self._labels[i] + np.random.randint(0, 2))\n      self._inputs[i, j] = 1\n\n  def testLocalTrainOp(self):\n    g = tf.Graph()\n    with g.as_default():\n      tf.set_random_seed(0)\n      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)\n      tf_labels = tf.constant(self._labels, dtype=tf.float32)\n\n      model_fn = BatchNormClassifier\n      model_args = (tf_inputs, tf_labels)\n      deploy_config = model_deploy.DeploymentConfig(num_clones=2,\n                                                    clone_on_cpu=True)\n\n      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)\n\n      self.assertEqual(slim.get_variables(), [])\n      model = model_deploy.deploy(deploy_config, model_fn, model_args,\n                                  optimizer=optimizer)\n\n      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n      self.assertEqual(len(update_ops), 4)\n      self.assertEqual(len(model.clones), 2)\n      self.assertEqual(model.total_loss.op.name, 'total_loss')\n      self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')\n      self.assertEqual(model.train_op.op.name, 'train_op')\n\n      with tf.Session() as sess:\n        sess.run(tf.global_variables_initializer())\n        moving_mean = 
tf.contrib.framework.get_variables_by_name(\n            'moving_mean')[0]\n        moving_variance = tf.contrib.framework.get_variables_by_name(\n            'moving_variance')[0]\n        initial_loss = sess.run(model.total_loss)\n        initial_mean, initial_variance = sess.run([moving_mean,\n                                                   moving_variance])\n        self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])\n        self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])\n        for _ in range(10):\n          sess.run(model.train_op)\n        final_loss = sess.run(model.total_loss)\n        self.assertLess(final_loss, initial_loss / 10.0)\n\n        final_mean, final_variance = sess.run([moving_mean,\n                                               moving_variance])\n        self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25])\n        self.assertAllClose(final_variance, [0.109375, 0.1875,\n                                             0.234375, 0.1875])\n\n  def testNoSummariesOnGPU(self):\n    with tf.Graph().as_default():\n      deploy_config = model_deploy.DeploymentConfig(num_clones=2)\n\n      # clone function creates a fully_connected layer with a regularizer loss.\n      def ModelFn():\n        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)\n        reg = tf.contrib.layers.l2_regularizer(0.001)\n        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)\n\n      model = model_deploy.deploy(\n          deploy_config, ModelFn,\n          optimizer=tf.train.GradientDescentOptimizer(1.0))\n      # The model summary op should have a few summary inputs and all of them\n      # should be on the CPU.\n      self.assertTrue(model.summary_op.op.inputs)\n      for inp in  model.summary_op.op.inputs:\n        self.assertEqual('/device:CPU:0', inp.device)\n\n  def testNoSummariesOnGPUForEvals(self):\n    with tf.Graph().as_default():\n      deploy_config = model_deploy.DeploymentConfig(num_clones=2)\n\n     
 # clone function creates a fully_connected layer with a regularizer loss.\n      def ModelFn():\n        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)\n        reg = tf.contrib.layers.l2_regularizer(0.001)\n        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)\n\n      # No optimizer here, it's an eval.\n      model = model_deploy.deploy(deploy_config, ModelFn)\n      # The model summary op should have a few summary inputs and all of them\n      # should be on the CPU.\n      self.assertTrue(model.summary_op.op.inputs)\n      for inp in  model.summary_op.op.inputs:\n        self.assertEqual('/device:CPU:0', inp.device)\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/__init__.py",
    "content": "\n"
  },
  {
    "path": "models/slim/nets/alexnet.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains a model definition for AlexNet.\n\nThis work was first described in:\n  ImageNet Classification with Deep Convolutional Neural Networks\n  Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton\n\nand later refined in:\n  One weird trick for parallelizing convolutional neural networks\n  Alex Krizhevsky, 2014\n\nHere we provide the implementation proposed in \"One weird trick\" and not\n\"ImageNet Classification\", as per the paper, the LRN layers have been removed.\n\nUsage:\n  with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):\n    outputs, end_points = alexnet.alexnet_v2(inputs)\n\n@@alexnet_v2\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\n\n\ndef alexnet_v2_arg_scope(weight_decay=0.0005):\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      activation_fn=tf.nn.relu,\n                      biases_initializer=tf.constant_initializer(0.1),\n                      weights_regularizer=slim.l2_regularizer(weight_decay)):\n    with slim.arg_scope([slim.conv2d], padding='SAME'):\n      with 
slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:\n        return arg_sc\n\n\ndef alexnet_v2(inputs,\n               num_classes=1000,\n               is_training=True,\n               dropout_keep_prob=0.5,\n               spatial_squeeze=True,\n               scope='alexnet_v2'):\n  \"\"\"AlexNet version 2.\n\n  Described in: http://arxiv.org/pdf/1404.5997v2.pdf\n  Parameters from:\n  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/\n  layers-imagenet-1gpu.cfg\n\n  Note: All the fully_connected layers have been transformed to conv2d layers.\n        To use in classification mode, resize input to 224x224. To use in fully\n        convolutional mode, set spatial_squeeze to false.\n        The LRN layers have been removed and change the initializers from\n        random_normal_initializer to xavier_initializer.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether or not the model is being trained.\n    dropout_keep_prob: the probability that activations are kept in the dropout\n      layers during training.\n    spatial_squeeze: whether or not should squeeze the spatial dimensions of the\n      outputs. 
Useful to remove unnecessary dimensions for classification.\n    scope: Optional scope for the variables.\n\n  Returns:\n    the last op containing the log predictions and end_points dict.\n  \"\"\"\n  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:\n    end_points_collection = sc.name + '_end_points'\n    # Collect outputs for conv2d, fully_connected and max_pool2d.\n    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],\n                        outputs_collections=[end_points_collection]):\n      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',\n                        scope='conv1')\n      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')\n      net = slim.conv2d(net, 192, [5, 5], scope='conv2')\n      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')\n      net = slim.conv2d(net, 384, [3, 3], scope='conv3')\n      net = slim.conv2d(net, 384, [3, 3], scope='conv4')\n      net = slim.conv2d(net, 256, [3, 3], scope='conv5')\n      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')\n\n      # Use conv2d instead of fully_connected layers.\n      with slim.arg_scope([slim.conv2d],\n                          weights_initializer=trunc_normal(0.005),\n                          biases_initializer=tf.constant_initializer(0.1)):\n        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',\n                          scope='fc6')\n        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                           scope='dropout6')\n        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')\n        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                           scope='dropout7')\n        net = slim.conv2d(net, num_classes, [1, 1],\n                          activation_fn=None,\n                          normalizer_fn=None,\n                          biases_initializer=tf.zeros_initializer,\n                          scope='fc8')\n\n      # Convert end_points_collection 
into a end_point dict.\n      end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n      if spatial_squeeze:\n        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')\n        end_points[sc.name + '/fc8'] = net\n      return net, end_points\nalexnet_v2.default_image_size = 224\n"
  },
  {
    "path": "models/slim/nets/alexnet_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.nets.alexnet.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import alexnet\n\nslim = tf.contrib.slim\n\n\nclass AlexnetV2Test(tf.test.TestCase):\n\n  def testBuild(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = alexnet.alexnet_v2(inputs, num_classes)\n      self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testFullyConvolutional(self):\n    batch_size = 1\n    height, width = 300, 400\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)\n      self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, 4, 7, num_classes])\n\n  def testEndPoints(self):\n    batch_size = 5\n    height, width = 224, 224\n    
num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = alexnet.alexnet_v2(inputs, num_classes)\n      expected_names = ['alexnet_v2/conv1',\n                        'alexnet_v2/pool1',\n                        'alexnet_v2/conv2',\n                        'alexnet_v2/pool2',\n                        'alexnet_v2/conv3',\n                        'alexnet_v2/conv4',\n                        'alexnet_v2/conv5',\n                        'alexnet_v2/pool5',\n                        'alexnet_v2/fc6',\n                        'alexnet_v2/fc7',\n                        'alexnet_v2/fc8'\n                       ]\n      self.assertSetEqual(set(end_points.keys()), set(expected_names))\n\n  def testModelVariables(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      alexnet.alexnet_v2(inputs, num_classes)\n      expected_names = ['alexnet_v2/conv1/weights',\n                        'alexnet_v2/conv1/biases',\n                        'alexnet_v2/conv2/weights',\n                        'alexnet_v2/conv2/biases',\n                        'alexnet_v2/conv3/weights',\n                        'alexnet_v2/conv3/biases',\n                        'alexnet_v2/conv4/weights',\n                        'alexnet_v2/conv4/biases',\n                        'alexnet_v2/conv5/weights',\n                        'alexnet_v2/conv5/biases',\n                        'alexnet_v2/fc6/weights',\n                        'alexnet_v2/fc6/biases',\n                        'alexnet_v2/fc7/weights',\n                        'alexnet_v2/fc7/biases',\n                        'alexnet_v2/fc8/weights',\n                        'alexnet_v2/fc8/biases',\n                       ]\n      model_variables = [v.op.name for v in slim.get_model_variables()]\n      self.assertSetEqual(set(model_variables), 
set(expected_names))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      predictions = tf.argmax(logits, 1)\n      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 2\n    eval_batch_size = 1\n    train_height, train_width = 224, 224\n    eval_height, eval_width = 300, 400\n    num_classes = 1000\n    with self.test_session():\n      train_inputs = tf.random_uniform(\n          (train_batch_size, train_height, train_width, 3))\n      logits, _ = alexnet.alexnet_v2(train_inputs)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [train_batch_size, num_classes])\n      tf.get_variable_scope().reuse_variables()\n      eval_inputs = tf.random_uniform(\n          (eval_batch_size, eval_height, eval_width, 3))\n      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,\n                                     spatial_squeeze=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [eval_batch_size, 4, 7, num_classes])\n      logits = tf.reduce_mean(logits, [1, 2])\n      predictions = tf.argmax(logits, 1)\n      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])\n\n  def testForward(self):\n    batch_size = 1\n    height, width = 224, 224\n    with self.test_session() as sess:\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = alexnet.alexnet_v2(inputs)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits)\n      self.assertTrue(output.any())\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/cifarnet.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains a variant of the CIFAR-10 model definition.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)\n\n\ndef cifarnet(images, num_classes=10, is_training=False,\n             dropout_keep_prob=0.5,\n             prediction_fn=slim.softmax,\n             scope='CifarNet'):\n  \"\"\"Creates a variant of the CifarNet model.\n\n  Note that since the output is a set of 'logits', the values fall in the\n  interval of (-infinity, infinity). 
Consequently, to convert the outputs to a\n  probability distribution over the characters, one will need to convert them\n  using the softmax function:\n\n        logits = cifarnet.cifarnet(images, is_training=False)\n        probabilities = tf.nn.softmax(logits)\n        predictions = tf.argmax(logits, 1)\n\n  Args:\n    images: A batch of `Tensors` of size [batch_size, height, width, channels].\n    num_classes: the number of classes in the dataset.\n    is_training: specifies whether or not we're currently training the model.\n      This variable will determine the behaviour of the dropout layer.\n    dropout_keep_prob: the percentage of activation values that are retained.\n    prediction_fn: a function to get predictions out of logits.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the pre-softmax activations, a tensor of size\n      [batch_size, `num_classes`]\n    end_points: a dictionary from components of the network to the corresponding\n      activation.\n  \"\"\"\n  end_points = {}\n\n  with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):\n    net = slim.conv2d(images, 64, [5, 5], scope='conv1')\n    end_points['conv1'] = net\n    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')\n    end_points['pool1'] = net\n    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')\n    net = slim.conv2d(net, 64, [5, 5], scope='conv2')\n    end_points['conv2'] = net\n    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')\n    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')\n    end_points['pool2'] = net\n    net = slim.flatten(net)\n    end_points['Flatten'] = net\n    net = slim.fully_connected(net, 384, scope='fc3')\n    end_points['fc3'] = net\n    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                       scope='dropout3')\n    net = slim.fully_connected(net, 192, scope='fc4')\n    end_points['fc4'] = net\n    logits = slim.fully_connected(net, 
num_classes,\n                                  biases_initializer=tf.zeros_initializer,\n                                  weights_initializer=trunc_normal(1/192.0),\n                                  weights_regularizer=None,\n                                  activation_fn=None,\n                                  scope='logits')\n\n    end_points['Logits'] = logits\n    end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n\n  return logits, end_points\ncifarnet.default_image_size = 32\n\n\ndef cifarnet_arg_scope(weight_decay=0.004):\n  \"\"\"Defines the default cifarnet argument scope.\n\n  Args:\n    weight_decay: The weight decay to use for regularizing the model.\n\n  Returns:\n    An `arg_scope` to use for the inception v3 model.\n  \"\"\"\n  with slim.arg_scope(\n      [slim.conv2d],\n      weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),\n      activation_fn=tf.nn.relu):\n    with slim.arg_scope(\n        [slim.fully_connected],\n        biases_initializer=tf.constant_initializer(0.1),\n        weights_initializer=trunc_normal(0.04),\n        weights_regularizer=slim.l2_regularizer(weight_decay),\n        activation_fn=tf.nn.relu) as sc:\n      return sc\n"
  },
  {
    "path": "models/slim/nets/inception.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Brings all inception models under one namespace.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n# pylint: disable=unused-import\nfrom nets.inception_resnet_v2 import inception_resnet_v2\nfrom nets.inception_resnet_v2 import inception_resnet_v2_arg_scope\nfrom nets.inception_v1 import inception_v1\nfrom nets.inception_v1 import inception_v1_arg_scope\nfrom nets.inception_v1 import inception_v1_base\nfrom nets.inception_v2 import inception_v2\nfrom nets.inception_v2 import inception_v2_arg_scope\nfrom nets.inception_v2 import inception_v2_base\nfrom nets.inception_v2_tsn import inception_v2_tsn\nfrom nets.inception_v2_tsn import inception_v2_tsn_arg_scope\nfrom nets.inception_v2_tsn import inception_v2_tsn_base\nfrom nets.inception_v3 import inception_v3\nfrom nets.inception_v3 import inception_v3_arg_scope\nfrom nets.inception_v3 import inception_v3_base\nfrom nets.inception_v4 import inception_v4\nfrom nets.inception_v4 import inception_v4_arg_scope\nfrom nets.inception_v4 import inception_v4_base\n# pylint: enable=unused-import\n"
  },
  {
    "path": "models/slim/nets/inception_resnet_v2.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the definition of the Inception Resnet V2 architecture.\n\nAs described in http://arxiv.org/abs/1602.07261.\n\n  Inception-v4, Inception-ResNet and the Impact of Residual Connections\n    on Learning\n  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\ndef block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):\n  \"\"\"Builds the 35x35 resnet block.\"\"\"\n  with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):\n    with tf.variable_scope('Branch_0'):\n      tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')\n    with tf.variable_scope('Branch_1'):\n      tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')\n      tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')\n    with tf.variable_scope('Branch_2'):\n      tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')\n      tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3')\n      tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3')\n    mixed = tf.concat(axis=3, values=[tower_conv, 
tower_conv1_1, tower_conv2_2])\n    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,\n                     activation_fn=None, scope='Conv2d_1x1')\n    net += scale * up\n    if activation_fn:\n      net = activation_fn(net)\n  return net\n\n\ndef block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):\n  \"\"\"Builds the 17x17 resnet block.\"\"\"\n  with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):\n    with tf.variable_scope('Branch_0'):\n      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')\n    with tf.variable_scope('Branch_1'):\n      tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')\n      tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7],\n                                  scope='Conv2d_0b_1x7')\n      tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1],\n                                  scope='Conv2d_0c_7x1')\n    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])\n    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,\n                     activation_fn=None, scope='Conv2d_1x1')\n    net += scale * up\n    if activation_fn:\n      net = activation_fn(net)\n  return net\n\n\ndef block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):\n  \"\"\"Builds the 8x8 resnet block.\"\"\"\n  with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):\n    with tf.variable_scope('Branch_0'):\n      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')\n    with tf.variable_scope('Branch_1'):\n      tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')\n      tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],\n                                  scope='Conv2d_0b_1x3')\n      tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],\n                                  scope='Conv2d_0c_3x1')\n    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])\n    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,\n    
                 activation_fn=None, scope='Conv2d_1x1')\n    net += scale * up\n    if activation_fn:\n      net = activation_fn(net)\n  return net\n\n\ndef inception_resnet_v2(inputs, num_classes=1001, is_training=True,\n                        dropout_keep_prob=0.8,\n                        reuse=None,\n                        scope='InceptionResnetV2'):\n  \"\"\"Creates the Inception Resnet V2 model.\n\n  Args:\n    inputs: a 4-D tensor of size [batch_size, height, width, 3].\n    num_classes: number of predicted classes.\n    is_training: whether is training or not.\n    dropout_keep_prob: float, the fraction to keep before final layer.\n    reuse: whether or not the network and its variables should be reused. To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the logits outputs of the model.\n    end_points: the set of end_points from the inception model.\n  \"\"\"\n  end_points = {}\n\n  with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):\n    with slim.arg_scope([slim.batch_norm, slim.dropout],\n                        is_training=is_training):\n      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                          stride=1, padding='SAME'):\n\n        # 149 x 149 x 32\n        net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',\n                          scope='Conv2d_1a_3x3')\n        end_points['Conv2d_1a_3x3'] = net\n        # 147 x 147 x 32\n        net = slim.conv2d(net, 32, 3, padding='VALID',\n                          scope='Conv2d_2a_3x3')\n        end_points['Conv2d_2a_3x3'] = net\n        # 147 x 147 x 64\n        net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')\n        end_points['Conv2d_2b_3x3'] = net\n        # 73 x 73 x 64\n        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',\n                              scope='MaxPool_3a_3x3')\n        end_points['MaxPool_3a_3x3'] = net\n        # 73 x 73 x 80\n     
   net = slim.conv2d(net, 80, 1, padding='VALID',\n                          scope='Conv2d_3b_1x1')\n        end_points['Conv2d_3b_1x1'] = net\n        # 71 x 71 x 192\n        net = slim.conv2d(net, 192, 3, padding='VALID',\n                          scope='Conv2d_4a_3x3')\n        end_points['Conv2d_4a_3x3'] = net\n        # 35 x 35 x 192\n        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',\n                              scope='MaxPool_5a_3x3')\n        end_points['MaxPool_5a_3x3'] = net\n\n        # 35 x 35 x 320\n        with tf.variable_scope('Mixed_5b'):\n          with tf.variable_scope('Branch_0'):\n            tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')\n          with tf.variable_scope('Branch_1'):\n            tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')\n            tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,\n                                        scope='Conv2d_0b_5x5')\n          with tf.variable_scope('Branch_2'):\n            tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')\n            tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,\n                                        scope='Conv2d_0b_3x3')\n            tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,\n                                        scope='Conv2d_0c_3x3')\n          with tf.variable_scope('Branch_3'):\n            tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',\n                                         scope='AvgPool_0a_3x3')\n            tower_pool_1 = slim.conv2d(tower_pool, 64, 1,\n                                       scope='Conv2d_0b_1x1')\n          net = tf.concat(axis=3, values=[tower_conv, tower_conv1_1,\n                              tower_conv2_2, tower_pool_1])\n\n        end_points['Mixed_5b'] = net\n        net = slim.repeat(net, 10, block35, scale=0.17)\n\n        # 17 x 17 x 1024\n        with tf.variable_scope('Mixed_6a'):\n          with tf.variable_scope('Branch_0'):\n            
tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',\n                                     scope='Conv2d_1a_3x3')\n          with tf.variable_scope('Branch_1'):\n            tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')\n            tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,\n                                        scope='Conv2d_0b_3x3')\n            tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,\n                                        stride=2, padding='VALID',\n                                        scope='Conv2d_1a_3x3')\n          with tf.variable_scope('Branch_2'):\n            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',\n                                         scope='MaxPool_1a_3x3')\n          net = tf.concat(axis=3, values=[tower_conv, tower_conv1_2, tower_pool])\n\n        end_points['Mixed_6a'] = net\n        net = slim.repeat(net, 20, block17, scale=0.10)\n\n        # Auxillary tower\n        with tf.variable_scope('AuxLogits'):\n          aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID',\n                                scope='Conv2d_1a_3x3')\n          aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')\n          aux = slim.conv2d(aux, 768, aux.get_shape()[1:3],\n                            padding='VALID', scope='Conv2d_2a_5x5')\n          aux = slim.flatten(aux)\n          aux = slim.fully_connected(aux, num_classes, activation_fn=None,\n                                     scope='Logits')\n          end_points['AuxLogits'] = aux\n\n        with tf.variable_scope('Mixed_7a'):\n          with tf.variable_scope('Branch_0'):\n            tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')\n            tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,\n                                       padding='VALID', scope='Conv2d_1a_3x3')\n          with tf.variable_scope('Branch_1'):\n            tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')\n            
tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,\n                                        padding='VALID', scope='Conv2d_1a_3x3')\n          with tf.variable_scope('Branch_2'):\n            tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')\n            tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,\n                                        scope='Conv2d_0b_3x3')\n            tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,\n                                        padding='VALID', scope='Conv2d_1a_3x3')\n          with tf.variable_scope('Branch_3'):\n            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',\n                                         scope='MaxPool_1a_3x3')\n          net = tf.concat(axis=3, values=[tower_conv_1, tower_conv1_1,\n                              tower_conv2_2, tower_pool])\n\n        end_points['Mixed_7a'] = net\n\n        net = slim.repeat(net, 9, block8, scale=0.20)\n        net = block8(net, activation_fn=None)\n\n        net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')\n        end_points['Conv2d_7b_1x1'] = net\n\n        with tf.variable_scope('Logits'):\n          end_points['PrePool'] = net\n          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',\n                                scope='AvgPool_1a_8x8')\n          net = slim.flatten(net)\n\n          net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                             scope='Dropout')\n\n          end_points['PreLogitsFlatten'] = net\n          logits = slim.fully_connected(net, num_classes, activation_fn=None,\n                                        scope='Logits')\n          end_points['Logits'] = logits\n          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')\n\n    return logits, end_points\ninception_resnet_v2.default_image_size = 299\n\n\ndef inception_resnet_v2_arg_scope(weight_decay=0.00004,\n                                  
batch_norm_decay=0.9997,\n                                  batch_norm_epsilon=0.001):\n  \"\"\"Yields the scope with the default parameters for inception_resnet_v2.\n\n  Args:\n    weight_decay: the weight decay for weights variables.\n    batch_norm_decay: decay for the moving average of batch_norm momentums.\n    batch_norm_epsilon: small float added to variance to avoid dividing by zero.\n\n  Returns:\n    a arg_scope with the parameters needed for inception_resnet_v2.\n  \"\"\"\n  # Set weight_decay for weights in conv2d and fully_connected layers.\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      weights_regularizer=slim.l2_regularizer(weight_decay),\n                      biases_regularizer=slim.l2_regularizer(weight_decay)):\n\n    batch_norm_params = {\n        'decay': batch_norm_decay,\n        'epsilon': batch_norm_epsilon,\n    }\n    # Set activation_fn and parameters for batch_norm.\n    with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,\n                        normalizer_fn=slim.batch_norm,\n                        normalizer_params=batch_norm_params) as scope:\n      return scope\n"
  },
  {
    "path": "models/slim/nets/inception_resnet_v2_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.inception_resnet_v2.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception\n\n\nclass InceptionTest(tf.test.TestCase):\n\n  def testBuildLogits(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = inception.inception_resnet_v2(inputs, num_classes)\n      self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testBuildEndPoints(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = inception.inception_resnet_v2(inputs, num_classes)\n      self.assertTrue('Logits' in end_points)\n      logits = end_points['Logits']\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      self.assertTrue('AuxLogits' in end_points)\n      aux_logits = 
end_points['AuxLogits']\n      self.assertListEqual(aux_logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      pre_pool = end_points['PrePool']\n      self.assertListEqual(pre_pool.get_shape().as_list(),\n                           [batch_size, 8, 8, 1536])\n\n  def testVariablesSetDevice(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      # Force all Variables to reside on the device.\n      with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):\n        inception.inception_resnet_v2(inputs, num_classes)\n      with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):\n        inception.inception_resnet_v2(inputs, num_classes)\n      for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):\n        self.assertDeviceEqual(v.device, '/cpu:0')\n      for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):\n        self.assertDeviceEqual(v.device, '/gpu:0')\n\n  def testHalfSizeImages(self):\n    batch_size = 5\n    height, width = 150, 150\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, end_points = inception.inception_resnet_v2(inputs, num_classes)\n      self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      pre_pool = end_points['PrePool']\n      self.assertListEqual(pre_pool.get_shape().as_list(),\n                           [batch_size, 3, 3, 1536])\n\n  def testUnknownBatchSize(self):\n    batch_size = 1\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session() as sess:\n      inputs = tf.placeholder(tf.float32, (None, height, width, 3))\n      logits, _ = inception.inception_resnet_v2(inputs, num_classes)\n      
self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [None, num_classes])\n      images = tf.random_uniform((batch_size, height, width, 3))\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEquals(output.shape, (batch_size, num_classes))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session() as sess:\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = inception.inception_resnet_v2(eval_inputs,\n                                                num_classes,\n                                                is_training=False)\n      predictions = tf.argmax(logits, 1)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (batch_size,))\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 5\n    eval_batch_size = 2\n    height, width = 150, 150\n    num_classes = 1000\n    with self.test_session() as sess:\n      train_inputs = tf.random_uniform((train_batch_size, height, width, 3))\n      inception.inception_resnet_v2(train_inputs, num_classes)\n      eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))\n      logits, _ = inception.inception_resnet_v2(eval_inputs,\n                                                num_classes,\n                                                is_training=False,\n                                                reuse=True)\n      predictions = tf.argmax(logits, 1)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (eval_batch_size,))\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/inception_utils.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains common code shared by all inception models.\n\nUsage of arg scope:\n  with slim.arg_scope(inception_arg_scope()):\n    logits, end_points = inception.inception_v3(images, num_classes,\n                                                is_training=is_training)\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\ndef inception_arg_scope(weight_decay=0.00004,\n                        use_batch_norm=True,\n                        batch_norm_decay=0.9997,\n                        batch_norm_epsilon=0.001):\n  \"\"\"Defines the default arg scope for inception models.\n\n  Args:\n    weight_decay: The weight decay to use for regularizing the model.\n    use_batch_norm: \"If `True`, batch_norm is applied after each convolution.\n    batch_norm_decay: Decay for batch norm moving average.\n    batch_norm_epsilon: Small float added to variance to avoid dividing by zero\n      in batch norm.\n\n  Returns:\n    An `arg_scope` to use for the inception models.\n  \"\"\"\n  batch_norm_params = {\n      # Decay for the moving averages.\n      'decay': batch_norm_decay,\n      # epsilon to prevent 0s in variance.\n      'epsilon': 
batch_norm_epsilon,\n      # collection containing update_ops.\n      'updates_collections': tf.GraphKeys.UPDATE_OPS,\n  }\n  if use_batch_norm:\n    normalizer_fn = slim.batch_norm\n    normalizer_params = batch_norm_params\n  else:\n    normalizer_fn = None\n    normalizer_params = {}\n  # Set weight_decay for weights in Conv and FC layers.\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      weights_regularizer=slim.l2_regularizer(weight_decay)):\n    with slim.arg_scope(\n        [slim.conv2d],\n        weights_initializer=slim.variance_scaling_initializer(),\n        activation_fn=tf.nn.relu,\n        normalizer_fn=normalizer_fn,\n        normalizer_params=normalizer_params) as sc:\n      return sc\n"
  },
  {
    "path": "models/slim/nets/inception_v1.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the definition for inception v1 classification network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception_utils\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\n\n\ndef inception_v1_base(inputs,\n                      final_endpoint='Mixed_5c',\n                      scope='InceptionV1'):\n  \"\"\"Defines the Inception V1 base architecture.\n\n  This architecture is defined in:\n    Going deeper with convolutions\n    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,\n    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.\n    http://arxiv.org/pdf/1409.4842v1.pdf.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    final_endpoint: specifies the endpoint to construct the network up to. 
It\n      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',\n      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',\n      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',\n      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']\n    scope: Optional variable_scope.\n\n  Returns:\n    A dictionary from components of the network to the corresponding activation.\n\n  Raises:\n    ValueError: if final_endpoint is not set to one of the predefined values.\n  \"\"\"\n  end_points = {}\n  with tf.variable_scope(scope, 'InceptionV1', [inputs]):\n    with slim.arg_scope(\n        [slim.conv2d, slim.fully_connected],\n        weights_initializer=trunc_normal(0.01)):\n      with slim.arg_scope([slim.conv2d, slim.max_pool2d],\n                          stride=1, padding='SAME'):\n        end_point = 'Conv2d_1a_7x7'\n        net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n        end_point = 'MaxPool_2a_3x3'\n        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n        end_point = 'Conv2d_2b_1x1'\n        net = slim.conv2d(net, 64, [1, 1], scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n        end_point = 'Conv2d_2c_3x3'\n        net = slim.conv2d(net, 192, [3, 3], scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n        end_point = 'MaxPool_3a_3x3'\n        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_3b'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            
branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_3c'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'MaxPool_4a_3x3'\n        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)\n        end_points[end_point] = net\n        if 
final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_4b'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_4c'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == 
end_point: return net, end_points\n\n        end_point = 'Mixed_4d'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_4e'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return 
net, end_points\n\n        end_point = 'Mixed_4f'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'MaxPool_5a_2x2'\n        net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_5b'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = 
slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n\n        end_point = 'Mixed_5c'\n        with tf.variable_scope(end_point):\n          with tf.variable_scope('Branch_0'):\n            branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')\n          with tf.variable_scope('Branch_1'):\n            branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')\n            branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_2'):\n            branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')\n            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')\n          with tf.variable_scope('Branch_3'):\n            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')\n          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if final_endpoint == end_point: return net, end_points\n    raise ValueError('Unknown final endpoint %s' % final_endpoint)\n\n\ndef inception_v1(inputs,\n                 num_classes=1000,\n                 is_training=True,\n                 dropout_keep_prob=0.8,\n                 prediction_fn=slim.softmax,\n                 spatial_squeeze=True,\n                 reuse=None,\n                 scope='InceptionV1'):\n  \"\"\"Defines the Inception V1 architecture.\n\n  This architecture is defined in:\n\n    Going deeper with convolutions\n    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,\n    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.\n    http://arxiv.org/pdf/1409.4842v1.pdf.\n\n  The default image size used to train this network is 224x224.\n\n  Args:\n    
inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether is training or not.\n    dropout_keep_prob: the percentage of activation values that are retained.\n    prediction_fn: a function to get predictions out of logits.\n    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is\n        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.\n    reuse: whether or not the network and its variables should be reused. To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the pre-softmax activations, a tensor of size\n      [batch_size, num_classes]\n    end_points: a dictionary from components of the network to the corresponding\n      activation.\n  \"\"\"\n  # Final pooling and prediction\n  with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes],\n                         reuse=reuse) as scope:\n    with slim.arg_scope([slim.batch_norm, slim.dropout],\n                        is_training=is_training):\n      net, end_points = inception_v1_base(inputs, scope=scope)\n      with tf.variable_scope('Logits'):\n        net = slim.avg_pool2d(net, [7, 7], stride=1, scope='MaxPool_0a_7x7')\n        net = slim.dropout(net,\n                           dropout_keep_prob, scope='Dropout_0b')\n        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                             normalizer_fn=None, scope='Conv2d_0c_1x1')\n        if spatial_squeeze:\n          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')\n\n        end_points['Logits'] = logits\n        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n  return logits, end_points\ninception_v1.default_image_size = 224\n\ninception_v1_arg_scope = inception_utils.inception_arg_scope\n"
  },
  {
    "path": "models/slim/nets/inception_v1_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for nets.inception_v1.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom nets import inception\n\nslim = tf.contrib.slim\n\n\nclass InceptionV1Test(tf.test.TestCase):\n\n  def testBuildClassificationNetwork(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v1(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue('Predictions' in end_points)\n    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),\n                         [batch_size, num_classes])\n\n  def testBuildBaseNetwork(self):\n    batch_size = 5\n    height, width = 224, 224\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    mixed_6c, end_points = inception.inception_v1_base(inputs)\n    self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c'))\n    self.assertListEqual(mixed_6c.get_shape().as_list(),\n             
            [batch_size, 7, 7, 1024])\n    expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',\n                          'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',\n                          'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',\n                          'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',\n                          'Mixed_5b', 'Mixed_5c']\n    self.assertItemsEqual(end_points.keys(), expected_endpoints)\n\n  def testBuildOnlyUptoFinalEndpoint(self):\n    batch_size = 5\n    height, width = 224, 224\n    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',\n                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',\n                 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',\n                 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',\n                 'Mixed_5c']\n    for index, endpoint in enumerate(endpoints):\n      with tf.Graph().as_default():\n        inputs = tf.random_uniform((batch_size, height, width, 3))\n        out_tensor, end_points = inception.inception_v1_base(\n            inputs, final_endpoint=endpoint)\n        self.assertTrue(out_tensor.op.name.startswith(\n            'InceptionV1/' + endpoint))\n        self.assertItemsEqual(endpoints[:index+1], end_points)\n\n  def testBuildAndCheckAllEndPointsUptoMixed5c(self):\n    batch_size = 5\n    height, width = 224, 224\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v1_base(inputs,\n                                                final_endpoint='Mixed_5c')\n    endpoints_shapes = {'Conv2d_1a_7x7': [5, 112, 112, 64],\n                        'MaxPool_2a_3x3': [5, 56, 56, 64],\n                        'Conv2d_2b_1x1': [5, 56, 56, 64],\n                        'Conv2d_2c_3x3': [5, 56, 56, 192],\n                        'MaxPool_3a_3x3': [5, 28, 28, 192],\n                        'Mixed_3b': [5, 28, 28, 256],\n                        
'Mixed_3c': [5, 28, 28, 480],\n                        'MaxPool_4a_3x3': [5, 14, 14, 480],\n                        'Mixed_4b': [5, 14, 14, 512],\n                        'Mixed_4c': [5, 14, 14, 512],\n                        'Mixed_4d': [5, 14, 14, 512],\n                        'Mixed_4e': [5, 14, 14, 528],\n                        'Mixed_4f': [5, 14, 14, 832],\n                        'MaxPool_5a_2x2': [5, 7, 7, 832],\n                        'Mixed_5b': [5, 7, 7, 832],\n                        'Mixed_5c': [5, 7, 7, 1024]}\n\n    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())\n    for endpoint_name in endpoints_shapes:\n      expected_shape = endpoints_shapes[endpoint_name]\n      self.assertTrue(endpoint_name in end_points)\n      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),\n                           expected_shape)\n\n  def testModelHasExpectedNumberOfParameters(self):\n    batch_size = 5\n    height, width = 224, 224\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    with slim.arg_scope(inception.inception_v1_arg_scope()):\n      inception.inception_v1_base(inputs)\n    total_params, _ = slim.model_analyzer.analyze_vars(\n        slim.get_model_variables())\n    self.assertAlmostEqual(5607184, total_params)\n\n  def testHalfSizeImages(self):\n    batch_size = 5\n    height, width = 112, 112\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    mixed_5c, _ = inception.inception_v1_base(inputs)\n    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))\n    self.assertListEqual(mixed_5c.get_shape().as_list(),\n                         [batch_size, 4, 4, 1024])\n\n  def testUnknownImageShape(self):\n    tf.reset_default_graph()\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))\n    with self.test_session() as sess:\n      inputs = tf.placeholder(tf.float32, 
shape=(batch_size, None, None, 3))\n      logits, end_points = inception.inception_v1(inputs, num_classes)\n      self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      pre_pool = end_points['Mixed_5c']\n      feed_dict = {inputs: input_np}\n      tf.global_variables_initializer().run()\n      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)\n      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])\n\n  def testUnknowBatchSize(self):\n    batch_size = 1\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.placeholder(tf.float32, (None, height, width, 3))\n    logits, _ = inception.inception_v1(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [None, num_classes])\n    images = tf.random_uniform((batch_size, height, width, 3))\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEquals(output.shape, (batch_size, num_classes))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n\n    eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, _ = inception.inception_v1(eval_inputs, num_classes,\n                                       is_training=False)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (batch_size,))\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 5\n    eval_batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n\n    train_inputs = tf.random_uniform((train_batch_size, height, width, 
3))\n    inception.inception_v1(train_inputs, num_classes)\n    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))\n    logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (eval_batch_size,))\n\n  def testLogitsNotSqueezed(self):\n    num_classes = 25\n    images = tf.random_uniform([1, 224, 224, 3])\n    logits, _ = inception.inception_v1(images,\n                                       num_classes=num_classes,\n                                       spatial_squeeze=False)\n\n    with self.test_session() as sess:\n      tf.global_variables_initializer().run()\n      logits_out = sess.run(logits)\n      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/inception_v2.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the definition for inception v2 classification network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception_utils\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\n\n\ndef inception_v2_base(inputs,\n                      final_endpoint='Mixed_5c',\n                      min_depth=16,\n                      depth_multiplier=1.0,\n                      scope=None):\n  \"\"\"Inception v2 (6a2).\n\n  Constructs an Inception v2 network from inputs to the given final endpoint.\n  This method can construct the network up to the layer inception(5b) as\n  described in http://arxiv.org/abs/1502.03167.\n\n  Args:\n    inputs: a tensor of shape [batch_size, height, width, channels].\n    final_endpoint: specifies the endpoint to construct the network up to. 
It\n      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',\n      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',\n      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',\n      'Mixed_5c'].\n    min_depth: Minimum depth value (number of channels) for all convolution ops.\n      Enforced when depth_multiplier < 1, and not an active constraint when\n      depth_multiplier >= 1.\n    depth_multiplier: Float multiplier for the depth (number of channels)\n      for all convolution ops. The value must be greater than zero. Typical\n      usage will be to set this value in (0, 1) to reduce the number of\n      parameters or computation cost of the model.\n    scope: Optional variable_scope.\n\n  Returns:\n    tensor_out: output tensor corresponding to the final_endpoint.\n    end_points: a set of activations for external use, for example summaries or\n                losses.\n\n  Raises:\n    ValueError: if final_endpoint is not set to one of the predefined values,\n                or depth_multiplier <= 0\n  \"\"\"\n\n  # end_points will collect relevant activations for external use, for example\n  # summaries or losses.\n  end_points = {}\n\n  # Used to find thinned depths for each layer.\n  if depth_multiplier <= 0:\n    raise ValueError('depth_multiplier is not greater than zero.')\n  depth = lambda d: max(int(d * depth_multiplier), min_depth)\n\n  with tf.variable_scope(scope, 'InceptionV2', [inputs]):\n    with slim.arg_scope(\n        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d],\n        stride=1, padding='SAME'):\n\n      # Note that sizes in the comments below assume an input spatial size of\n      # 224x224, however, the inputs can be of any size greater 32x32.\n\n      # 224 x 224 x 3\n      end_point = 'Conv2d_1a_7x7'\n      # depthwise_multiplier here is different from depth_multiplier.\n      # depthwise_multiplier determines the output channels of the initial\n      # 
depthwise conv (see docs for tf.nn.separable_conv2d), while\n      # depth_multiplier controls the # channels of the subsequent 1x1\n      # convolution. Must have\n      #   in_channels * depthwise_multipler <= out_channels\n      # so that the separable convolution is not overparameterized.\n      depthwise_multiplier = min(int(depth(64) / 3), 8)\n      net = slim.separable_conv2d(\n          inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier,\n          stride=2, weights_initializer=trunc_normal(1.0),\n          scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 112 x 112 x 64\n      end_point = 'MaxPool_2a_3x3'\n      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 56 x 56 x 64\n      end_point = 'Conv2d_2b_1x1'\n      net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,\n                        weights_initializer=trunc_normal(0.1))\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 56 x 56 x 64\n      end_point = 'Conv2d_2c_3x3'\n      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 56 x 56 x 192\n      end_point = 'MaxPool_3a_3x3'\n      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 28 x 28 x 192\n      # Inception module.\n      end_point = 'Mixed_3b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(64), [1, 1],\n              
weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(32), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 28 x 28 x 256\n      end_point = 'Mixed_3c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n  
        branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 28 x 28 x 320\n      end_point = 'Mixed_4a'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(\n              net, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,\n                                 scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(\n              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')\n          branch_1 = slim.conv2d(\n              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.max_pool2d(\n              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')\n        net = tf.concat(3, [branch_0, branch_1, branch_2])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 14 x 14 x 576\n      end_point = 'Mixed_4b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')\n        with 
tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(64), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(\n              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(96), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 14 x 14 x 576\n      end_point = 'Mixed_4c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(96), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(96), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          
branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 14 x 14 x 576\n      end_point = 'Mixed_4d'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(96), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n   
     net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n\n      # 14 x 14 x 576\n      end_point = 'Mixed_4e'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(160), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(96), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 14 x 14 x 576\n      end_point = 'Mixed_5a'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(\n              net, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_0 = slim.conv2d(branch_0, 
depth(192), [3, 3], stride=2,\n                                 scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(192), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,\n                                 scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,\n                                     scope='MaxPool_1a_3x3')\n        net = tf.concat(3, [branch_0, branch_1, branch_2])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n      # 7 x 7 x 1024\n      end_point = 'Mixed_5b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(192), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(160), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], 
scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n\n      # 7 x 7 x 1024\n      end_point = 'Mixed_5c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(\n              net, depth(192), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(\n              net, depth(192), [1, 1],\n              weights_initializer=trunc_normal(0.09),\n              scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(128), [1, 1],\n              weights_initializer=trunc_normal(0.1),\n              scope='Conv2d_0b_1x1')\n        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n        end_points[end_point] = net\n        if end_point == final_endpoint: return net, end_points\n    raise ValueError('Unknown final endpoint %s' % final_endpoint)\n\n\ndef inception_v2(inputs,\n                 num_classes=1000,\n                 
is_training=True,\n                 dropout_keep_prob=0.8,\n                 min_depth=16,\n                 depth_multiplier=1.0,\n                 prediction_fn=slim.softmax,\n                 spatial_squeeze=True,\n                 reuse=None,\n                 scope='InceptionV2'):\n  \"\"\"Inception v2 model for classification.\n\n  Constructs an Inception v2 network for classification as described in\n  http://arxiv.org/abs/1502.03167.\n\n  The default image size used to train this network is 224x224.\n\n  Args:\n    inputs: a tensor of shape [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether is training or not.\n    dropout_keep_prob: the percentage of activation values that are retained.\n    min_depth: Minimum depth value (number of channels) for all convolution ops.\n      Enforced when depth_multiplier < 1, and not an active constraint when\n      depth_multiplier >= 1.\n    depth_multiplier: Float multiplier for the depth (number of channels)\n      for all convolution ops. The value must be greater than zero. Typical\n      usage will be to set this value in (0, 1) to reduce the number of\n      parameters or computation cost of the model.\n    prediction_fn: a function to get predictions out of logits.\n    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is\n        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.\n    reuse: whether or not the network and its variables should be reused. 
To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the pre-softmax activations, a tensor of size\n      [batch_size, num_classes]\n    end_points: a dictionary from components of the network to the corresponding\n      activation.\n\n  Raises:\n    ValueError: if final_endpoint is not set to one of the predefined values,\n                or depth_multiplier <= 0\n  \"\"\"\n  if depth_multiplier <= 0:\n    raise ValueError('depth_multiplier is not greater than zero.')\n\n  # Final pooling and prediction\n  with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes],\n                         reuse=reuse) as scope:\n    with slim.arg_scope([slim.batch_norm, slim.dropout],\n                        is_training=is_training):\n      net, end_points = inception_v2_base(\n          inputs, scope=scope, min_depth=min_depth,\n          depth_multiplier=depth_multiplier)\n      with tf.variable_scope('Logits'):\n        kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])\n        net = slim.avg_pool2d(net, kernel_size, padding='VALID',\n                              scope='AvgPool_1a_{}x{}'.format(*kernel_size))\n        # 1 x 1 x 1024\n        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')\n        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                             normalizer_fn=None, scope='Conv2d_1c_1x1')\n        if spatial_squeeze:\n          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')\n      end_points['Logits'] = logits\n      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n  return logits, end_points\ninception_v2.default_image_size = 224\n\n\ndef _reduced_kernel_size_for_small_input(input_tensor, kernel_size):\n  \"\"\"Define kernel size which is automatically reduced for small input.\n\n  If the shape of the input images is unknown at graph construction time this\n  function assumes that the 
input images are is large enough.\n\n  Args:\n    input_tensor: input tensor of size [batch_size, height, width, channels].\n    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]\n\n  Returns:\n    a tensor with the kernel size.\n\n  TODO(jrru): Make this function work with unknown shapes. Theoretically, this\n  can be done with the code below. Problems are two-fold: (1) If the shape was\n  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot\n  handle tensors that define the kernel size.\n      shape = tf.shape(input_tensor)\n      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),\n                        tf.minimum(shape[2], kernel_size[1])])\n\n  \"\"\"\n  shape = input_tensor.get_shape().as_list()\n  if shape[1] is None or shape[2] is None:\n    kernel_size_out = kernel_size\n  else:\n    kernel_size_out = [min(shape[1], kernel_size[0]),\n                       min(shape[2], kernel_size[1])]\n  return kernel_size_out\n\n\ninception_v2_arg_scope = inception_utils.inception_arg_scope\n"
  },
  {
    "path": "models/slim/nets/inception_v2_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for nets.inception_v2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom nets import inception\n\nslim = tf.contrib.slim\n\n\nclass InceptionV2Test(tf.test.TestCase):\n\n  def testBuildClassificationNetwork(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v2(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue('Predictions' in end_points)\n    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),\n                         [batch_size, num_classes])\n\n  def testBuildBaseNetwork(self):\n    batch_size = 5\n    height, width = 224, 224\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    mixed_5c, end_points = inception.inception_v2_base(inputs)\n    self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))\n    self.assertListEqual(mixed_5c.get_shape().as_list(),\n             
            [batch_size, 7, 7, 1024])\n    expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',\n                          'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',\n                          'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',\n                          'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',\n                          'MaxPool_3a_3x3']\n    self.assertItemsEqual(end_points.keys(), expected_endpoints)\n\n  def testBuildOnlyUptoFinalEndpoint(self):\n    batch_size = 5\n    height, width = 224, 224\n    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',\n                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',\n                 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',\n                 'Mixed_5a', 'Mixed_5b', 'Mixed_5c']\n    for index, endpoint in enumerate(endpoints):\n      with tf.Graph().as_default():\n        inputs = tf.random_uniform((batch_size, height, width, 3))\n        out_tensor, end_points = inception.inception_v2_base(\n            inputs, final_endpoint=endpoint)\n        self.assertTrue(out_tensor.op.name.startswith(\n            'InceptionV2/' + endpoint))\n        self.assertItemsEqual(endpoints[:index+1], end_points)\n\n  def testBuildAndCheckAllEndPointsUptoMixed5c(self):\n    batch_size = 5\n    height, width = 224, 224\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v2_base(inputs,\n                                                final_endpoint='Mixed_5c')\n    endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],\n                        'Mixed_3c': [batch_size, 28, 28, 320],\n                        'Mixed_4a': [batch_size, 14, 14, 576],\n                        'Mixed_4b': [batch_size, 14, 14, 576],\n                        'Mixed_4c': [batch_size, 14, 14, 576],\n                        'Mixed_4d': [batch_size, 14, 14, 576],\n                        'Mixed_4e': [batch_size, 14, 14, 
576],\n                        'Mixed_5a': [batch_size, 7, 7, 1024],\n                        'Mixed_5b': [batch_size, 7, 7, 1024],\n                        'Mixed_5c': [batch_size, 7, 7, 1024],\n                        'Conv2d_1a_7x7': [batch_size, 112, 112, 64],\n                        'MaxPool_2a_3x3': [batch_size, 56, 56, 64],\n                        'Conv2d_2b_1x1': [batch_size, 56, 56, 64],\n                        'Conv2d_2c_3x3': [batch_size, 56, 56, 192],\n                        'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}\n    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())\n    for endpoint_name in endpoints_shapes:\n      expected_shape = endpoints_shapes[endpoint_name]\n      self.assertTrue(endpoint_name in end_points)\n      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),\n                           expected_shape)\n\n  def testModelHasExpectedNumberOfParameters(self):\n    batch_size = 5\n    height, width = 224, 224\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    with slim.arg_scope(inception.inception_v2_arg_scope()):\n      inception.inception_v2_base(inputs)\n    total_params, _ = slim.model_analyzer.analyze_vars(\n        slim.get_model_variables())\n    self.assertAlmostEqual(10173112, total_params)\n\n  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v2(inputs, num_classes)\n\n    endpoint_keys = [key for key in end_points.keys()\n                     if key.startswith('Mixed') or key.startswith('Conv')]\n\n    _, end_points_with_multiplier = inception.inception_v2(\n        inputs, num_classes, scope='depth_multiplied_net',\n        depth_multiplier=0.5)\n\n    for key in endpoint_keys:\n      original_depth = end_points[key].get_shape().as_list()[3]\n      new_depth = 
end_points_with_multiplier[key].get_shape().as_list()[3]\n      self.assertEqual(0.5 * original_depth, new_depth)\n\n  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v2(inputs, num_classes)\n\n    endpoint_keys = [key for key in end_points.keys()\n                     if key.startswith('Mixed') or key.startswith('Conv')]\n\n    _, end_points_with_multiplier = inception.inception_v2(\n        inputs, num_classes, scope='depth_multiplied_net',\n        depth_multiplier=2.0)\n\n    for key in endpoint_keys:\n      original_depth = end_points[key].get_shape().as_list()[3]\n      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]\n      self.assertEqual(2.0 * original_depth, new_depth)\n\n  def testRaiseValueErrorWithInvalidDepthMultiplier(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    with self.assertRaises(ValueError):\n      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1)\n    with self.assertRaises(ValueError):\n      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)\n\n  def testHalfSizeImages(self):\n    batch_size = 5\n    height, width = 112, 112\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v2(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    pre_pool = end_points['Mixed_5c']\n    self.assertListEqual(pre_pool.get_shape().as_list(),\n                         [batch_size, 4, 4, 1024])\n\n  def testUnknownImageShape(self):\n    tf.reset_default_graph()\n    
batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))\n    with self.test_session() as sess:\n      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))\n      logits, end_points = inception.inception_v2(inputs, num_classes)\n      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      pre_pool = end_points['Mixed_5c']\n      feed_dict = {inputs: input_np}\n      tf.global_variables_initializer().run()\n      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)\n      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])\n\n  def testUnknowBatchSize(self):\n    batch_size = 1\n    height, width = 224, 224\n    num_classes = 1000\n\n    inputs = tf.placeholder(tf.float32, (None, height, width, 3))\n    logits, _ = inception.inception_v2(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [None, num_classes])\n    images = tf.random_uniform((batch_size, height, width, 3))\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEquals(output.shape, (batch_size, num_classes))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n\n    eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, _ = inception.inception_v2(eval_inputs, num_classes,\n                                       is_training=False)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, 
(batch_size,))\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 5\n    eval_batch_size = 2\n    height, width = 150, 150\n    num_classes = 1000\n\n    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))\n    inception.inception_v2(train_inputs, num_classes)\n    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))\n    logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (eval_batch_size,))\n\n  def testLogitsNotSqueezed(self):\n    num_classes = 25\n    images = tf.random_uniform([1, 224, 224, 3])\n    logits, _ = inception.inception_v2(images,\n                                       num_classes=num_classes,\n                                       spatial_squeeze=False)\n\n    with self.test_session() as sess:\n      tf.global_variables_initializer().run()\n      logits_out = sess.run(logits)\n      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/inception_v2_tsn.py",
    "content": "\"\"\"Contains the definition for inception v2 (TSN) classification network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nfrom tensorflow.python.ops import init_ops\nfrom tensorflow.python.platform import tf_logging as logging\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\nrandom_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)\n\ndef conv_set(net, num_outputs, filter_size, stride=1, weight_std=0.001,\n             padding=0):\n  if padding > 0:\n    net = tf.pad(net, [[0, 0], [padding, padding], [padding, padding], [0, 0]])\n  net = slim.conv2d(\n    net, num_outputs, filter_size,\n    stride=stride,\n    padding='VALID')\n  net = slim.batch_norm(net,\n                        updates_collections=tf.GraphKeys.UPDATE_OPS,\n                        epsilon=1e-5,\n                        decay=0.9,\n                        scale=True)\n  net = tf.nn.relu(net)\n  return net\n\n\ndef pool(net, pool_type='avg', kernel=3, stride=1, padding=0):\n  if pool_type == 'avg':\n    fn = slim.avg_pool2d\n  elif pool_type == 'max':\n    fn = slim.max_pool2d\n  else:\n    raise ValueError('Unknown pool type')\n  with tf.name_scope('%s_pool' % pool_type):\n    net = fn(net, [kernel, kernel], stride=stride,\n             padding='VALID' if padding==0 else 'SAME')\n  return net\n\n\ndef inception_module(net, small_module=False,\n                     num_outputs=[64,64,64,32,64,96,96],\n                     force_max_pool=False):\n  all_nets = []\n  if not small_module:\n    with tf.variable_scope('1x1'):\n      net_1 = conv_set(net, num_outputs[0], [1, 1])\n    all_nets.append(net_1)\n\n  with tf.variable_scope('3x3_reduce'):\n    net_2 = conv_set(net, num_outputs[1], [1, 1])\n  with tf.variable_scope('3x3'):\n    net_2 = conv_set(net_2, num_outputs[2], [3, 3],\n                     padding=1,\n    
                 stride=2 if small_module else 1)\n  all_nets.append(net_2)\n\n  with tf.variable_scope('double_3x3_reduce'):\n    net_3 = conv_set(net, num_outputs[4], [1, 1])\n  with tf.variable_scope('double_3x3_1'):\n    net_3 = conv_set(net_3, num_outputs[5], [3, 3], padding=1)\n  with tf.variable_scope('double_3x3_2'):\n    net_3 = conv_set(net_3, num_outputs[6], [3, 3], padding=1,\n                     stride=2 if small_module else 1)\n  all_nets.append(net_3)\n\n  with tf.variable_scope('pool'):\n    if small_module:\n      net_4 = pool(net, 'max', 3, 2, 1)\n    elif force_max_pool:\n      net_4 = pool(net, 'max', 3, 1, 1)\n    else:\n      net_4 = pool(net, 'avg', 3, 1, 1)\n  if not small_module:\n    with tf.variable_scope('pool_proj'):\n      net_4 = conv_set(net_4, num_outputs[3], [1, 1])\n  all_nets.append(net_4)\n\n  net = tf.concat(all_nets, 3)\n  return net\n\n\ndef inception_v2_tsn_base(inputs,\n                          final_endpoint='Mixed_5c',\n                          min_depth=16,\n                          depth_multiplier=1.0,\n                          scope=None,\n                          is_training=False,\n                          train_top_bn=False):\n  \"\"\"Inception v2 (TSN code).\n\n  \"\"\"\n\n  # end_points will collect relevant activations for external use, for example\n  # summaries or losses.\n  end_points = {}\n\n  with tf.variable_scope(scope, 'InceptionV2_TSN', [inputs]):\n      # 224 x 224 x 3\n      end_point = 'conv1/7x7_s2'\n      with tf.variable_scope(end_point):\n        with slim.arg_scope(\n          [slim.batch_norm],\n          is_training=is_training if train_top_bn else False,\n          trainable=True if train_top_bn else False):\n          net = conv_set(inputs, 64, [7, 7],\n                         stride=2,\n                         padding=3)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 112 x 112 x 64\n     
 end_point = 'pool1/3x3_s2'\n      net = slim.max_pool2d(net, [3, 3], scope=end_point,\n                            stride=2, padding='SAME')\n      # net = pool(net, 'max', 3, 2, 1)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 56 x 56 x 64\n      end_point = 'conv2/3x3_reduce'\n      with tf.variable_scope(end_point):\n        net = conv_set(net, 64, [1, 1], weight_std=0.1,\n                       padding=0)\n      # net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2,\n      #                       padding='SAME')\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      end_point = 'conv2/3x3'\n      with tf.variable_scope(end_point):\n        net = conv_set(net, 192, [3, 3], weight_std=0.1, padding=1)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      end_point = 'pool2/3x3_s2'\n      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2,\n                            padding='SAME')\n      # net = pool(net, 'max', 3, 2, 1)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # Inception module.\n      end_point = 'inception_3a'\n      with tf.variable_scope(end_point):\n        net = inception_module(net)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_3b'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[64,64,96,64,64,96,96])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_3c'\n      with 
tf.variable_scope(end_point):\n        net = inception_module(net, small_module=True,\n                               num_outputs=[-1,128,160,-1,64,96,96])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_4a'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[224,64,96,128,96,128,128])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_4b'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[192,96,128,128,96,128,128])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_4c'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[160,128,160,128,128,160,160])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_4d'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[96,128,192,128,160,192,192])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_4e'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, small_module=True,\n                               num_outputs=[-1,128,192,-1,192,256,256])\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_5a'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[352,192,320,128,160,224,224])\n      end_points[tf.get_variable_scope().name + 
'/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      end_point = 'inception_5b'\n      with tf.variable_scope(end_point):\n        net = inception_module(net, num_outputs=[352,192,320,128,192,224,224],\n                              force_max_pool=True)\n      end_points[tf.get_variable_scope().name + '/' + end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n  return net, end_points\n\n\ndef inception_v2_tsn(inputs,\n                     num_classes=1000,\n                     is_training=True,\n                     dropout_keep_prob=0.2,\n                     min_depth=16,\n                     depth_multiplier=1.0,\n                     prediction_fn=slim.softmax,\n                     spatial_squeeze=True,\n                     reuse=None,\n                     conv_only=None,\n                     conv_endpoint='inception_5b',\n                     # conv_endpoint='inception_5a',  # testing for now\n                     train_top_bn=False,\n                     scope='InceptionV2_TSN'):\n  \"\"\"Inception v2 model for video classification.\n\n  \"\"\"\n  if depth_multiplier <= 0:\n    raise ValueError('depth_multiplier is not greater than zero.')\n\n  # Final pooling and prediction\n  with tf.variable_scope(scope, 'InceptionV2_TSN', [inputs, num_classes],\n                         reuse=reuse) as scope:\n    with slim.arg_scope([slim.dropout],\n                        is_training=is_training):\n      with slim.arg_scope([slim.batch_norm],\n                          is_training=False,\n                          trainable=False):\n        net, end_points = inception_v2_tsn_base(\n            inputs, scope=scope, min_depth=min_depth,\n            depth_multiplier=depth_multiplier,\n            final_endpoint=conv_endpoint if conv_only else None,\n            is_training=is_training,\n            train_top_bn=train_top_bn)\n        if conv_only:\n          return net, end_points\n        with 
tf.variable_scope('Logits'):\n          kernel_size = _reduced_kernel_size_for_small_input(net, [100, 100])\n          net = slim.avg_pool2d(net, kernel_size, padding='VALID', stride=1,\n                                scope='AvgPool_Logits_{}x{}'.format(*kernel_size))\n          # The following would give the same output/performance too.\n          # net = tf.reduce_mean(net, axis=[1,2], keep_dims=True)\n          # 1 x 1 x 1024\n          logging.info('Using dropout %f' % (1-dropout_keep_prob))\n          net = slim.dropout(net, keep_prob=dropout_keep_prob,\n                             scope='Dropout_Logits')\n          logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                               normalizer_fn=None,\n                               weights_initializer=random_normal(0.001),\n                               biases_initializer=init_ops.zeros_initializer())\n          if spatial_squeeze:\n            logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')\n        end_points['Logits'] = logits\n        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n  return logits, end_points\ninception_v2_tsn.default_image_size = 224\n\n\ndef _reduced_kernel_size_for_small_input(input_tensor, kernel_size):\n  \"\"\"Define kernel size which is automatically reduced for small input.\n\n  If the shape of the input images is unknown at graph construction time this\n  function assumes that the input images are large enough.\n\n  Args:\n    input_tensor: input tensor of size [batch_size, height, width, channels].\n    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]\n\n  Returns:\n    a tensor with the kernel size.\n\n  TODO(jrru): Make this function work with unknown shapes. Theoretically, this\n  can be done with the code below. Problems are two-fold: (1) If the shape was\n  known, it will be lost. 
(2) inception.slim.ops._two_element_tuple cannot\n  handle tensors that define the kernel size.\n      shape = tf.shape(input_tensor)\n      return tf.pack([tf.minimum(shape[1], kernel_size[0]),\n                      tf.minimum(shape[2], kernel_size[1])])\n\n  \"\"\"\n  shape = input_tensor.get_shape().as_list()\n  if shape[1] is None or shape[2] is None:\n    kernel_size_out = kernel_size\n  else:\n    kernel_size_out = [min(shape[1], kernel_size[0]),\n                       min(shape[2], kernel_size[1])]\n  return kernel_size_out\n\n\ndef inception_v2_tsn_arg_scope(weight_decay=0.00004):\n  \"\"\"Defines the default InceptionV2 arg scope.\n\n  Args:\n    weight_decay: The weight decay to use for regularizing the model.\n\n  Returns:\n    An `arg_scope` to use for the inception v2 (TSN) model.\n  \"\"\"\n  batch_norm_params = {\n      # Decay for the moving averages.\n      'decay': 0.9997,\n      # epsilon to prevent 0s in variance.\n      'epsilon': 0.001,\n      # collection containing update_ops.\n      'updates_collections': tf.GraphKeys.UPDATE_OPS,\n      # Allow a gamma variable\n      'scale': True,\n  }\n\n  # Set weight_decay for weights in Conv and FC layers.\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      weights_regularizer=slim.l2_regularizer(weight_decay)):\n    with slim.arg_scope(\n        [slim.conv2d],\n        weights_initializer=tf.contrib.layers.xavier_initializer(),\n        activation_fn=None,  # manually added later, as I need to add BN after\n                             # the convolution\n        biases_initializer=init_ops.constant_initializer(value=0.2),\n        normalizer_fn=None) as sc:\n      return sc\n"
  },
  {
    "path": "models/slim/nets/inception_v3.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the definition for inception v3 classification network.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception_utils\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\nrandom_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)\n\n\ndef inception_v3_base(inputs,\n                      final_endpoint='Mixed_7c',\n                      min_depth=16,\n                      depth_multiplier=1.0,\n                      scope=None):\n  \"\"\"Inception model from http://arxiv.org/abs/1512.00567.\n\n  Constructs an Inception v3 network from inputs to the given final endpoint.\n  This method can construct the network up to the final inception block\n  Mixed_7c.\n\n  Note that the names of the layers in the paper do not correspond to the names\n  of the endpoints registered by this function although they build the same\n  network.\n\n  Here is a mapping from the old_names to the new names:\n  Old name          | New name\n  =======================================\n  conv0             | Conv2d_1a_3x3\n  conv1             | Conv2d_2a_3x3\n  conv2             | 
Conv2d_2b_3x3\n  pool1             | MaxPool_3a_3x3\n  conv3             | Conv2d_3b_1x1\n  conv4             | Conv2d_4a_3x3\n  pool2             | MaxPool_5a_3x3\n  mixed_35x35x256a  | Mixed_5b\n  mixed_35x35x288a  | Mixed_5c\n  mixed_35x35x288b  | Mixed_5d\n  mixed_17x17x768a  | Mixed_6a\n  mixed_17x17x768b  | Mixed_6b\n  mixed_17x17x768c  | Mixed_6c\n  mixed_17x17x768d  | Mixed_6d\n  mixed_17x17x768e  | Mixed_6e\n  mixed_8x8x1280a   | Mixed_7a\n  mixed_8x8x2048a   | Mixed_7b\n  mixed_8x8x2048b   | Mixed_7c\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    final_endpoint: specifies the endpoint to construct the network up to. It\n      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',\n      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',\n      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',\n      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].\n    min_depth: Minimum depth value (number of channels) for all convolution ops.\n      Enforced when depth_multiplier < 1, and not an active constraint when\n      depth_multiplier >= 1.\n    depth_multiplier: Float multiplier for the depth (number of channels)\n      for all convolution ops. The value must be greater than zero. 
Typical\n      usage will be to set this value in (0, 1) to reduce the number of\n      parameters or computation cost of the model.\n    scope: Optional variable_scope.\n\n  Returns:\n    tensor_out: output tensor corresponding to the final_endpoint.\n    end_points: a set of activations for external use, for example summaries or\n                losses.\n\n  Raises:\n    ValueError: if final_endpoint is not set to one of the predefined values,\n                or depth_multiplier <= 0\n  \"\"\"\n  # end_points will collect relevant activations for external use, for example\n  # summaries or losses.\n  end_points = {}\n\n  if depth_multiplier <= 0:\n    raise ValueError('depth_multiplier is not greater than zero.')\n  depth = lambda d: max(int(d * depth_multiplier), min_depth)\n\n  with tf.variable_scope(scope, 'InceptionV3', [inputs]):\n    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                        stride=1, padding='VALID'):\n      # 299 x 299 x 3\n      end_point = 'Conv2d_1a_3x3'\n      net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 149 x 149 x 32\n      end_point = 'Conv2d_2a_3x3'\n      net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 147 x 147 x 32\n      end_point = 'Conv2d_2b_3x3'\n      net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 147 x 147 x 64\n      end_point = 'MaxPool_3a_3x3'\n      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 73 x 73 x 64\n      end_point = 'Conv2d_3b_1x1'\n      net = slim.conv2d(net, depth(80), [1, 
1], scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 73 x 73 x 80.\n      end_point = 'Conv2d_4a_3x3'\n      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 71 x 71 x 192.\n      end_point = 'MaxPool_5a_3x3'\n      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # 35 x 35 x 192.\n\n    # Inception blocks\n    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                        stride=1, padding='SAME'):\n      # mixed: 35 x 35 x 256.\n      end_point = 'Mixed_5b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],\n                                 scope='Conv2d_0b_5x5')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      
# mixed_1: 35 x 35 x 288.\n      end_point = 'Mixed_5c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],\n                                 scope='Conv_1_0c_5x5')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(64), [1, 1],\n                                 scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_2: 35 x 35 x 288.\n      end_point = 'Mixed_5d'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],\n                                 scope='Conv2d_0b_5x5')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 
scope='Conv2d_0b_3x3')\n          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],\n                                 scope='Conv2d_0c_3x3')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_3: 17 x 17 x 768.\n      end_point = 'Mixed_6a'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,\n                                 padding='VALID', scope='Conv2d_1a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],\n                                 scope='Conv2d_0b_3x3')\n          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,\n                                 padding='VALID', scope='Conv2d_1a_1x1')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',\n                                     scope='MaxPool_1a_3x3')\n        net = tf.concat([branch_0, branch_1, branch_2], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed4: 17 x 17 x 768.\n      end_point = 'Mixed_6b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(128), [1, 
7],\n                                 scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],\n                                 scope='Conv2d_0c_7x1')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],\n                                 scope='Conv2d_0b_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],\n                                 scope='Conv2d_0c_1x7')\n          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],\n                                 scope='Conv2d_0d_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],\n                                 scope='Conv2d_0e_1x7')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_5: 17 x 17 x 768.\n      end_point = 'Mixed_6c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],\n                                 scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],\n                                 scope='Conv2d_0c_7x1')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],\n    
                             scope='Conv2d_0b_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],\n                                 scope='Conv2d_0c_1x7')\n          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],\n                                 scope='Conv2d_0d_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],\n                                 scope='Conv2d_0e_1x7')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # mixed_6: 17 x 17 x 768.\n      end_point = 'Mixed_6d'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],\n                                 scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],\n                                 scope='Conv2d_0c_7x1')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],\n                                 scope='Conv2d_0b_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],\n                                 scope='Conv2d_0c_1x7')\n          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],\n                                 scope='Conv2d_0d_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],\n                     
            scope='Conv2d_0e_1x7')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_7: 17 x 17 x 768.\n      end_point = 'Mixed_6e'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],\n                                 scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],\n                                 scope='Conv2d_0c_7x1')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],\n                                 scope='Conv2d_0b_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],\n                                 scope='Conv2d_0c_1x7')\n          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],\n                                 scope='Conv2d_0d_7x1')\n          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],\n                                 scope='Conv2d_0e_1x7')\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],\n                                 scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = 
net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_8: 8 x 8 x 1280.\n      end_point = 'Mixed_7a'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n          branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,\n                                 padding='VALID', scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],\n                                 scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],\n                                 scope='Conv2d_0c_7x1')\n          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,\n                                 padding='VALID', scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',\n                                     scope='MaxPool_1a_3x3')\n        net = tf.concat([branch_0, branch_1, branch_2], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n      # mixed_9: 8 x 8 x 2048.\n      end_point = 'Mixed_7b'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = tf.concat([\n              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),\n              slim.conv2d(branch_1, depth(384), [3, 1],\n                          scope='Conv2d_0b_3x1')], 3)\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(448), [1, 1], 
scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(\n              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')\n          branch_2 = tf.concat([\n              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),\n              slim.conv2d(branch_2, depth(384), [3, 1],\n                          scope='Conv2d_0d_3x1')], 3)\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n\n      # mixed_10: 8 x 8 x 2048.\n      end_point = 'Mixed_7c'\n      with tf.variable_scope(end_point):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = tf.concat([\n              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),\n              slim.conv2d(branch_1, depth(384), [3, 1],\n                          scope='Conv2d_0c_3x1')], 3)\n        with tf.variable_scope('Branch_2'):\n          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')\n          branch_2 = slim.conv2d(\n              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')\n          branch_2 = tf.concat([\n              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),\n              slim.conv2d(branch_2, depth(384), [3, 1],\n                          scope='Conv2d_0d_3x1')], 3)\n        with tf.variable_scope('Branch_3'):\n          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')\n          branch_3 = slim.conv2d(\n              branch_3, depth(192), [1, 1], 
scope='Conv2d_0b_1x1')\n        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)\n      end_points[end_point] = net\n      if end_point == final_endpoint: return net, end_points\n    raise ValueError('Unknown final endpoint %s' % final_endpoint)\n\n\ndef inception_v3(inputs,\n                 num_classes=1000,\n                 is_training=True,\n                 dropout_keep_prob=0.8,\n                 min_depth=16,\n                 depth_multiplier=1.0,\n                 prediction_fn=slim.softmax,\n                 spatial_squeeze=True,\n                 reuse=None,\n                 scope='InceptionV3'):\n  \"\"\"Inception model from http://arxiv.org/abs/1512.00567.\n\n  \"Rethinking the Inception Architecture for Computer Vision\"\n\n  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,\n  Zbigniew Wojna.\n\n  With the default arguments this method constructs the exact model defined in\n  the paper. However, one can experiment with variations of the inception_v3\n  network by changing arguments dropout_keep_prob, min_depth and\n  depth_multiplier.\n\n  The default image size used to train this network is 299x299.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether is training or not.\n    dropout_keep_prob: the percentage of activation values that are retained.\n    min_depth: Minimum depth value (number of channels) for all convolution ops.\n      Enforced when depth_multiplier < 1, and not an active constraint when\n      depth_multiplier >= 1.\n    depth_multiplier: Float multiplier for the depth (number of channels)\n      for all convolution ops. The value must be greater than zero. 
Typical\n      usage will be to set this value in (0, 1) to reduce the number of\n      parameters or computation cost of the model.\n    prediction_fn: a function to get predictions out of logits.\n    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is\n        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.\n    reuse: whether or not the network and its variables should be reused. To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the pre-softmax activations, a tensor of size\n      [batch_size, num_classes]\n    end_points: a dictionary from components of the network to the corresponding\n      activation.\n\n  Raises:\n    ValueError: if 'depth_multiplier' is less than or equal to zero.\n  \"\"\"\n  if depth_multiplier <= 0:\n    raise ValueError('depth_multiplier is not greater than zero.')\n  depth = lambda d: max(int(d * depth_multiplier), min_depth)\n\n  with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],\n                         reuse=reuse) as scope:\n    with slim.arg_scope([slim.batch_norm, slim.dropout],\n                        is_training=is_training):\n      net, end_points = inception_v3_base(\n          inputs, scope=scope, min_depth=min_depth,\n          depth_multiplier=depth_multiplier)\n\n      # Auxiliary Head logits\n      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                          stride=1, padding='SAME'):\n        aux_logits = end_points['Mixed_6e']\n        with tf.variable_scope('AuxLogits'):\n          # rgirdhar: for large images, in pose\n          kernel_size = _reduced_kernel_size_for_small_input(net, [30, 30])\n          aux_logits = slim.avg_pool2d(\n              aux_logits, kernel_size, stride=3, padding='VALID',\n              scope='AvgPool_1a_5x5')\n          aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],\n                                   
scope='Conv2d_1b_1x1')\n\n          # Shape of feature map before the final layer.\n          kernel_size = _reduced_kernel_size_for_small_input(\n              aux_logits, [5, 5])\n          aux_logits = slim.conv2d(\n              aux_logits, depth(768), kernel_size,\n              weights_initializer=trunc_normal(0.01),\n              padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))\n          aux_logits = slim.conv2d(\n              aux_logits, num_classes, [1, 1], activation_fn=None,\n              normalizer_fn=None, weights_initializer=trunc_normal(0.001),\n              scope='Conv2d_2b_1x1')\n          if spatial_squeeze:\n            aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')\n          end_points['AuxLogits'] = aux_logits\n\n      # Final pooling and prediction\n      with tf.variable_scope('Logits'):\n        # kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])\n        # rgirdhar: for large images, in pose\n        kernel_size = _reduced_kernel_size_for_small_input(net, [30, 30])\n        net = slim.avg_pool2d(net, kernel_size, padding='VALID',\n                              scope='AvgPool_1a_{}x{}'.format(*kernel_size))\n        # 1 x 1 x 2048\n        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')\n        end_points['PreLogits'] = net\n        # 2048\n        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                             normalizer_fn=None, scope='Conv2d_1c_1x1')\n        if spatial_squeeze:\n          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')\n        # 1000\n      end_points['Logits'] = logits\n      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n  return logits, end_points\ninception_v3.default_image_size = 299\n\n\ndef _reduced_kernel_size_for_small_input(input_tensor, kernel_size):\n  \"\"\"Define kernel size which is automatically reduced for small input.\n\n  If the shape of the input images 
is unknown at graph construction time this\n  function assumes that the input images are is large enough.\n\n  Args:\n    input_tensor: input tensor of size [batch_size, height, width, channels].\n    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]\n\n  Returns:\n    a tensor with the kernel size.\n\n  TODO(jrru): Make this function work with unknown shapes. Theoretically, this\n  can be done with the code below. Problems are two-fold: (1) If the shape was\n  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot\n  handle tensors that define the kernel size.\n      shape = tf.shape(input_tensor)\n      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),\n                        tf.minimum(shape[2], kernel_size[1])])\n\n  \"\"\"\n  shape = input_tensor.get_shape().as_list()\n  if shape[1] is None or shape[2] is None:\n    kernel_size_out = kernel_size\n  else:\n    kernel_size_out = [min(shape[1], kernel_size[0]),\n                       min(shape[2], kernel_size[1])]\n  return kernel_size_out\n\n\ninception_v3_arg_scope = inception_utils.inception_arg_scope\n"
  },
  {
    "path": "models/slim/nets/inception_v3_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for nets.inception_v1.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom nets import inception\n\nslim = tf.contrib.slim\n\n\nclass InceptionV3Test(tf.test.TestCase):\n\n  def testBuildClassificationNetwork(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v3(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue('Predictions' in end_points)\n    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),\n                         [batch_size, num_classes])\n\n  def testBuildBaseNetwork(self):\n    batch_size = 5\n    height, width = 299, 299\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    final_endpoint, end_points = inception.inception_v3_base(inputs)\n    self.assertTrue(final_endpoint.op.name.startswith(\n        'InceptionV3/Mixed_7c'))\n    
self.assertListEqual(final_endpoint.get_shape().as_list(),\n                         [batch_size, 8, 8, 2048])\n    expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',\n                          'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',\n                          'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',\n                          'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',\n                          'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']\n    self.assertItemsEqual(end_points.keys(), expected_endpoints)\n\n  def testBuildOnlyUptoFinalEndpoint(self):\n    batch_size = 5\n    height, width = 299, 299\n    endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',\n                 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',\n                 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',\n                 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',\n                 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']\n\n    for index, endpoint in enumerate(endpoints):\n      with tf.Graph().as_default():\n        inputs = tf.random_uniform((batch_size, height, width, 3))\n        out_tensor, end_points = inception.inception_v3_base(\n            inputs, final_endpoint=endpoint)\n        self.assertTrue(out_tensor.op.name.startswith(\n            'InceptionV3/' + endpoint))\n        self.assertItemsEqual(endpoints[:index+1], end_points)\n\n  def testBuildAndCheckAllEndPointsUptoMixed7c(self):\n    batch_size = 5\n    height, width = 299, 299\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v3_base(\n        inputs, final_endpoint='Mixed_7c')\n    endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],\n                        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],\n                        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],\n                        'MaxPool_3a_3x3': [batch_size, 73, 73, 64],\n                     
   'Conv2d_3b_1x1': [batch_size, 73, 73, 80],\n                        'Conv2d_4a_3x3': [batch_size, 71, 71, 192],\n                        'MaxPool_5a_3x3': [batch_size, 35, 35, 192],\n                        'Mixed_5b': [batch_size, 35, 35, 256],\n                        'Mixed_5c': [batch_size, 35, 35, 288],\n                        'Mixed_5d': [batch_size, 35, 35, 288],\n                        'Mixed_6a': [batch_size, 17, 17, 768],\n                        'Mixed_6b': [batch_size, 17, 17, 768],\n                        'Mixed_6c': [batch_size, 17, 17, 768],\n                        'Mixed_6d': [batch_size, 17, 17, 768],\n                        'Mixed_6e': [batch_size, 17, 17, 768],\n                        'Mixed_7a': [batch_size, 8, 8, 1280],\n                        'Mixed_7b': [batch_size, 8, 8, 2048],\n                        'Mixed_7c': [batch_size, 8, 8, 2048]}\n    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())\n    for endpoint_name in endpoints_shapes:\n      expected_shape = endpoints_shapes[endpoint_name]\n      self.assertTrue(endpoint_name in end_points)\n      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),\n                           expected_shape)\n\n  def testModelHasExpectedNumberOfParameters(self):\n    batch_size = 5\n    height, width = 299, 299\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    with slim.arg_scope(inception.inception_v3_arg_scope()):\n      inception.inception_v3_base(inputs)\n    total_params, _ = slim.model_analyzer.analyze_vars(\n        slim.get_model_variables())\n    self.assertAlmostEqual(21802784, total_params)\n\n  def testBuildEndPoints(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v3(inputs, num_classes)\n    self.assertTrue('Logits' in end_points)\n    logits = end_points['Logits']\n    
self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue('AuxLogits' in end_points)\n    aux_logits = end_points['AuxLogits']\n    self.assertListEqual(aux_logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue('Mixed_7c' in end_points)\n    pre_pool = end_points['Mixed_7c']\n    self.assertListEqual(pre_pool.get_shape().as_list(),\n                         [batch_size, 8, 8, 2048])\n    self.assertTrue('PreLogits' in end_points)\n    pre_logits = end_points['PreLogits']\n    self.assertListEqual(pre_logits.get_shape().as_list(),\n                         [batch_size, 1, 1, 2048])\n\n  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v3(inputs, num_classes)\n\n    endpoint_keys = [key for key in end_points.keys()\n                     if key.startswith('Mixed') or key.startswith('Conv')]\n\n    _, end_points_with_multiplier = inception.inception_v3(\n        inputs, num_classes, scope='depth_multiplied_net',\n        depth_multiplier=0.5)\n\n    for key in endpoint_keys:\n      original_depth = end_points[key].get_shape().as_list()[3]\n      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]\n      self.assertEqual(0.5 * original_depth, new_depth)\n\n  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v3(inputs, num_classes)\n\n    endpoint_keys = [key for key in end_points.keys()\n                     if key.startswith('Mixed') or key.startswith('Conv')]\n\n    _, end_points_with_multiplier = inception.inception_v3(\n        inputs, num_classes, 
scope='depth_multiplied_net',\n        depth_multiplier=2.0)\n\n    for key in endpoint_keys:\n      original_depth = end_points[key].get_shape().as_list()[3]\n      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]\n      self.assertEqual(2.0 * original_depth, new_depth)\n\n  def testRaiseValueErrorWithInvalidDepthMultiplier(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    with self.assertRaises(ValueError):\n      _ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1)\n    with self.assertRaises(ValueError):\n      _ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0)\n\n  def testHalfSizeImages(self):\n    batch_size = 5\n    height, width = 150, 150\n    num_classes = 1000\n\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v3(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    pre_pool = end_points['Mixed_7c']\n    self.assertListEqual(pre_pool.get_shape().as_list(),\n                         [batch_size, 3, 3, 2048])\n\n  def testUnknownImageShape(self):\n    tf.reset_default_graph()\n    batch_size = 2\n    height, width = 299, 299\n    num_classes = 1000\n    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))\n    with self.test_session() as sess:\n      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))\n      logits, end_points = inception.inception_v3(inputs, num_classes)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      pre_pool = end_points['Mixed_7c']\n      feed_dict = {inputs: input_np}\n      tf.global_variables_initializer().run()\n      pre_pool_out = sess.run(pre_pool, 
feed_dict=feed_dict)\n      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])\n\n  def testUnknowBatchSize(self):\n    batch_size = 1\n    height, width = 299, 299\n    num_classes = 1000\n\n    inputs = tf.placeholder(tf.float32, (None, height, width, 3))\n    logits, _ = inception.inception_v3(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [None, num_classes])\n    images = tf.random_uniform((batch_size, height, width, 3))\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEquals(output.shape, (batch_size, num_classes))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 299, 299\n    num_classes = 1000\n\n    eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, _ = inception.inception_v3(eval_inputs, num_classes,\n                                       is_training=False)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (batch_size,))\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 5\n    eval_batch_size = 2\n    height, width = 150, 150\n    num_classes = 1000\n\n    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))\n    inception.inception_v3(train_inputs, num_classes)\n    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))\n    logits, _ = inception.inception_v3(eval_inputs, num_classes,\n                                       is_training=False, reuse=True)\n    predictions = tf.argmax(logits, 1)\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      
self.assertEquals(output.shape, (eval_batch_size,))\n\n  def testLogitsNotSqueezed(self):\n    num_classes = 25\n    images = tf.random_uniform([1, 299, 299, 3])\n    logits, _ = inception.inception_v3(images,\n                                       num_classes=num_classes,\n                                       spatial_squeeze=False)\n\n    with self.test_session() as sess:\n      tf.global_variables_initializer().run()\n      logits_out = sess.run(logits)\n      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/inception_v4.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the definition of the Inception V4 architecture.\n\nAs described in http://arxiv.org/abs/1602.07261.\n\n  Inception-v4, Inception-ResNet and the Impact of Residual Connections\n    on Learning\n  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception_utils\n\nslim = tf.contrib.slim\n\n\ndef block_inception_a(inputs, scope=None, reuse=None):\n  \"\"\"Builds Inception-A block for Inception v4 network.\"\"\"\n  # By default use stride=1 and SAME padding\n  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],\n                      stride=1, padding='SAME'):\n    with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse):\n      with tf.variable_scope('Branch_0'):\n        branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')\n      with tf.variable_scope('Branch_1'):\n        branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')\n        branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')\n      with tf.variable_scope('Branch_2'):\n        branch_2 = slim.conv2d(inputs, 64, [1, 1], 
scope='Conv2d_0a_1x1')\n        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')\n        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')\n      with tf.variable_scope('Branch_3'):\n        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')\n        branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')\n      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n\n\ndef block_reduction_a(inputs, scope=None, reuse=None):\n  \"\"\"Builds Reduction-A block for Inception v4 network.\"\"\"\n  # By default use stride=1 and SAME padding\n  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],\n                      stride=1, padding='SAME'):\n    with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse):\n      with tf.variable_scope('Branch_0'):\n        branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',\n                               scope='Conv2d_1a_3x3')\n      with tf.variable_scope('Branch_1'):\n        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')\n        branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')\n        branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,\n                               padding='VALID', scope='Conv2d_1a_3x3')\n      with tf.variable_scope('Branch_2'):\n        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',\n                                   scope='MaxPool_1a_3x3')\n      return tf.concat(3, [branch_0, branch_1, branch_2])\n\n\ndef block_inception_b(inputs, scope=None, reuse=None):\n  \"\"\"Builds Inception-B block for Inception v4 network.\"\"\"\n  # By default use stride=1 and SAME padding\n  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],\n                      stride=1, padding='SAME'):\n    with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse):\n      with tf.variable_scope('Branch_0'):\n        branch_0 
= slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')\n      with tf.variable_scope('Branch_1'):\n        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')\n        branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')\n        branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')\n      with tf.variable_scope('Branch_2'):\n        branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')\n        branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')\n        branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')\n        branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')\n        branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')\n      with tf.variable_scope('Branch_3'):\n        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')\n        branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')\n      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n\n\ndef block_reduction_b(inputs, scope=None, reuse=None):\n  \"\"\"Builds Reduction-B block for Inception v4 network.\"\"\"\n  # By default use stride=1 and SAME padding\n  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],\n                      stride=1, padding='SAME'):\n    with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse):\n      with tf.variable_scope('Branch_0'):\n        branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')\n        branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,\n                               padding='VALID', scope='Conv2d_1a_3x3')\n      with tf.variable_scope('Branch_1'):\n        branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')\n        branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')\n        branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')\n        branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,\n 
                              padding='VALID', scope='Conv2d_1a_3x3')\n      with tf.variable_scope('Branch_2'):\n        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',\n                                   scope='MaxPool_1a_3x3')\n      return tf.concat(3, [branch_0, branch_1, branch_2])\n\n\ndef block_inception_c(inputs, scope=None, reuse=None):\n  \"\"\"Builds Inception-C block for Inception v4 network.\"\"\"\n  # By default use stride=1 and SAME padding\n  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],\n                      stride=1, padding='SAME'):\n    with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse):\n      with tf.variable_scope('Branch_0'):\n        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')\n      with tf.variable_scope('Branch_1'):\n        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')\n        branch_1 = tf.concat(3, [\n            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),\n            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])\n      with tf.variable_scope('Branch_2'):\n        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')\n        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')\n        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')\n        branch_2 = tf.concat(3, [\n            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),\n            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])\n      with tf.variable_scope('Branch_3'):\n        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')\n        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')\n      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])\n\n\ndef inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):\n  \"\"\"Creates the Inception V4 network up to the given final endpoint.\n\n  Args:\n    inputs: a 
4-D tensor of size [batch_size, height, width, 3].\n    final_endpoint: specifies the endpoint to construct the network up to.\n      It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',\n      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',\n      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',\n      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',\n      'Mixed_7d']\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the logits outputs of the model.\n    end_points: the set of end_points from the inception model.\n\n  Raises:\n    ValueError: if final_endpoint is not set to one of the predefined values,\n  \"\"\"\n  end_points = {}\n\n  def add_and_check_final(name, net):\n    end_points[name] = net\n    return name == final_endpoint\n\n  with tf.variable_scope(scope, 'InceptionV4', [inputs]):\n    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                        stride=1, padding='SAME'):\n      # 299 x 299 x 3\n      net = slim.conv2d(inputs, 32, [3, 3], stride=2,\n                        padding='VALID', scope='Conv2d_1a_3x3')\n      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points\n      # 149 x 149 x 32\n      net = slim.conv2d(net, 32, [3, 3], padding='VALID',\n                        scope='Conv2d_2a_3x3')\n      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points\n      # 147 x 147 x 32\n      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')\n      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points\n      # 147 x 147 x 64\n      with tf.variable_scope('Mixed_3a'):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',\n                                     scope='MaxPool_0a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, 
padding='VALID',\n                                 scope='Conv2d_0a_3x3')\n        net = tf.concat(3, [branch_0, branch_1])\n        if add_and_check_final('Mixed_3a', net): return net, end_points\n\n      # 73 x 73 x 160\n      with tf.variable_scope('Mixed_4a'):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')\n          branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',\n                                 scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')\n          branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')\n          branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')\n          branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',\n                                 scope='Conv2d_1a_3x3')\n        net = tf.concat(3, [branch_0, branch_1])\n        if add_and_check_final('Mixed_4a', net): return net, end_points\n\n      # 71 x 71 x 192\n      with tf.variable_scope('Mixed_5a'):\n        with tf.variable_scope('Branch_0'):\n          branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',\n                                 scope='Conv2d_1a_3x3')\n        with tf.variable_scope('Branch_1'):\n          branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',\n                                     scope='MaxPool_1a_3x3')\n        net = tf.concat(3, [branch_0, branch_1])\n        if add_and_check_final('Mixed_5a', net): return net, end_points\n\n      # 35 x 35 x 384\n      # 4 x Inception-A blocks\n      for idx in xrange(4):\n        block_scope = 'Mixed_5' + chr(ord('b') + idx)\n        net = block_inception_a(net, block_scope)\n        if add_and_check_final(block_scope, net): return net, end_points\n\n      # 35 x 35 x 384\n      # Reduction-A block\n      net = block_reduction_a(net, 'Mixed_6a')\n      if 
add_and_check_final('Mixed_6a', net): return net, end_points\n\n      # 17 x 17 x 1024\n      # 7 x Inception-B blocks\n      for idx in xrange(7):\n        block_scope = 'Mixed_6' + chr(ord('b') + idx)\n        net = block_inception_b(net, block_scope)\n        if add_and_check_final(block_scope, net): return net, end_points\n\n      # 17 x 17 x 1024\n      # Reduction-B block\n      net = block_reduction_b(net, 'Mixed_7a')\n      if add_and_check_final('Mixed_7a', net): return net, end_points\n\n      # 8 x 8 x 1536\n      # 3 x Inception-C blocks\n      for idx in xrange(3):\n        block_scope = 'Mixed_7' + chr(ord('b') + idx)\n        net = block_inception_c(net, block_scope)\n        if add_and_check_final(block_scope, net): return net, end_points\n  raise ValueError('Unknown final endpoint %s' % final_endpoint)\n\n\ndef inception_v4(inputs, num_classes=1001, is_training=True,\n                 dropout_keep_prob=0.8,\n                 reuse=None,\n                 scope='InceptionV4',\n                 create_aux_logits=True):\n  \"\"\"Creates the Inception V4 model.\n\n  Args:\n    inputs: a 4-D tensor of size [batch_size, height, width, 3].\n    num_classes: number of predicted classes.\n    is_training: whether is training or not.\n    dropout_keep_prob: float, the fraction to keep before final layer.\n    reuse: whether or not the network and its variables should be reused. 
To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n    create_aux_logits: Whether to include the auxilliary logits.\n\n  Returns:\n    logits: the logits outputs of the model.\n    end_points: the set of end_points from the inception model.\n  \"\"\"\n  end_points = {}\n  with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:\n    with slim.arg_scope([slim.batch_norm, slim.dropout],\n                        is_training=is_training):\n      net, end_points = inception_v4_base(inputs, scope=scope)\n\n      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],\n                          stride=1, padding='SAME'):\n        # Auxiliary Head logits\n        if create_aux_logits:\n          with tf.variable_scope('AuxLogits'):\n            # 17 x 17 x 1024\n            aux_logits = end_points['Mixed_6h']\n            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,\n                                         padding='VALID',\n                                         scope='AvgPool_1a_5x5')\n            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],\n                                     scope='Conv2d_1b_1x1')\n            aux_logits = slim.conv2d(aux_logits, 768,\n                                     aux_logits.get_shape()[1:3],\n                                     padding='VALID', scope='Conv2d_2a')\n            aux_logits = slim.flatten(aux_logits)\n            aux_logits = slim.fully_connected(aux_logits, num_classes,\n                                              activation_fn=None,\n                                              scope='Aux_logits')\n            end_points['AuxLogits'] = aux_logits\n\n        # Final pooling and prediction\n        with tf.variable_scope('Logits'):\n          # 8 x 8 x 1536\n          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',\n                                scope='AvgPool_1a')\n          # 1 x 1 x 1536\n          net = 
slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')\n          net = slim.flatten(net, scope='PreLogitsFlatten')\n          end_points['PreLogitsFlatten'] = net\n          # 1536\n          logits = slim.fully_connected(net, num_classes, activation_fn=None,\n                                        scope='Logits')\n          end_points['Logits'] = logits\n          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')\n    return logits, end_points\ninception_v4.default_image_size = 299\n\n\ninception_v4_arg_scope = inception_utils.inception_arg_scope\n"
  },
  {
    "path": "models/slim/nets/inception_v4_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.inception_v4.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import inception\n\n\nclass InceptionTest(tf.test.TestCase):\n\n  def testBuildLogits(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v4(inputs, num_classes)\n    auxlogits = end_points['AuxLogits']\n    predictions = end_points['Predictions']\n    self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits'))\n    self.assertListEqual(auxlogits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    self.assertTrue(predictions.op.name.startswith(\n        'InceptionV4/Logits/Predictions'))\n    self.assertListEqual(predictions.get_shape().as_list(),\n                         [batch_size, num_classes])\n\n  def testBuildWithoutAuxLogits(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    
inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, endpoints = inception.inception_v4(inputs, num_classes,\n                                               create_aux_logits=False)\n    self.assertFalse('AuxLogits' in endpoints)\n    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n\n  def testAllEndPointsShapes(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    _, end_points = inception.inception_v4(inputs, num_classes)\n    endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],\n                        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],\n                        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],\n                        'Mixed_3a': [batch_size, 73, 73, 160],\n                        'Mixed_4a': [batch_size, 71, 71, 192],\n                        'Mixed_5a': [batch_size, 35, 35, 384],\n                        # 4 x Inception-A blocks\n                        'Mixed_5b': [batch_size, 35, 35, 384],\n                        'Mixed_5c': [batch_size, 35, 35, 384],\n                        'Mixed_5d': [batch_size, 35, 35, 384],\n                        'Mixed_5e': [batch_size, 35, 35, 384],\n                        # Reduction-A block\n                        'Mixed_6a': [batch_size, 17, 17, 1024],\n                        # 7 x Inception-B blocks\n                        'Mixed_6b': [batch_size, 17, 17, 1024],\n                        'Mixed_6c': [batch_size, 17, 17, 1024],\n                        'Mixed_6d': [batch_size, 17, 17, 1024],\n                        'Mixed_6e': [batch_size, 17, 17, 1024],\n                        'Mixed_6f': [batch_size, 17, 17, 1024],\n                        'Mixed_6g': [batch_size, 17, 17, 1024],\n                        'Mixed_6h': [batch_size, 17, 17, 1024],\n      
                  # Reduction-A block\n                        'Mixed_7a': [batch_size, 8, 8, 1536],\n                        # 3 x Inception-C blocks\n                        'Mixed_7b': [batch_size, 8, 8, 1536],\n                        'Mixed_7c': [batch_size, 8, 8, 1536],\n                        'Mixed_7d': [batch_size, 8, 8, 1536],\n                        # Logits and predictions\n                        'AuxLogits': [batch_size, num_classes],\n                        'PreLogitsFlatten': [batch_size, 1536],\n                        'Logits': [batch_size, num_classes],\n                        'Predictions': [batch_size, num_classes]}\n    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())\n    for endpoint_name in endpoints_shapes:\n      expected_shape = endpoints_shapes[endpoint_name]\n      self.assertTrue(endpoint_name in end_points)\n      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),\n                           expected_shape)\n\n  def testBuildBaseNetwork(self):\n    batch_size = 5\n    height, width = 299, 299\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    net, end_points = inception.inception_v4_base(inputs)\n    self.assertTrue(net.op.name.startswith(\n        'InceptionV4/Mixed_7d'))\n    self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])\n    expected_endpoints = [\n        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',\n        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',\n        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',\n        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',\n        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']\n    self.assertItemsEqual(end_points.keys(), expected_endpoints)\n    for name, op in end_points.iteritems():\n      self.assertTrue(op.name.startswith('InceptionV4/' + name))\n\n  def testBuildOnlyUpToFinalEndpoint(self):\n    batch_size = 5\n    height, width = 299, 299\n    
all_endpoints = [\n        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',\n        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',\n        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',\n        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',\n        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']\n    for index, endpoint in enumerate(all_endpoints):\n      with tf.Graph().as_default():\n        inputs = tf.random_uniform((batch_size, height, width, 3))\n        out_tensor, end_points = inception.inception_v4_base(\n            inputs, final_endpoint=endpoint)\n        self.assertTrue(out_tensor.op.name.startswith(\n            'InceptionV4/' + endpoint))\n        self.assertItemsEqual(all_endpoints[:index+1], end_points)\n\n  def testVariablesSetDevice(self):\n    batch_size = 5\n    height, width = 299, 299\n    num_classes = 1000\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    # Force all Variables to reside on the device.\n    with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):\n      inception.inception_v4(inputs, num_classes)\n    with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):\n      inception.inception_v4(inputs, num_classes)\n    for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):\n      self.assertDeviceEqual(v.device, '/cpu:0')\n    for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):\n      self.assertDeviceEqual(v.device, '/gpu:0')\n\n  def testHalfSizeImages(self):\n    batch_size = 5\n    height, width = 150, 150\n    num_classes = 1000\n    inputs = tf.random_uniform((batch_size, height, width, 3))\n    logits, end_points = inception.inception_v4(inputs, num_classes)\n    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [batch_size, num_classes])\n    pre_pool = end_points['Mixed_7d']\n    self.assertListEqual(pre_pool.get_shape().as_list(),\n        
                 [batch_size, 3, 3, 1536])\n\n  def testUnknownBatchSize(self):\n    batch_size = 1\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session() as sess:\n      inputs = tf.placeholder(tf.float32, (None, height, width, 3))\n      logits, _ = inception.inception_v4(inputs, num_classes)\n      self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [None, num_classes])\n      images = tf.random_uniform((batch_size, height, width, 3))\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEquals(output.shape, (batch_size, num_classes))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 299, 299\n    num_classes = 1000\n    with self.test_session() as sess:\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = inception.inception_v4(eval_inputs,\n                                         num_classes,\n                                         is_training=False)\n      predictions = tf.argmax(logits, 1)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (batch_size,))\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 5\n    eval_batch_size = 2\n    height, width = 150, 150\n    num_classes = 1000\n    with self.test_session() as sess:\n      train_inputs = tf.random_uniform((train_batch_size, height, width, 3))\n      inception.inception_v4(train_inputs, num_classes)\n      eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))\n      logits, _ = inception.inception_v4(eval_inputs,\n                                         num_classes,\n                                         is_training=False,\n                                         reuse=True)\n      predictions = tf.argmax(logits, 1)\n      
sess.run(tf.global_variables_initializer())\n      output = sess.run(predictions)\n      self.assertEquals(output.shape, (eval_batch_size,))\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/lenet.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains a variant of the LeNet model definition.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\ndef lenet(images, num_classes=10, is_training=False,\n          dropout_keep_prob=0.5,\n          prediction_fn=slim.softmax,\n          scope='LeNet'):\n  \"\"\"Creates a variant of the LeNet model.\n\n  Note that since the output is a set of 'logits', the values fall in the\n  interval of (-infinity, infinity). 
Consequently, to convert the outputs to a\n  probability distribution over the characters, one will need to convert them\n  using the softmax function:\n\n        logits = lenet.lenet(images, is_training=False)\n        probabilities = tf.nn.softmax(logits)\n        predictions = tf.argmax(logits, 1)\n\n  Args:\n    images: A batch of `Tensors` of size [batch_size, height, width, channels].\n    num_classes: the number of classes in the dataset.\n    is_training: specifies whether or not we're currently training the model.\n      This variable will determine the behaviour of the dropout layer.\n    dropout_keep_prob: the percentage of activation values that are retained.\n    prediction_fn: a function to get predictions out of logits.\n    scope: Optional variable_scope.\n\n  Returns:\n    logits: the pre-softmax activations, a tensor of size\n      [batch_size, `num_classes`]\n    end_points: a dictionary from components of the network to the corresponding\n      activation.\n  \"\"\"\n  end_points = {}\n\n  with tf.variable_scope(scope, 'LeNet', [images, num_classes]):\n    net = slim.conv2d(images, 32, [5, 5], scope='conv1')\n    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')\n    net = slim.conv2d(net, 64, [5, 5], scope='conv2')\n    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')\n    net = slim.flatten(net)\n    end_points['Flatten'] = net\n\n    net = slim.fully_connected(net, 1024, scope='fc3')\n    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                       scope='dropout3')\n    logits = slim.fully_connected(net, num_classes, activation_fn=None,\n                                  scope='fc4')\n\n  end_points['Logits'] = logits\n  end_points['Predictions'] = prediction_fn(logits, scope='Predictions')\n\n  return logits, end_points\nlenet.default_image_size = 28\n\n\ndef lenet_arg_scope(weight_decay=0.0):\n  \"\"\"Defines the default lenet argument scope.\n\n  Args:\n    weight_decay: The weight decay to use for 
regularizing the model.\n\n  Returns:\n    An `arg_scope` to use for the inception v3 model.\n  \"\"\"\n  with slim.arg_scope(\n      [slim.conv2d, slim.fully_connected],\n      weights_regularizer=slim.l2_regularizer(weight_decay),\n      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),\n      activation_fn=tf.nn.relu) as sc:\n    return sc\n"
  },
  {
    "path": "models/slim/nets/nets_factory.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains a factory for building various models.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nimport functools\n\nimport tensorflow as tf\nimport numpy as np\nimport sys\n\nfrom nets import alexnet\nfrom nets import cifarnet\nfrom nets import inception\nfrom nets import lenet\nfrom nets import overfeat\nfrom nets import resnet_v1\nfrom nets import resnet_v2\nfrom nets import vgg\n\nsys.path.append('libs/tensorflow_compact_bilinear_pooling/')\nfrom compact_bilinear_pooling import compact_bilinear_pooling_layer\n\nslim = tf.contrib.slim\n\nnetworks_map = {'alexnet_v2': alexnet.alexnet_v2,\n                'cifarnet': cifarnet.cifarnet,\n                'overfeat': overfeat.overfeat,\n                'vgg_a': vgg.vgg_a,\n                'vgg_16': vgg.vgg_16,\n                'vgg_19': vgg.vgg_19,\n                'inception_v1': inception.inception_v1,\n                'inception_v2': inception.inception_v2,\n                'inception_v2_tsn': inception.inception_v2_tsn,\n                'inception_v3': inception.inception_v3,\n                'inception_v4': inception.inception_v4,\n                'inception_resnet_v2': inception.inception_resnet_v2,\n                'lenet': 
lenet.lenet,\n                'resnet_v1_50': resnet_v1.resnet_v1_50,\n                'resnet_v1_101': resnet_v1.resnet_v1_101,\n                'resnet_v1_152': resnet_v1.resnet_v1_152,\n                'resnet_v1_200': resnet_v1.resnet_v1_200,\n                'resnet_v2_50': resnet_v2.resnet_v2_50,\n                'resnet_v2_101': resnet_v2.resnet_v2_101,\n                'resnet_v2_152': resnet_v2.resnet_v2_152,\n                'resnet_v2_200': resnet_v2.resnet_v2_200,\n               }\n\nlast_conv_map = {'inception_v3': 'Mixed_7c',\n                 'inception_v2_tsn': 'InceptionV2_TSN/inception_5b',\n                 'resnet_v1_101': 'resnet_v1_101/block4',\n                 'vgg_16': 'vgg_16/conv5',\n                }\n\narg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,\n                  'cifarnet': cifarnet.cifarnet_arg_scope,\n                  'overfeat': overfeat.overfeat_arg_scope,\n                  'vgg_a': vgg.vgg_arg_scope,\n                  'vgg_16': vgg.vgg_arg_scope,\n                  'vgg_19': vgg.vgg_arg_scope,\n                  'inception_v1': inception.inception_v3_arg_scope,\n                  'inception_v2': inception.inception_v3_arg_scope,\n                  'inception_v2_tsn': inception.inception_v2_tsn_arg_scope,\n                  'inception_v3': inception.inception_v3_arg_scope,\n                  'inception_v4': inception.inception_v4_arg_scope,\n                  'inception_resnet_v2':\n                  inception.inception_resnet_v2_arg_scope,\n                  'lenet': lenet.lenet_arg_scope,\n                  'resnet_v1_50': resnet_v1.resnet_arg_scope,\n                  'resnet_v1_101': resnet_v1.resnet_arg_scope,\n                  'resnet_v1_152': resnet_v1.resnet_arg_scope,\n                  'resnet_v1_200': resnet_v1.resnet_arg_scope,\n                  'resnet_v2_50': resnet_v2.resnet_arg_scope,\n                  'resnet_v2_101': resnet_v2.resnet_arg_scope,\n                  'resnet_v2_152': 
resnet_v2.resnet_arg_scope,\n                  'resnet_v2_200': resnet_v2.resnet_arg_scope,\n                 }\n\n\ndef get_network_fn(name, num_classes, num_pose_keypoints, cfg,\n                   weight_decay=0.0, is_training=False):\n  \"\"\"Returns a network_fn such as `logits, end_points = network_fn(images)`.\n\n  Args:\n    name: The name of the network.\n    num_classes: The number of classes to use for classification.\n    num_pose_keypoints: The number of channels to output for pose.\n    weight_decay: The l2 coefficient for the model weights.\n    is_training: `True` if the model is being used for training and `False`\n      otherwise.\n\n  Returns:\n    network_fn: A function that applies the model to a batch of images. It has\n      the following signature:\n        logits, end_points = network_fn(images)\n  Raises:\n    ValueError: If network `name` is not recognized.\n  \"\"\"\n  if name not in networks_map:\n    raise ValueError('Name of network unknown %s' % name)\n  arg_scope = arg_scopes_map[name](weight_decay=weight_decay)\n  func = networks_map[name]\n  @functools.wraps(func)\n  def network_fn(images):\n    with slim.arg_scope(arg_scope):\n      frames_per_video = 1  # same for single image datasets\n      if images.get_shape().ndims == 5:\n        im_shape = images.get_shape().as_list()\n        frames_per_video = im_shape[1]\n        images = tf.reshape(\n          images, [-1, im_shape[-3], im_shape[-2], im_shape[-1]])\n\n      # Main Network Function\n      kwargs = {}\n      if cfg.NET.DROPOUT >= 0:  # if -1, then just ignore it and use nw def.\n        kwargs['dropout_keep_prob'] = (1-cfg.NET.DROPOUT)\n      logits, end_points = func(images, num_classes, is_training=is_training,\n                                train_top_bn=cfg.NET.TRAIN_TOP_BN,\n                                **kwargs)\n\n      # rgirdhar: add another end point for heatmap prediction\n      try:\n        last_conv = end_points[last_conv_map[name]]\n      except:\n     
   raise ValueError('End point {} not found. Choose from: {}'.format(\n          last_conv_map[name], ' '.join(end_points)))\n      random_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)\n\n      with slim.arg_scope([slim.dropout],\n                          is_training=is_training,\n                          keep_prob=0.2 if cfg.NET.DROPOUT < 0\n                                        else (1.0-cfg.NET.DROPOUT)):\n        with tf.variable_scope('PoseLogits'):\n          last_conv_pose_name = getattr(\n            cfg.NET.LAST_CONV_MAP_FOR_POSE, name)\n          last_conv_pose = end_points[last_conv_pose_name]\n          pose_pre_logits = slim.conv2d(\n            last_conv_pose, 768, [1, 1],\n            weights_initializer=random_normal(0.001),\n            activation_fn=tf.nn.relu,\n            normalizer_fn=None,\n            biases_initializer=tf.zeros_initializer(),\n            padding='SAME', scope='ExtraConv2d_1x1')\n          pose_logits = slim.conv2d(pose_pre_logits, num_pose_keypoints, [1, 1], activation_fn=None,\n                                    normalizer_fn=None, scope='Conv2d_1c_1x1')\n          end_points['PoseLogits'] = pose_logits\n\n        if cfg.NET.USE_POSE_ATTENTION_LOGITS:\n          with tf.variable_scope('PoseAttention'):\n            # use the pose prediction as an attention map to get the features\n            # step1: split pose logits over channels\n            pose_logits_parts = tf.split(\n              pose_logits, pose_logits.get_shape().as_list()[-1],\n              axis=pose_logits.get_shape().ndims-1)\n            part_logits = []\n            # allows to choose which dimension of pose to use for heatmaps\n            parts_to_use = pose_logits_parts\n            if cfg.NET.USE_POSE_ATTENTION_LOGITS_DIMS != [-1]:\n              parts_to_use = (np.array(pose_logits_parts)[\n                cfg.NET.USE_POSE_ATTENTION_LOGITS_DIMS]).tolist()\n            tf.logging.info('Using {} parts for pose attention 
logits'.format(\n              len(parts_to_use)))\n            for part in parts_to_use:\n              part_logits.append(tf.reduce_mean(part * last_conv, axis=[1,2],\n                                                keep_dims=True))\n            if cfg.NET.USE_POSE_ATTENTION_LOGITS_AVGED_HMAP:\n              part_logits.append(tf.reduce_mean(\n                last_conv * tf.reduce_mean(pose_logits, axis=-1, keep_dims=True),\n                axis=[1,2], keep_dims=True))\n            part_logits.append(tf.reduce_mean(last_conv, axis=[1,2],\n                                              keep_dims=True))\n            net = tf.concat(part_logits, axis=-1)\n            net = slim.dropout(net)\n            logits = slim.conv2d(net, num_classes, [1, 1],\n                                 weights_initializer=random_normal(0.001),\n                                 biases_initializer=tf.zeros_initializer(),\n                                 activation_fn=None,\n                                 normalizer_fn=None)\n        elif cfg.NET.USE_POSE_LOGITS_DIRECTLY:\n          with tf.variable_scope('ActionFromPose'):\n            net = tf.reduce_mean(\n              pose_pre_logits, axis=[1, 2], keep_dims=True)\n            net = slim.conv2d(net, 768, [1, 1],\n                              normalizer_fn=None,\n                              weights_initializer=random_normal(0.001),\n                              biases_initializer=tf.zeros_initializer())\n            if cfg.NET.USE_POSE_LOGITS_DIRECTLY_PLUS_LOGITS:\n              net = tf.concat([\n                net, tf.reduce_mean(last_conv, axis=[1, 2], keep_dims=True)],\n                axis=-1)\n            net = slim.dropout(net)\n            logits = slim.conv2d(net, num_classes, [1, 1],\n                                 weights_initializer=random_normal(0.001),\n                                 biases_initializer=tf.zeros_initializer(),\n                                 activation_fn=None,\n                                
 normalizer_fn=None)\n        elif cfg.NET.USE_POSE_LOGITS_DIRECTLY_v2:\n          with tf.variable_scope('ActionFromPose_v2'):\n            net = tf.concat([\n              pose_pre_logits,\n              last_conv],\n              axis=-1)\n            if cfg.NET.USE_POSE_LOGITS_DIRECTLY_v2_EXTRA_LAYER:\n              net = tf.nn.relu(net)\n              net = slim.conv2d(net, net.get_shape().as_list()[-1], [1, 1],\n                                weights_initializer=random_normal(0.001),\n                                biases_initializer=tf.zeros_initializer())\n            net = tf.reduce_mean(net, axis=[1, 2], keep_dims=True)\n            net = slim.dropout(net)\n            logits = slim.conv2d(net, num_classes, [1, 1],\n                                 weights_initializer=random_normal(0.001),\n                                 biases_initializer=tf.zeros_initializer(),\n                                 activation_fn=None,\n                                 normalizer_fn=None)\n        elif cfg.NET.USE_COMPACT_BILINEAR_POOLING:\n          last_conv_shape = last_conv.get_shape().as_list()\n          net = compact_bilinear_pooling_layer(\n            last_conv, last_conv, last_conv_shape[-1])\n          net.set_shape([last_conv_shape[0], last_conv_shape[-1]])\n          net = tf.expand_dims(tf.expand_dims(\n            net, 1), 1)\n          net = slim.dropout(net)\n          logits = slim.conv2d(net, num_classes, [1, 1],\n                               weights_initializer=random_normal(0.001),\n                               biases_initializer=tf.zeros_initializer(),\n                               activation_fn=None,\n                               normalizer_fn=None)\n        elif cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION:\n          with tf.variable_scope('PosePrelogitsBasedAttention'):\n            # If the following is set, just train on top of image features,\n            # don't add the prelogits at all. 
This was useful as pose seemed to\n            # not help with it at all.\n            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT:\n              net = last_conv\n            else:\n              net = pose_pre_logits\n            # nMaps = num_classes if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS else 1\n            # For simplicity, since multiple maps doesn't seem to help, I'm\n            # not allowing that to keep the following code simple.\n            # nMaps = 1\n            # For NIPS2017 rebuttal, they wanted to see nums with per-class\n            # attention, so doing that too\n            nMaps = num_classes if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS else 1\n            all_att_logits = []\n            for rank_id in range(cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK):\n              scope_name = 'Conv2d_PrePose_Attn'\n              if rank_id >= 1:\n                scope_name += str(rank_id)\n              net = slim.conv2d(net, nMaps,\n                                [1, 1],\n                                weights_initializer=random_normal(0.001),\n                                biases_initializer=tf.zeros_initializer(),\n                                activation_fn=None,\n                                normalizer_fn=None,\n                                scope=scope_name)\n              all_att_logits.append(net)\n            if len(all_att_logits) > 1:\n              attention_logits = tf.stack(all_att_logits, axis=-1)\n            else:\n              attention_logits = all_att_logits[0]\n\n            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SOFTMAX_ATT:\n              # bring the number of channels earlier to make softmax easier\n              attention_logits = tf.transpose(attention_logits, [0, 3, 1, 2])\n              att_shape = attention_logits.get_shape().as_list()\n              attention_logits = tf.reshape(\n                attention_logits, [att_shape[0], att_shape[1], -1])\n      
        attention_logits = tf.nn.softmax(attention_logits)\n              attention_logits = tf.reshape(attention_logits, att_shape)\n              attention_logits = tf.transpose(attention_logits, [0, 2, 3, 1])\n            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RELU_ATT:\n              attention_logits = tf.nn.relu(attention_logits)\n            end_points['PosePrelogitsBasedAttention'] = attention_logits\n\n            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT:\n              if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT_2LAYER:\n                pose_logits = slim.conv2d(\n                  pose_logits, pose_logits.get_shape()[-1],\n                  [1, 1], weights_initializer=random_normal(0.001),\n                  biases_initializer=tf.zeros_initializer())\n              last_conv = tf.concat([last_conv, pose_logits], axis=-1)\n            last_conv = slim.dropout(last_conv)\n            # Top-down attention\n            all_logits = []\n            for _ in range(cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK):\n              logits = slim.conv2d(last_conv, num_classes, [1, 1],\n                                   weights_initializer=random_normal(0.001),\n                                   biases_initializer=tf.zeros_initializer(),\n                                   activation_fn=None, normalizer_fn=None)\n              all_logits.append(logits)\n            if len(all_logits) > 1:\n              logits = tf.stack(all_logits, axis=-1)\n            else:\n              logits = all_logits[0]\n            end_points['TopDownAttention'] = logits\n\n            # attended_feats = []\n            # for attention_logit in tf.unstack(attention_logits, axis=-1):\n            #   attended_feats.append(tf.reduce_mean(\n            #     tf.expand_dims(attention_logit, axis=-1) * logits,\n            #     axis=[1,2],\n            #     keep_dims=True))\n            # attended_feat = tf.stack(attended_feats, axis=-1)\n        
    # # Since only 1 attention map (asserted above)\n            # logits = attended_feat[..., 0]\n\n            # better way to do the above:\n            logits = tf.reduce_mean(\n              attention_logits * logits,\n              axis=[1, 2],\n              keep_dims=True)\n            if logits.get_shape().ndims == 5:\n              # i.e. rank was > 1\n              logits = tf.reduce_sum(logits, axis=-1)\n\n            # if nMaps == 1:\n            #   # remove the extra dimension that is added for multi-class\n            #   # attention case\n            #   attended_feat = attended_feat[..., 0]\n            #   logits = slim.conv2d(attended_feat, num_classes, [1, 1],\n            #                        weights_initializer=random_normal(0.001),\n            #                        biases_initializer=tf.zeros_initializer(),\n            #                        activation_fn=None,\n            #                        normalizer_fn=None)\n            # else:\n            #   logits = tf.concat([\n            #     slim.conv2d(el, 1, [1, 1],\n            #                 weights_initializer=random_normal(0.001),\n            #                 biases_initializer=tf.zeros_initializer(),\n            #                 activation_fn=None,\n            #                 normalizer_fn=None) for el in\n            #     tf.unstack(attended_feat, axis=-1)], axis=-1)\n        # This is just to protect against the case where I don't do any of the\n        # above and get the original logits from the network, which has already\n        # been squeezed, or in case of vgg 16, passed through fc layers\n        if logits.get_shape().ndims > 2:\n          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')\n        end_points['Logits'] = logits\n\n      if frames_per_video > 1:\n        with tf.name_scope('FramePooling'):\n          # for now stick with avg pool\n          end_points['logits_beforePool'] = logits\n          old_logits = logits\n          
logits = tf.stack([el for el in tf.split(\n            old_logits, int(old_logits.get_shape().as_list()[0] /\n                            frames_per_video))])\n          if cfg.NET.USE_TEMPORAL_ATT:\n            with tf.variable_scope('TemporalAttention'):\n              logits = tf.expand_dims(logits, axis=-2)  #[bs, 3, 1, nc]\n              logits_att = slim.conv2d(\n                logits, 1, [1, 1],\n                weights_initializer=random_normal(0.001),\n                biases_initializer=tf.constant_initializer(\n                  1.0 / logits.get_shape().as_list()[1]),\n                activation_fn=None, normalizer_fn=None)\n              logits = logits * logits_att\n              logits = tf.squeeze(logits, axis=-2)\n              end_points['TemporalAttention'] = logits_att\n          logits = tf.reduce_mean(logits, axis=1)\n      return logits, end_points\n\n  if hasattr(func, 'default_image_size'):\n    network_fn.default_image_size = func.default_image_size\n\n  return network_fn\n"
  },
  {
    "path": "models/slim/nets/nets_factory_test.py",
    "content": "# Copyright 2016 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Tests for slim.inception.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\n\nimport tensorflow as tf\n\nfrom nets import nets_factory\n\n\nclass NetworksTest(tf.test.TestCase):\n\n  def testGetNetworkFn(self):\n    batch_size = 5\n    num_classes = 1000\n    for net in nets_factory.networks_map:\n      with self.test_session():\n        net_fn = nets_factory.get_network_fn(net, num_classes)\n        # Most networks use 224 as their default_image_size\n        image_size = getattr(net_fn, 'default_image_size', 224)\n        inputs = tf.random_uniform((batch_size, image_size, image_size, 3))\n        logits, end_points = net_fn(inputs)\n        self.assertTrue(isinstance(logits, tf.Tensor))\n        self.assertTrue(isinstance(end_points, dict))\n        self.assertEqual(logits.get_shape().as_list()[0], batch_size)\n        self.assertEqual(logits.get_shape().as_list()[-1], num_classes)\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/overfeat.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains the model definition for the OverFeat network.\n\nThe definition for the network was obtained from:\n  OverFeat: Integrated Recognition, Localization and Detection using\n  Convolutional Networks\n  Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and\n  Yann LeCun, 2014\n  http://arxiv.org/abs/1312.6229\n\nUsage:\n  with slim.arg_scope(overfeat.overfeat_arg_scope()):\n    outputs, end_points = overfeat.overfeat(inputs)\n\n@@overfeat\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\ntrunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)\n\n\ndef overfeat_arg_scope(weight_decay=0.0005):\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      activation_fn=tf.nn.relu,\n                      weights_regularizer=slim.l2_regularizer(weight_decay),\n                      biases_initializer=tf.zeros_initializer):\n    with slim.arg_scope([slim.conv2d], padding='SAME'):\n      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:\n        return arg_sc\n\n\ndef overfeat(inputs,\n             num_classes=1000,\n             is_training=True,\n          
   dropout_keep_prob=0.5,\n             spatial_squeeze=True,\n             scope='overfeat'):\n  \"\"\"Contains the model definition for the OverFeat network.\n\n  The definition for the network was obtained from:\n    OverFeat: Integrated Recognition, Localization and Detection using\n    Convolutional Networks\n    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and\n    Yann LeCun, 2014\n    http://arxiv.org/abs/1312.6229\n\n  Note: All the fully_connected layers have been transformed to conv2d layers.\n        To use in classification mode, resize input to 231x231. To use in fully\n        convolutional mode, set spatial_squeeze to false.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether or not the model is being trained.\n    dropout_keep_prob: the probability that activations are kept in the dropout\n      layers during training.\n    spatial_squeeze: whether or not should squeeze the spatial dimensions of the\n      outputs. 
Useful to remove unnecessary dimensions for classification.\n    scope: Optional scope for the variables.\n\n  Returns:\n    the last op containing the log predictions and end_points dict.\n\n  \"\"\"\n  with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:\n    end_points_collection = sc.name + '_end_points'\n    # Collect outputs for conv2d, fully_connected and max_pool2d\n    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],\n                        outputs_collections=end_points_collection):\n      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',\n                        scope='conv1')\n      net = slim.max_pool2d(net, [2, 2], scope='pool1')\n      net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')\n      net = slim.max_pool2d(net, [2, 2], scope='pool2')\n      net = slim.conv2d(net, 512, [3, 3], scope='conv3')\n      net = slim.conv2d(net, 1024, [3, 3], scope='conv4')\n      net = slim.conv2d(net, 1024, [3, 3], scope='conv5')\n      net = slim.max_pool2d(net, [2, 2], scope='pool5')\n      with slim.arg_scope([slim.conv2d],\n                          weights_initializer=trunc_normal(0.005),\n                          biases_initializer=tf.constant_initializer(0.1)):\n        # Use conv2d instead of fully_connected layers.\n        net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')\n        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                           scope='dropout6')\n        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')\n        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                           scope='dropout7')\n        net = slim.conv2d(net, num_classes, [1, 1],\n                          activation_fn=None,\n                          normalizer_fn=None,\n                          biases_initializer=tf.zeros_initializer,\n                          scope='fc8')\n      # Convert end_points_collection into a end_point 
dict.\n      end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n      if spatial_squeeze:\n        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')\n        end_points[sc.name + '/fc8'] = net\n      return net, end_points\noverfeat.default_image_size = 231\n"
  },
  {
    "path": "models/slim/nets/overfeat_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.nets.overfeat.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import overfeat\n\nslim = tf.contrib.slim\n\n\nclass OverFeatTest(tf.test.TestCase):\n\n  def testBuild(self):\n    batch_size = 5\n    height, width = 231, 231\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = overfeat.overfeat(inputs, num_classes)\n      self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testFullyConvolutional(self):\n    batch_size = 1\n    height, width = 281, 281\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False)\n      self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, 2, 2, num_classes])\n\n  def testEndPoints(self):\n    batch_size = 5\n    height, width = 231, 231\n    
num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = overfeat.overfeat(inputs, num_classes)\n      expected_names = ['overfeat/conv1',\n                        'overfeat/pool1',\n                        'overfeat/conv2',\n                        'overfeat/pool2',\n                        'overfeat/conv3',\n                        'overfeat/conv4',\n                        'overfeat/conv5',\n                        'overfeat/pool5',\n                        'overfeat/fc6',\n                        'overfeat/fc7',\n                        'overfeat/fc8'\n                       ]\n      self.assertSetEqual(set(end_points.keys()), set(expected_names))\n\n  def testModelVariables(self):\n    batch_size = 5\n    height, width = 231, 231\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      overfeat.overfeat(inputs, num_classes)\n      expected_names = ['overfeat/conv1/weights',\n                        'overfeat/conv1/biases',\n                        'overfeat/conv2/weights',\n                        'overfeat/conv2/biases',\n                        'overfeat/conv3/weights',\n                        'overfeat/conv3/biases',\n                        'overfeat/conv4/weights',\n                        'overfeat/conv4/biases',\n                        'overfeat/conv5/weights',\n                        'overfeat/conv5/biases',\n                        'overfeat/fc6/weights',\n                        'overfeat/fc6/biases',\n                        'overfeat/fc7/weights',\n                        'overfeat/fc7/biases',\n                        'overfeat/fc8/weights',\n                        'overfeat/fc8/biases',\n                       ]\n      model_variables = [v.op.name for v in slim.get_model_variables()]\n      self.assertSetEqual(set(model_variables), set(expected_names))\n\n  def testEvaluation(self):\n    
batch_size = 2\n    height, width = 231, 231\n    num_classes = 1000\n    with self.test_session():\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = overfeat.overfeat(eval_inputs, is_training=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      predictions = tf.argmax(logits, 1)\n      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 2\n    eval_batch_size = 1\n    train_height, train_width = 231, 231\n    eval_height, eval_width = 281, 281\n    num_classes = 1000\n    with self.test_session():\n      train_inputs = tf.random_uniform(\n          (train_batch_size, train_height, train_width, 3))\n      logits, _ = overfeat.overfeat(train_inputs)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [train_batch_size, num_classes])\n      tf.get_variable_scope().reuse_variables()\n      eval_inputs = tf.random_uniform(\n          (eval_batch_size, eval_height, eval_width, 3))\n      logits, _ = overfeat.overfeat(eval_inputs, is_training=False,\n                                    spatial_squeeze=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [eval_batch_size, 2, 2, num_classes])\n      logits = tf.reduce_mean(logits, [1, 2])\n      predictions = tf.argmax(logits, 1)\n      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])\n\n  def testForward(self):\n    batch_size = 1\n    height, width = 231, 231\n    with self.test_session() as sess:\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = overfeat.overfeat(inputs)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits)\n      self.assertTrue(output.any())\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/resnet_utils.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains building blocks for various versions of Residual Networks.\n\nResidual networks (ResNets) were proposed in:\n  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n  Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015\n\nMore variants were introduced in:\n  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n  Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016\n\nWe can obtain different ResNet variants by changing the network depth, width,\nand form of residual unit. This module implements the infrastructure for\nbuilding them. Concrete ResNet units and full ResNet networks are implemented in\nthe accompanying resnet_v1.py and resnet_v2.py modules.\n\nCompared to https://github.com/KaimingHe/deep-residual-networks, in the current\nimplementation we subsample the output activations in the last residual unit of\neach block, instead of subsampling the input activations in the first residual\nunit of each block. 
The two implementations give identical results but our\nimplementation is more memory efficient.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\nclass Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):\n  \"\"\"A named tuple describing a ResNet block.\n\n  Its parts are:\n    scope: The scope of the `Block`.\n    unit_fn: The ResNet unit function which takes as input a `Tensor` and\n      returns another `Tensor` with the output of the ResNet unit.\n    args: A list of length equal to the number of units in the `Block`. The list\n      contains one (depth, depth_bottleneck, stride) tuple for each unit in the\n      block to serve as argument to unit_fn.\n  \"\"\"\n\n\ndef subsample(inputs, factor, scope=None):\n  \"\"\"Subsamples the input along the spatial dimensions.\n\n  Args:\n    inputs: A `Tensor` of size [batch, height_in, width_in, channels].\n    factor: The subsampling factor.\n    scope: Optional variable_scope.\n\n  Returns:\n    output: A `Tensor` of size [batch, height_out, width_out, channels] with the\n      input, either intact (if factor == 1) or subsampled (if factor > 1).\n  \"\"\"\n  if factor == 1:\n    return inputs\n  else:\n    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)\n\n\ndef conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):\n  \"\"\"Strided 2-D convolution with 'SAME' padding.\n\n  When stride > 1, then we do explicit zero-padding, followed by conv2d with\n  'VALID' padding.\n\n  Note that\n\n     net = conv2d_same(inputs, num_outputs, 3, stride=stride)\n\n  is equivalent to\n\n     net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')\n     net = subsample(net, factor=stride)\n\n  whereas\n\n     net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')\n\n  is different when the input's 
height or width is even, which is why we add the\n  current function. For more details, see ResnetUtilsTest.testConv2DSameEven().\n\n  Args:\n    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].\n    num_outputs: An integer, the number of output filters.\n    kernel_size: An int with the kernel_size of the filters.\n    stride: An integer, the output stride.\n    rate: An integer, rate for atrous convolution.\n    scope: Scope.\n\n  Returns:\n    output: A 4-D tensor of size [batch, height_out, width_out, channels] with\n      the convolution output.\n  \"\"\"\n  if stride == 1:\n    return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,\n                       padding='SAME', scope=scope)\n  else:\n    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)\n    pad_total = kernel_size_effective - 1\n    pad_beg = pad_total // 2\n    pad_end = pad_total - pad_beg\n    inputs = tf.pad(inputs,\n                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])\n    return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,\n                       rate=rate, padding='VALID', scope=scope)\n\n\n@slim.add_arg_scope\ndef stack_blocks_dense(net, blocks, output_stride=None,\n                       outputs_collections=None):\n  \"\"\"Stacks ResNet `Blocks` and controls output feature density.\n\n  First, this function creates scopes for the ResNet in the form of\n  'block_name/unit_1', 'block_name/unit_2', etc.\n\n  Second, this function allows the user to explicitly control the ResNet\n  output_stride, which is the ratio of the input to output spatial resolution.\n  This is useful for dense prediction tasks such as semantic segmentation or\n  object detection.\n\n  Most ResNets consist of 4 ResNet blocks and subsample the activations by a\n  factor of 2 when transitioning between consecutive ResNet blocks. This results\n  to a nominal ResNet output_stride equal to 8. 
If we set the output_stride to\n  half the nominal network stride (e.g., output_stride=4), then we compute\n  responses twice.\n\n  Control of the output feature density is implemented by atrous convolution.\n\n  Args:\n    net: A `Tensor` of size [batch, height, width, channels].\n    blocks: A list of length equal to the number of ResNet `Blocks`. Each\n      element is a ResNet `Block` object describing the units in the `Block`.\n    output_stride: If `None`, then the output will be computed at the nominal\n      network stride. If output_stride is not `None`, it specifies the requested\n      ratio of input to output spatial resolution, which needs to be equal to\n      the product of unit strides from the start up to some level of the ResNet.\n      For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,\n      then valid values for the output_stride are 1, 2, 6, 24 or None (which\n      is equivalent to output_stride=24).\n    outputs_collections: Collection to add the ResNet block outputs.\n\n  Returns:\n    net: Output tensor with stride equal to the specified output_stride.\n\n  Raises:\n    ValueError: If the target output_stride is not valid.\n  \"\"\"\n  # The current_stride variable keeps track of the effective stride of the\n  # activations. 
This allows us to invoke atrous convolution whenever applying\n  # the next residual unit would result in the activations having stride larger\n  # than the target output_stride.\n  current_stride = 1\n\n  # The atrous convolution rate parameter.\n  rate = 1\n\n  for block in blocks:\n    with tf.variable_scope(block.scope, 'block', [net]) as sc:\n      for i, unit in enumerate(block.args):\n        if output_stride is not None and current_stride > output_stride:\n          raise ValueError('The target output_stride cannot be reached.')\n\n        with tf.variable_scope('unit_%d' % (i + 1), values=[net]):\n          unit_depth, unit_depth_bottleneck, unit_stride = unit\n\n          # If we have reached the target output_stride, then we need to employ\n          # atrous convolution with stride=1 and multiply the atrous rate by the\n          # current unit's stride for use in subsequent layers.\n          if output_stride is not None and current_stride == output_stride:\n            net = block.unit_fn(net,\n                                depth=unit_depth,\n                                depth_bottleneck=unit_depth_bottleneck,\n                                stride=1,\n                                rate=rate)\n            rate *= unit_stride\n\n          else:\n            net = block.unit_fn(net,\n                                depth=unit_depth,\n                                depth_bottleneck=unit_depth_bottleneck,\n                                stride=unit_stride,\n                                rate=1)\n            current_stride *= unit_stride\n      net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)\n\n  if output_stride is not None and current_stride != output_stride:\n    raise ValueError('The target output_stride cannot be reached.')\n\n  return net\n\n\ndef resnet_arg_scope(weight_decay=0.0001,\n                     batch_norm_decay=0.997,\n                     batch_norm_epsilon=1e-5,\n                     
batch_norm_scale=True):\n  \"\"\"Defines the default ResNet arg scope.\n\n  TODO(gpapan): The batch-normalization related default values above are\n    appropriate for use in conjunction with the reference ResNet models\n    released at https://github.com/KaimingHe/deep-residual-networks. When\n    training ResNets from scratch, they might need to be tuned.\n\n  Args:\n    weight_decay: The weight decay to use for regularizing the model.\n    batch_norm_decay: The moving average decay when estimating layer activation\n      statistics in batch normalization.\n    batch_norm_epsilon: Small constant to prevent division by zero when\n      normalizing activations by their variance in batch normalization.\n    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the\n      activations in the batch normalization layer.\n\n  Returns:\n    An `arg_scope` to use for the resnet models.\n  \"\"\"\n  batch_norm_params = {\n      'decay': batch_norm_decay,\n      'epsilon': batch_norm_epsilon,\n      'scale': batch_norm_scale,\n      'updates_collections': tf.GraphKeys.UPDATE_OPS,\n  }\n\n  with slim.arg_scope(\n      [slim.conv2d],\n      weights_regularizer=slim.l2_regularizer(weight_decay),\n      weights_initializer=slim.variance_scaling_initializer(),\n      activation_fn=tf.nn.relu,\n      normalizer_fn=slim.batch_norm,\n      normalizer_params=batch_norm_params):\n    with slim.arg_scope([slim.batch_norm], **batch_norm_params):\n      # The following implies padding='SAME' for pool1, which makes feature\n      # alignment easier for dense prediction tasks. This is also used in\n      # https://github.com/facebook/fb.resnet.torch. However the accompanying\n      # code of 'Deep Residual Learning for Image Recognition' uses\n      # padding='VALID' for pool1. 
You can switch to that choice by setting\n      # slim.arg_scope([slim.max_pool2d], padding='VALID').\n      with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:\n        return arg_sc\n"
  },
  {
    "path": "models/slim/nets/resnet_v1.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains definitions for the original form of Residual Networks.\n\nThe 'v1' residual networks (ResNets) implemented in this module were proposed\nby:\n[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n    Deep Residual Learning for Image Recognition. arXiv:1512.03385\n\nOther variants were introduced in:\n[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027\n\nThe networks defined in this module utilize the bottleneck building block of\n[1] with projection shortcuts only for increasing depths. They employ batch\nnormalization *after* every weight layer. This is the architecture used by\nMSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and\nResNet-152. See [2; Fig. 
1a] for a comparison between the current 'v1'\narchitecture and the alternative 'v2' architecture of [2] which uses batch\nnormalization *before* every weight layer in the so-called full pre-activation\nunits.\n\nTypical use:\n\n   from tensorflow.contrib.slim.nets import resnet_v1\n\nResNet-101 for image classification into 1000 classes:\n\n   # inputs has shape [batch, 224, 224, 3]\n   with slim.arg_scope(resnet_v1.resnet_arg_scope()):\n      net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)\n\nResNet-101 for semantic segmentation into 21 classes:\n\n   # inputs has shape [batch, 513, 513, 3]\n   with slim.arg_scope(resnet_v1.resnet_arg_scope()):\n      net, end_points = resnet_v1.resnet_v1_101(inputs,\n                                                21,\n                                                is_training=False,\n                                                global_pool=False,\n                                                output_stride=16)\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import resnet_utils\n\n\nresnet_arg_scope = resnet_utils.resnet_arg_scope\nslim = tf.contrib.slim\n\n\n@slim.add_arg_scope\ndef bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,\n               outputs_collections=None, scope=None):\n  \"\"\"Bottleneck residual unit variant with BN after convolutions.\n\n  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for\n  its definition. 
Note that we use here the bottleneck variant which has an\n  extra bottleneck layer.\n\n  When putting together two consecutive ResNet blocks that use this unit, one\n  should use stride = 2 in the last unit of the first block.\n\n  Args:\n    inputs: A tensor of size [batch, height, width, channels].\n    depth: The depth of the ResNet unit output.\n    depth_bottleneck: The depth of the bottleneck layers.\n    stride: The ResNet unit's stride. Determines the amount of downsampling of\n      the units output compared to its input.\n    rate: An integer, rate for atrous convolution.\n    outputs_collections: Collection to add the ResNet unit output.\n    scope: Optional variable_scope.\n\n  Returns:\n    The ResNet unit's output.\n  \"\"\"\n  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:\n    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)\n    if depth == depth_in:\n      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')\n    else:\n      shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,\n                             activation_fn=None, scope='shortcut')\n\n    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,\n                           scope='conv1')\n    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,\n                                        rate=rate, scope='conv2')\n    residual = slim.conv2d(residual, depth, [1, 1], stride=1,\n                           activation_fn=None, scope='conv3')\n\n    output = tf.nn.relu(shortcut + residual)\n\n    return slim.utils.collect_named_outputs(outputs_collections,\n                                            sc.original_name_scope,\n                                            output)\n\n\ndef resnet_v1(inputs,\n              blocks,\n              num_classes=None,\n              is_training=True,\n              global_pool=True,\n              output_stride=None,\n              include_root_block=True,\n             
 train_top_bn=False,\n              dropout_keep_prob=1.0,\n              reuse=None,\n              scope=None):\n  \"\"\"Generator for v1 ResNet models.\n\n  This function generates a family of ResNet v1 models. See the resnet_v1_*()\n  methods for specific model instantiations, obtained by selecting different\n  block instantiations that produce ResNets of various depths.\n\n  Training for image classification on Imagenet is usually done with [224, 224]\n  inputs, resulting in [7, 7] feature maps at the output of the last ResNet\n  block for the ResNets defined in [1] that have nominal stride equal to 32.\n  However, for dense prediction tasks we advise that one uses inputs with\n  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In\n  this case the feature maps at the ResNet output will have spatial shape\n  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]\n  and corners exactly aligned with the input image corners, which greatly\n  facilitates alignment of the features to the image. Using as input [225, 225]\n  images results in [8, 8] feature maps at the output of the last ResNet block.\n\n  For dense prediction tasks, the ResNet needs to run in fully-convolutional\n  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all\n  have nominal stride equal to 32 and a good choice in FCN mode is to use\n  output_stride=16 in order to increase the density of the computed features at\n  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.\n\n  Args:\n    inputs: A tensor of size [batch, height_in, width_in, channels].\n    blocks: A list of length equal to the number of ResNet blocks. Each element\n      is a resnet_utils.Block object describing the units in the block.\n    num_classes: Number of predicted classes for classification tasks. 
If None\n      we return the features before the logit layer.\n    is_training: whether is training or not.\n    global_pool: If True, we perform global average pooling before computing the\n      logits. Set to True for image classification, False for dense prediction.\n    output_stride: If None, then the output will be computed at the nominal\n      network stride. If output_stride is not None, it specifies the requested\n      ratio of input to output spatial resolution.\n    include_root_block: If True, include the initial convolution followed by\n      max-pooling, if False excludes it.\n    train_top_bn: If True, then train batch norm for the root block, but make\n      it testing mode for the rest of the network. If False (default), keep all\n      the batch norms training.\n    dropout_keep_prob: (0, 1]. If <1, will apply dropout on the final layer\n      after avg pooling.\n    reuse: whether or not the network and its variables should be reused. To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n  Returns:\n    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].\n      If global_pool is False, then height_out and width_out are reduced by a\n      factor of output_stride compared to the respective height_in and width_in,\n      else both height_out and width_out equal one. If num_classes is None, then\n      net is the output of the last ResNet block, potentially after global\n      average pooling. 
If num_classes is not None, net contains the pre-softmax\n      activations.\n    end_points: A dictionary from components of the network to the corresponding\n      activation.\n\n  Raises:\n    ValueError: If the target output_stride is not valid.\n  \"\"\"\n  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:\n    end_points_collection = sc.name + '_end_points'\n    with slim.arg_scope([slim.conv2d, bottleneck,\n                         resnet_utils.stack_blocks_dense],\n                        outputs_collections=end_points_collection):\n      with slim.arg_scope(\n        [slim.batch_norm],\n        is_training=is_training if not train_top_bn else False,\n        trainable=True if not train_top_bn else False):\n        net = inputs\n        if include_root_block:\n          if output_stride is not None:\n            if output_stride % 4 != 0:\n              raise ValueError('The output_stride needs to be a multiple of 4.')\n            output_stride /= 4\n          with slim.arg_scope([slim.batch_norm],\n                              is_training=is_training,\n                              trainable=True):\n            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')\n          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')\n        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)\n        if global_pool:\n          # Global average pooling.\n          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)\n        if dropout_keep_prob < 1.0:\n          tf.logging.info('ResNet v1: Using dropout {}.'.format(\n            1-dropout_keep_prob))\n          net = slim.dropout(net, keep_prob=dropout_keep_prob,\n                             is_training=is_training)\n        if num_classes is not None:\n          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                            normalizer_fn=None, scope='logits')\n        # Convert end_points_collection into a 
dictionary of end_points.\n        end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n        if num_classes is not None:\n          end_points['predictions'] = slim.softmax(net, scope='predictions')\n        return net, end_points\nresnet_v1.default_image_size = 224\n\n\ndef resnet_v1_50(inputs,\n                 num_classes=None,\n                 is_training=True,\n                 global_pool=True,\n                 output_stride=None,\n                 reuse=None,\n                 scope='resnet_v1_50'):\n  \"\"\"ResNet-50 model of [1]. See resnet_v1() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)\n  ]\n  return resnet_v1(inputs, blocks, num_classes, is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n\n\ndef resnet_v1_101(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  train_top_bn=None,\n                  dropout_keep_prob=1.0,\n                  scope='resnet_v1_101'):\n  \"\"\"ResNet-101 model of [1]. 
See resnet_v1() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)\n  ]\n  return resnet_v1(inputs, blocks, num_classes, is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, train_top_bn=train_top_bn,\n                   dropout_keep_prob=dropout_keep_prob,\n                   reuse=reuse, scope=scope)\n\n\ndef resnet_v1_152(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  scope='resnet_v1_152'):\n  \"\"\"ResNet-152 model of [1]. 
See resnet_v1() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v1(inputs, blocks, num_classes, is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n\n\ndef resnet_v1_200(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  scope='resnet_v1_200'):\n  \"\"\"ResNet-200 model of [2]. See resnet_v1() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v1(inputs, blocks, num_classes, is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n"
  },
  {
    "path": "models/slim/nets/resnet_v1_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.nets.resnet_v1.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom nets import resnet_utils\nfrom nets import resnet_v1\n\nslim = tf.contrib.slim\n\n\ndef create_test_input(batch_size, height, width, channels):\n  \"\"\"Create test input tensor.\n\n  Args:\n    batch_size: The number of images per batch or `None` if unknown.\n    height: The height of each image or `None` if unknown.\n    width: The width of each image or `None` if unknown.\n    channels: The number of channels per image or `None` if unknown.\n\n  Returns:\n    Either a placeholder `Tensor` of dimension\n      [batch_size, height, width, channels] if any of the inputs are `None` or a\n    constant `Tensor` with the mesh grid values along the spatial dimensions.\n  \"\"\"\n  if None in [batch_size, height, width, channels]:\n    return tf.placeholder(tf.float32, (batch_size, height, width, channels))\n  else:\n    return tf.to_float(\n        np.tile(\n            np.reshape(\n                np.reshape(np.arange(height), [height, 1]) +\n                np.reshape(np.arange(width), [1, width]),\n                [1, height, width, 1]),\n            
[batch_size, 1, 1, channels]))\n\n\nclass ResnetUtilsTest(tf.test.TestCase):\n\n  def testSubsampleThreeByThree(self):\n    x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])\n    x = resnet_utils.subsample(x, 2)\n    expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])\n    with self.test_session():\n      self.assertAllClose(x.eval(), expected.eval())\n\n  def testSubsampleFourByFour(self):\n    x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])\n    x = resnet_utils.subsample(x, 2)\n    expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])\n    with self.test_session():\n      self.assertAllClose(x.eval(), expected.eval())\n\n  def testConv2DSameEven(self):\n    n, n2 = 4, 2\n\n    # Input image.\n    x = create_test_input(1, n, n, 1)\n\n    # Convolution kernel.\n    w = create_test_input(1, 3, 3, 1)\n    w = tf.reshape(w, [3, 3, 1, 1])\n\n    tf.get_variable('Conv/weights', initializer=w)\n    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))\n    tf.get_variable_scope().reuse_variables()\n\n    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')\n    y1_expected = tf.to_float([[14, 28, 43, 26],\n                               [28, 48, 66, 37],\n                               [43, 66, 84, 46],\n                               [26, 37, 46, 22]])\n    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])\n\n    y2 = resnet_utils.subsample(y1, 2)\n    y2_expected = tf.to_float([[14, 43],\n                               [43, 84]])\n    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])\n\n    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')\n    y3_expected = y2_expected\n\n    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')\n    y4_expected = tf.to_float([[48, 37],\n                               [37, 22]])\n    y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      self.assertAllClose(y1.eval(), 
y1_expected.eval())\n      self.assertAllClose(y2.eval(), y2_expected.eval())\n      self.assertAllClose(y3.eval(), y3_expected.eval())\n      self.assertAllClose(y4.eval(), y4_expected.eval())\n\n  def testConv2DSameOdd(self):\n    n, n2 = 5, 3\n\n    # Input image.\n    x = create_test_input(1, n, n, 1)\n\n    # Convolution kernel.\n    w = create_test_input(1, 3, 3, 1)\n    w = tf.reshape(w, [3, 3, 1, 1])\n\n    tf.get_variable('Conv/weights', initializer=w)\n    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))\n    tf.get_variable_scope().reuse_variables()\n\n    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')\n    y1_expected = tf.to_float([[14, 28, 43, 58, 34],\n                               [28, 48, 66, 84, 46],\n                               [43, 66, 84, 102, 55],\n                               [58, 84, 102, 120, 64],\n                               [34, 46, 55, 64, 30]])\n    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])\n\n    y2 = resnet_utils.subsample(y1, 2)\n    y2_expected = tf.to_float([[14, 43, 34],\n                               [43, 84, 55],\n                               [34, 55, 30]])\n    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])\n\n    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')\n    y3_expected = y2_expected\n\n    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')\n    y4_expected = y2_expected\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      self.assertAllClose(y1.eval(), y1_expected.eval())\n      self.assertAllClose(y2.eval(), y2_expected.eval())\n      self.assertAllClose(y3.eval(), y3_expected.eval())\n      self.assertAllClose(y4.eval(), y4_expected.eval())\n\n  def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):\n    \"\"\"A plain ResNet without extra layers before or after the ResNet blocks.\"\"\"\n    with tf.variable_scope(scope, values=[inputs]):\n      with slim.arg_scope([slim.conv2d], 
outputs_collections='end_points'):\n        net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)\n        end_points = dict(tf.get_collection('end_points'))\n        return net, end_points\n\n  def testEndPointsV1(self):\n    \"\"\"Test the end points of a tiny v1 bottleneck network.\"\"\"\n    bottleneck = resnet_v1.bottleneck\n    blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),\n              resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]\n    inputs = create_test_input(2, 32, 16, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')\n    expected = [\n        'tiny/block1/unit_1/bottleneck_v1/shortcut',\n        'tiny/block1/unit_1/bottleneck_v1/conv1',\n        'tiny/block1/unit_1/bottleneck_v1/conv2',\n        'tiny/block1/unit_1/bottleneck_v1/conv3',\n        'tiny/block1/unit_2/bottleneck_v1/conv1',\n        'tiny/block1/unit_2/bottleneck_v1/conv2',\n        'tiny/block1/unit_2/bottleneck_v1/conv3',\n        'tiny/block2/unit_1/bottleneck_v1/shortcut',\n        'tiny/block2/unit_1/bottleneck_v1/conv1',\n        'tiny/block2/unit_1/bottleneck_v1/conv2',\n        'tiny/block2/unit_1/bottleneck_v1/conv3',\n        'tiny/block2/unit_2/bottleneck_v1/conv1',\n        'tiny/block2/unit_2/bottleneck_v1/conv2',\n        'tiny/block2/unit_2/bottleneck_v1/conv3']\n    self.assertItemsEqual(expected, end_points)\n\n  def _stack_blocks_nondense(self, net, blocks):\n    \"\"\"A simplified ResNet Block stacker without output stride control.\"\"\"\n    for block in blocks:\n      with tf.variable_scope(block.scope, 'block', [net]):\n        for i, unit in enumerate(block.args):\n          depth, depth_bottleneck, stride = unit\n          with tf.variable_scope('unit_%d' % (i + 1), values=[net]):\n            net = block.unit_fn(net,\n                                depth=depth,\n                                
depth_bottleneck=depth_bottleneck,\n                                stride=stride,\n                                rate=1)\n    return net\n\n  def _atrousValues(self, bottleneck):\n    \"\"\"Verify the values of dense feature extraction by atrous convolution.\n\n    Make sure that dense feature extraction by stack_blocks_dense() followed by\n    subsampling gives identical results to feature extraction at the nominal\n    network output stride using the simple self._stack_blocks_nondense() above.\n\n    Args:\n      bottleneck: The bottleneck function.\n    \"\"\"\n    blocks = [\n        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),\n        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),\n        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),\n        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])\n    ]\n    nominal_stride = 8\n\n    # Test both odd and even input dimensions.\n    height = 30\n    width = 31\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      with slim.arg_scope([slim.batch_norm], is_training=False):\n        for output_stride in [1, 2, 4, 8, None]:\n          with tf.Graph().as_default():\n            with self.test_session() as sess:\n              tf.set_random_seed(0)\n              inputs = create_test_input(1, height, width, 3)\n              # Dense feature extraction followed by subsampling.\n              output = resnet_utils.stack_blocks_dense(inputs,\n                                                       blocks,\n                                                       output_stride)\n              if output_stride is None:\n                factor = 1\n              else:\n                factor = nominal_stride // output_stride\n\n              output = resnet_utils.subsample(output, factor)\n              # Make the two networks use the same weights.\n              tf.get_variable_scope().reuse_variables()\n              # Feature 
extraction at the nominal network rate.\n              expected = self._stack_blocks_nondense(inputs, blocks)\n              sess.run(tf.global_variables_initializer())\n              output, expected = sess.run([output, expected])\n              self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)\n\n  def testAtrousValuesBottleneck(self):\n    self._atrousValues(resnet_v1.bottleneck)\n\n\nclass ResnetCompleteNetworkTest(tf.test.TestCase):\n  \"\"\"Tests with complete small ResNet v1 networks.\"\"\"\n\n  def _resnet_small(self,\n                    inputs,\n                    num_classes=None,\n                    is_training=True,\n                    global_pool=True,\n                    output_stride=None,\n                    include_root_block=True,\n                    reuse=None,\n                    scope='resnet_v1_small'):\n    \"\"\"A shallow and thin ResNet v1 for faster tests.\"\"\"\n    bottleneck = resnet_v1.bottleneck\n    blocks = [\n        resnet_utils.Block(\n            'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),\n        resnet_utils.Block(\n            'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),\n        resnet_utils.Block(\n            'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),\n        resnet_utils.Block(\n            'block4', bottleneck, [(32, 8, 1)] * 2)]\n    return resnet_v1.resnet_v1(inputs, blocks, num_classes,\n                               is_training=is_training,\n                               global_pool=global_pool,\n                               output_stride=output_stride,\n                               include_root_block=include_root_block,\n                               reuse=reuse,\n                               scope=scope)\n\n  def testClassificationEndPoints(self):\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(2, 224, 224, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      logits, end_points = self._resnet_small(inputs, 
num_classes,\n                                              global_pool=global_pool,\n                                              scope='resnet')\n    self.assertTrue(logits.op.name.startswith('resnet/logits'))\n    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])\n    self.assertTrue('predictions' in end_points)\n    self.assertListEqual(end_points['predictions'].get_shape().as_list(),\n                         [2, 1, 1, num_classes])\n\n  def testClassificationShapes(self):\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(2, 224, 224, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 28, 28, 4],\n          'resnet/block2': [2, 14, 14, 8],\n          'resnet/block3': [2, 7, 7, 16],\n          'resnet/block4': [2, 7, 7, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    inputs = create_test_input(2, 321, 321, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 41, 41, 4],\n          'resnet/block2': [2, 21, 21, 8],\n          'resnet/block3': [2, 11, 11, 16],\n          'resnet/block4': [2, 11, 11, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def 
testRootlessFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    inputs = create_test_input(2, 128, 128, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         include_root_block=False,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 64, 64, 4],\n          'resnet/block2': [2, 32, 32, 8],\n          'resnet/block3': [2, 16, 16, 16],\n          'resnet/block4': [2, 16, 16, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testAtrousFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    output_stride = 8\n    inputs = create_test_input(2, 321, 321, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs,\n                                         num_classes,\n                                         global_pool=global_pool,\n                                         output_stride=output_stride,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 41, 41, 4],\n          'resnet/block2': [2, 41, 41, 8],\n          'resnet/block3': [2, 41, 41, 16],\n          'resnet/block4': [2, 41, 41, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testAtrousFullyConvolutionalValues(self):\n    \"\"\"Verify dense feature extraction with atrous convolution.\"\"\"\n    nominal_stride = 32\n    for output_stride in [4, 8, 16, 32, None]:\n      with 
slim.arg_scope(resnet_utils.resnet_arg_scope()):\n        with tf.Graph().as_default():\n          with self.test_session() as sess:\n            tf.set_random_seed(0)\n            inputs = create_test_input(2, 81, 81, 3)\n            # Dense feature extraction followed by subsampling.\n            output, _ = self._resnet_small(inputs, None, is_training=False,\n                                           global_pool=False,\n                                           output_stride=output_stride)\n            if output_stride is None:\n              factor = 1\n            else:\n              factor = nominal_stride // output_stride\n            output = resnet_utils.subsample(output, factor)\n            # Make the two networks use the same weights.\n            tf.get_variable_scope().reuse_variables()\n            # Feature extraction at the nominal network rate.\n            expected, _ = self._resnet_small(inputs, None, is_training=False,\n                                             global_pool=False)\n            sess.run(tf.global_variables_initializer())\n            self.assertAllClose(output.eval(), expected.eval(),\n                                atol=1e-4, rtol=1e-4)\n\n  def testUnknownBatchSize(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(None, height, width, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      logits, _ = self._resnet_small(inputs, num_classes,\n                                     global_pool=global_pool,\n                                     scope='resnet')\n    self.assertTrue(logits.op.name.startswith('resnet/logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [None, 1, 1, num_classes])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n    
  self.assertEqual(output.shape, (batch, 1, 1, num_classes))\n\n  def testFullyConvolutionalUnknownHeightWidth(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = False\n    inputs = create_test_input(batch, None, None, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      output, _ = self._resnet_small(inputs, None, global_pool=global_pool)\n    self.assertListEqual(output.get_shape().as_list(),\n                         [batch, None, None, 32])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(output, {inputs: images.eval()})\n      self.assertEqual(output.shape, (batch, 3, 3, 32))\n\n  def testAtrousFullyConvolutionalUnknownHeightWidth(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = False\n    output_stride = 8\n    inputs = create_test_input(batch, None, None, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      output, _ = self._resnet_small(inputs,\n                                     None,\n                                     global_pool=global_pool,\n                                     output_stride=output_stride)\n    self.assertListEqual(output.get_shape().as_list(),\n                         [batch, None, None, 32])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(output, {inputs: images.eval()})\n      self.assertEqual(output.shape, (batch, 9, 9, 32))\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/resnet_v2.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains definitions for the preactivation form of Residual Networks.\n\nResidual networks (ResNets) were originally proposed in:\n[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n    Deep Residual Learning for Image Recognition. arXiv:1512.03385\n\nThe full preactivation 'v2' ResNet variant implemented in this module was\nintroduced by:\n[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027\n\nThe key difference of the full preactivation 'v2' variant compared to the\n'v1' variant in [1] is the use of batch normalization before every weight layer.\nAnother difference is that 'v2' ResNets do not include an activation function in\nthe main pathway. Also see [2; Fig. 
4e].\n\nTypical use:\n\n   from tensorflow.contrib.slim.nets import resnet_v2\n\nResNet-101 for image classification into 1000 classes:\n\n   # inputs has shape [batch, 224, 224, 3]\n   with slim.arg_scope(resnet_v2.resnet_arg_scope()):\n      net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)\n\nResNet-101 for semantic segmentation into 21 classes:\n\n   # inputs has shape [batch, 513, 513, 3]\n   with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)):\n      net, end_points = resnet_v2.resnet_v2_101(inputs,\n                                                21,\n                                                is_training=False,\n                                                global_pool=False,\n                                                output_stride=16)\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import resnet_utils\n\nslim = tf.contrib.slim\nresnet_arg_scope = resnet_utils.resnet_arg_scope\n\n\n@slim.add_arg_scope\ndef bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,\n               outputs_collections=None, scope=None):\n  \"\"\"Bottleneck residual unit variant with BN before convolutions.\n\n  This is the full preactivation residual unit variant proposed in [2]. See\n  Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck\n  variant which has an extra bottleneck layer.\n\n  When putting together two consecutive ResNet blocks that use this unit, one\n  should use stride = 2 in the last unit of the first block.\n\n  Args:\n    inputs: A tensor of size [batch, height, width, channels].\n    depth: The depth of the ResNet unit output.\n    depth_bottleneck: The depth of the bottleneck layers.\n    stride: The ResNet unit's stride. 
Determines the amount of downsampling of\n      the units output compared to its input.\n    rate: An integer, rate for atrous convolution.\n    outputs_collections: Collection to add the ResNet unit output.\n    scope: Optional variable_scope.\n\n  Returns:\n    The ResNet unit's output.\n  \"\"\"\n  with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:\n    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)\n    preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')\n    if depth == depth_in:\n      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')\n    else:\n      shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,\n                             normalizer_fn=None, activation_fn=None,\n                             scope='shortcut')\n\n    residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,\n                           scope='conv1')\n    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,\n                                        rate=rate, scope='conv2')\n    residual = slim.conv2d(residual, depth, [1, 1], stride=1,\n                           normalizer_fn=None, activation_fn=None,\n                           scope='conv3')\n\n    output = shortcut + residual\n\n    return slim.utils.collect_named_outputs(outputs_collections,\n                                            sc.original_name_scope,\n                                            output)\n\n\ndef resnet_v2(inputs,\n              blocks,\n              num_classes=None,\n              is_training=True,\n              global_pool=True,\n              output_stride=None,\n              include_root_block=True,\n              reuse=None,\n              scope=None):\n  \"\"\"Generator for v2 (preactivation) ResNet models.\n\n  This function generates a family of ResNet v2 models. 
See the resnet_v2_*()\n  methods for specific model instantiations, obtained by selecting different\n  block instantiations that produce ResNets of various depths.\n\n  Training for image classification on Imagenet is usually done with [224, 224]\n  inputs, resulting in [7, 7] feature maps at the output of the last ResNet\n  block for the ResNets defined in [1] that have nominal stride equal to 32.\n  However, for dense prediction tasks we advise that one uses inputs with\n  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In\n  this case the feature maps at the ResNet output will have spatial shape\n  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]\n  and corners exactly aligned with the input image corners, which greatly\n  facilitates alignment of the features to the image. Using as input [225, 225]\n  images results in [8, 8] feature maps at the output of the last ResNet block.\n\n  For dense prediction tasks, the ResNet needs to run in fully-convolutional\n  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all\n  have nominal stride equal to 32 and a good choice in FCN mode is to use\n  output_stride=16 in order to increase the density of the computed features at\n  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.\n\n  Args:\n    inputs: A tensor of size [batch, height_in, width_in, channels].\n    blocks: A list of length equal to the number of ResNet blocks. Each element\n      is a resnet_utils.Block object describing the units in the block.\n    num_classes: Number of predicted classes for classification tasks. If None\n      we return the features before the logit layer.\n    is_training: whether is training or not.\n    global_pool: If True, we perform global average pooling before computing the\n      logits. 
Set to True for image classification, False for dense prediction.\n    output_stride: If None, then the output will be computed at the nominal\n      network stride. If output_stride is not None, it specifies the requested\n      ratio of input to output spatial resolution.\n    include_root_block: If True, include the initial convolution followed by\n      max-pooling, if False excludes it. If excluded, `inputs` should be the\n      results of an activation-less convolution.\n    reuse: whether or not the network and its variables should be reused. To be\n      able to reuse 'scope' must be given.\n    scope: Optional variable_scope.\n\n\n  Returns:\n    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].\n      If global_pool is False, then height_out and width_out are reduced by a\n      factor of output_stride compared to the respective height_in and width_in,\n      else both height_out and width_out equal one. If num_classes is None, then\n      net is the output of the last ResNet block, potentially after global\n      average pooling. 
If num_classes is not None, net contains the pre-softmax\n      activations.\n    end_points: A dictionary from components of the network to the corresponding\n      activation.\n\n  Raises:\n    ValueError: If the target output_stride is not valid.\n  \"\"\"\n  with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:\n    end_points_collection = sc.name + '_end_points'\n    with slim.arg_scope([slim.conv2d, bottleneck,\n                         resnet_utils.stack_blocks_dense],\n                        outputs_collections=end_points_collection):\n      with slim.arg_scope([slim.batch_norm], is_training=is_training):\n        net = inputs\n        if include_root_block:\n          if output_stride is not None:\n            if output_stride % 4 != 0:\n              raise ValueError('The output_stride needs to be a multiple of 4.')\n            output_stride /= 4\n          # We do not include batch normalization or activation functions in\n          # conv1 because the first ResNet unit will perform these. Cf.\n          # Appendix of [2].\n          with slim.arg_scope([slim.conv2d],\n                              activation_fn=None, normalizer_fn=None):\n            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')\n          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')\n        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)\n        # This is needed because the pre-activation variant does not have batch\n        # normalization or activation functions in the residual unit output. 
See\n        # Appendix of [2].\n        net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')\n        if global_pool:\n          # Global average pooling.\n          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)\n        if num_classes is not None:\n          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,\n                            normalizer_fn=None, scope='logits')\n        # Convert end_points_collection into a dictionary of end_points.\n        end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n        if num_classes is not None:\n          end_points['predictions'] = slim.softmax(net, scope='predictions')\n        return net, end_points\nresnet_v2.default_image_size = 224\n\n\ndef resnet_v2_50(inputs,\n                 num_classes=None,\n                 is_training=True,\n                 global_pool=True,\n                 output_stride=None,\n                 reuse=None,\n                 scope='resnet_v2_50'):\n  \"\"\"ResNet-50 model of [1]. 
See resnet_v2() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n\n\ndef resnet_v2_101(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  scope='resnet_v2_101'):\n  \"\"\"ResNet-101 model of [1]. See resnet_v2() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n\n\ndef resnet_v2_152(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  scope='resnet_v2_152'):\n  \"\"\"ResNet-152 model of [1]. 
See resnet_v2() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n\n\ndef resnet_v2_200(inputs,\n                  num_classes=None,\n                  is_training=True,\n                  global_pool=True,\n                  output_stride=None,\n                  reuse=None,\n                  scope='resnet_v2_200'):\n  \"\"\"ResNet-200 model of [2]. See resnet_v2() for arg and return description.\"\"\"\n  blocks = [\n      resnet_utils.Block(\n          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),\n      resnet_utils.Block(\n          'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),\n      resnet_utils.Block(\n          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),\n      resnet_utils.Block(\n          'block4', bottleneck, [(2048, 512, 1)] * 3)]\n  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,\n                   global_pool=global_pool, output_stride=output_stride,\n                   include_root_block=True, reuse=reuse, scope=scope)\n"
  },
  {
    "path": "models/slim/nets/resnet_v2_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.nets.resnet_v2.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport numpy as np\nimport tensorflow as tf\n\nfrom nets import resnet_utils\nfrom nets import resnet_v2\n\nslim = tf.contrib.slim\n\n\ndef create_test_input(batch_size, height, width, channels):\n  \"\"\"Create test input tensor.\n\n  Args:\n    batch_size: The number of images per batch or `None` if unknown.\n    height: The height of each image or `None` if unknown.\n    width: The width of each image or `None` if unknown.\n    channels: The number of channels per image or `None` if unknown.\n\n  Returns:\n    Either a placeholder `Tensor` of dimension\n      [batch_size, height, width, channels] if any of the inputs are `None` or a\n    constant `Tensor` with the mesh grid values along the spatial dimensions.\n  \"\"\"\n  if None in [batch_size, height, width, channels]:\n    return tf.placeholder(tf.float32, (batch_size, height, width, channels))\n  else:\n    return tf.to_float(\n        np.tile(\n            np.reshape(\n                np.reshape(np.arange(height), [height, 1]) +\n                np.reshape(np.arange(width), [1, width]),\n                [1, height, width, 1]),\n            
[batch_size, 1, 1, channels]))\n\n\nclass ResnetUtilsTest(tf.test.TestCase):\n\n  def testSubsampleThreeByThree(self):\n    x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])\n    x = resnet_utils.subsample(x, 2)\n    expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])\n    with self.test_session():\n      self.assertAllClose(x.eval(), expected.eval())\n\n  def testSubsampleFourByFour(self):\n    x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])\n    x = resnet_utils.subsample(x, 2)\n    expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])\n    with self.test_session():\n      self.assertAllClose(x.eval(), expected.eval())\n\n  def testConv2DSameEven(self):\n    n, n2 = 4, 2\n\n    # Input image.\n    x = create_test_input(1, n, n, 1)\n\n    # Convolution kernel.\n    w = create_test_input(1, 3, 3, 1)\n    w = tf.reshape(w, [3, 3, 1, 1])\n\n    tf.get_variable('Conv/weights', initializer=w)\n    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))\n    tf.get_variable_scope().reuse_variables()\n\n    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')\n    y1_expected = tf.to_float([[14, 28, 43, 26],\n                               [28, 48, 66, 37],\n                               [43, 66, 84, 46],\n                               [26, 37, 46, 22]])\n    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])\n\n    y2 = resnet_utils.subsample(y1, 2)\n    y2_expected = tf.to_float([[14, 43],\n                               [43, 84]])\n    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])\n\n    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')\n    y3_expected = y2_expected\n\n    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')\n    y4_expected = tf.to_float([[48, 37],\n                               [37, 22]])\n    y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      self.assertAllClose(y1.eval(), 
y1_expected.eval())\n      self.assertAllClose(y2.eval(), y2_expected.eval())\n      self.assertAllClose(y3.eval(), y3_expected.eval())\n      self.assertAllClose(y4.eval(), y4_expected.eval())\n\n  def testConv2DSameOdd(self):\n    n, n2 = 5, 3\n\n    # Input image.\n    x = create_test_input(1, n, n, 1)\n\n    # Convolution kernel.\n    w = create_test_input(1, 3, 3, 1)\n    w = tf.reshape(w, [3, 3, 1, 1])\n\n    tf.get_variable('Conv/weights', initializer=w)\n    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))\n    tf.get_variable_scope().reuse_variables()\n\n    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')\n    y1_expected = tf.to_float([[14, 28, 43, 58, 34],\n                               [28, 48, 66, 84, 46],\n                               [43, 66, 84, 102, 55],\n                               [58, 84, 102, 120, 64],\n                               [34, 46, 55, 64, 30]])\n    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])\n\n    y2 = resnet_utils.subsample(y1, 2)\n    y2_expected = tf.to_float([[14, 43, 34],\n                               [43, 84, 55],\n                               [34, 55, 30]])\n    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])\n\n    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')\n    y3_expected = y2_expected\n\n    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')\n    y4_expected = y2_expected\n\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      self.assertAllClose(y1.eval(), y1_expected.eval())\n      self.assertAllClose(y2.eval(), y2_expected.eval())\n      self.assertAllClose(y3.eval(), y3_expected.eval())\n      self.assertAllClose(y4.eval(), y4_expected.eval())\n\n  def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):\n    \"\"\"A plain ResNet without extra layers before or after the ResNet blocks.\"\"\"\n    with tf.variable_scope(scope, values=[inputs]):\n      with slim.arg_scope([slim.conv2d], 
outputs_collections='end_points'):\n        net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)\n        end_points = dict(tf.get_collection('end_points'))\n        return net, end_points\n\n  def testEndPointsV2(self):\n    \"\"\"Test the end points of a tiny v2 bottleneck network.\"\"\"\n    bottleneck = resnet_v2.bottleneck\n    blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),\n              resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]\n    inputs = create_test_input(2, 32, 16, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')\n    expected = [\n        'tiny/block1/unit_1/bottleneck_v2/shortcut',\n        'tiny/block1/unit_1/bottleneck_v2/conv1',\n        'tiny/block1/unit_1/bottleneck_v2/conv2',\n        'tiny/block1/unit_1/bottleneck_v2/conv3',\n        'tiny/block1/unit_2/bottleneck_v2/conv1',\n        'tiny/block1/unit_2/bottleneck_v2/conv2',\n        'tiny/block1/unit_2/bottleneck_v2/conv3',\n        'tiny/block2/unit_1/bottleneck_v2/shortcut',\n        'tiny/block2/unit_1/bottleneck_v2/conv1',\n        'tiny/block2/unit_1/bottleneck_v2/conv2',\n        'tiny/block2/unit_1/bottleneck_v2/conv3',\n        'tiny/block2/unit_2/bottleneck_v2/conv1',\n        'tiny/block2/unit_2/bottleneck_v2/conv2',\n        'tiny/block2/unit_2/bottleneck_v2/conv3']\n    self.assertItemsEqual(expected, end_points)\n\n  def _stack_blocks_nondense(self, net, blocks):\n    \"\"\"A simplified ResNet Block stacker without output stride control.\"\"\"\n    for block in blocks:\n      with tf.variable_scope(block.scope, 'block', [net]):\n        for i, unit in enumerate(block.args):\n          depth, depth_bottleneck, stride = unit\n          with tf.variable_scope('unit_%d' % (i + 1), values=[net]):\n            net = block.unit_fn(net,\n                                depth=depth,\n                                
depth_bottleneck=depth_bottleneck,\n                                stride=stride,\n                                rate=1)\n    return net\n\n  def _atrousValues(self, bottleneck):\n    \"\"\"Verify the values of dense feature extraction by atrous convolution.\n\n    Make sure that dense feature extraction by stack_blocks_dense() followed by\n    subsampling gives identical results to feature extraction at the nominal\n    network output stride using the simple self._stack_blocks_nondense() above.\n\n    Args:\n      bottleneck: The bottleneck function.\n    \"\"\"\n    blocks = [\n        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),\n        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),\n        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),\n        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])\n    ]\n    nominal_stride = 8\n\n    # Test both odd and even input dimensions.\n    height = 30\n    width = 31\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      with slim.arg_scope([slim.batch_norm], is_training=False):\n        for output_stride in [1, 2, 4, 8, None]:\n          with tf.Graph().as_default():\n            with self.test_session() as sess:\n              tf.set_random_seed(0)\n              inputs = create_test_input(1, height, width, 3)\n              # Dense feature extraction followed by subsampling.\n              output = resnet_utils.stack_blocks_dense(inputs,\n                                                       blocks,\n                                                       output_stride)\n              if output_stride is None:\n                factor = 1\n              else:\n                factor = nominal_stride // output_stride\n\n              output = resnet_utils.subsample(output, factor)\n              # Make the two networks use the same weights.\n              tf.get_variable_scope().reuse_variables()\n              # Feature 
extraction at the nominal network rate.\n              expected = self._stack_blocks_nondense(inputs, blocks)\n              sess.run(tf.global_variables_initializer())\n              output, expected = sess.run([output, expected])\n              self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)\n\n  def testAtrousValuesBottleneck(self):\n    self._atrousValues(resnet_v2.bottleneck)\n\n\nclass ResnetCompleteNetworkTest(tf.test.TestCase):\n  \"\"\"Tests with complete small ResNet v2 networks.\"\"\"\n\n  def _resnet_small(self,\n                    inputs,\n                    num_classes=None,\n                    is_training=True,\n                    global_pool=True,\n                    output_stride=None,\n                    include_root_block=True,\n                    reuse=None,\n                    scope='resnet_v2_small'):\n    \"\"\"A shallow and thin ResNet v2 for faster tests.\"\"\"\n    bottleneck = resnet_v2.bottleneck\n    blocks = [\n        resnet_utils.Block(\n            'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),\n        resnet_utils.Block(\n            'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),\n        resnet_utils.Block(\n            'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),\n        resnet_utils.Block(\n            'block4', bottleneck, [(32, 8, 1)] * 2)]\n    return resnet_v2.resnet_v2(inputs, blocks, num_classes,\n                               is_training=is_training,\n                               global_pool=global_pool,\n                               output_stride=output_stride,\n                               include_root_block=include_root_block,\n                               reuse=reuse,\n                               scope=scope)\n\n  def testClassificationEndPoints(self):\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(2, 224, 224, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      logits, end_points = self._resnet_small(inputs, 
num_classes,\n                                              global_pool=global_pool,\n                                              scope='resnet')\n    self.assertTrue(logits.op.name.startswith('resnet/logits'))\n    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])\n    self.assertTrue('predictions' in end_points)\n    self.assertListEqual(end_points['predictions'].get_shape().as_list(),\n                         [2, 1, 1, num_classes])\n\n  def testClassificationShapes(self):\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(2, 224, 224, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 28, 28, 4],\n          'resnet/block2': [2, 14, 14, 8],\n          'resnet/block3': [2, 7, 7, 16],\n          'resnet/block4': [2, 7, 7, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    inputs = create_test_input(2, 321, 321, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 41, 41, 4],\n          'resnet/block2': [2, 21, 21, 8],\n          'resnet/block3': [2, 11, 11, 16],\n          'resnet/block4': [2, 11, 11, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def 
testRootlessFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    inputs = create_test_input(2, 128, 128, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs, num_classes,\n                                         global_pool=global_pool,\n                                         include_root_block=False,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 64, 64, 4],\n          'resnet/block2': [2, 32, 32, 8],\n          'resnet/block3': [2, 16, 16, 16],\n          'resnet/block4': [2, 16, 16, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testAtrousFullyConvolutionalEndpointShapes(self):\n    global_pool = False\n    num_classes = 10\n    output_stride = 8\n    inputs = create_test_input(2, 321, 321, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      _, end_points = self._resnet_small(inputs,\n                                         num_classes,\n                                         global_pool=global_pool,\n                                         output_stride=output_stride,\n                                         scope='resnet')\n      endpoint_to_shape = {\n          'resnet/block1': [2, 41, 41, 4],\n          'resnet/block2': [2, 41, 41, 8],\n          'resnet/block3': [2, 41, 41, 16],\n          'resnet/block4': [2, 41, 41, 32]}\n      for endpoint in endpoint_to_shape:\n        shape = endpoint_to_shape[endpoint]\n        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)\n\n  def testAtrousFullyConvolutionalValues(self):\n    \"\"\"Verify dense feature extraction with atrous convolution.\"\"\"\n    nominal_stride = 32\n    for output_stride in [4, 8, 16, 32, None]:\n      with 
slim.arg_scope(resnet_utils.resnet_arg_scope()):\n        with tf.Graph().as_default():\n          with self.test_session() as sess:\n            tf.set_random_seed(0)\n            inputs = create_test_input(2, 81, 81, 3)\n            # Dense feature extraction followed by subsampling.\n            output, _ = self._resnet_small(inputs, None,\n                                           is_training=False,\n                                           global_pool=False,\n                                           output_stride=output_stride)\n            if output_stride is None:\n              factor = 1\n            else:\n              factor = nominal_stride // output_stride\n            output = resnet_utils.subsample(output, factor)\n            # Make the two networks use the same weights.\n            tf.get_variable_scope().reuse_variables()\n            # Feature extraction at the nominal network rate.\n            expected, _ = self._resnet_small(inputs, None,\n                                             is_training=False,\n                                             global_pool=False)\n            sess.run(tf.global_variables_initializer())\n            self.assertAllClose(output.eval(), expected.eval(),\n                                atol=1e-4, rtol=1e-4)\n\n  def testUnknownBatchSize(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = True\n    num_classes = 10\n    inputs = create_test_input(None, height, width, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      logits, _ = self._resnet_small(inputs, num_classes,\n                                     global_pool=global_pool,\n                                     scope='resnet')\n    self.assertTrue(logits.op.name.startswith('resnet/logits'))\n    self.assertListEqual(logits.get_shape().as_list(),\n                         [None, 1, 1, num_classes])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      
sess.run(tf.global_variables_initializer())\n      output = sess.run(logits, {inputs: images.eval()})\n      self.assertEqual(output.shape, (batch, 1, 1, num_classes))\n\n  def testFullyConvolutionalUnknownHeightWidth(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = False\n    inputs = create_test_input(batch, None, None, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      output, _ = self._resnet_small(inputs, None,\n                                     global_pool=global_pool)\n    self.assertListEqual(output.get_shape().as_list(),\n                         [batch, None, None, 32])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(output, {inputs: images.eval()})\n      self.assertEqual(output.shape, (batch, 3, 3, 32))\n\n  def testAtrousFullyConvolutionalUnknownHeightWidth(self):\n    batch = 2\n    height, width = 65, 65\n    global_pool = False\n    output_stride = 8\n    inputs = create_test_input(batch, None, None, 3)\n    with slim.arg_scope(resnet_utils.resnet_arg_scope()):\n      output, _ = self._resnet_small(inputs,\n                                     None,\n                                     global_pool=global_pool,\n                                     output_stride=output_stride)\n    self.assertListEqual(output.get_shape().as_list(),\n                         [batch, None, None, 32])\n    images = create_test_input(batch, height, width, 3)\n    with self.test_session() as sess:\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(output, {inputs: images.eval()})\n      self.assertEqual(output.shape, (batch, 9, 9, 32))\n\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/nets/vgg.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains model definitions for versions of the Oxford VGG network.\n\nThese model definitions were introduced in the following technical report:\n\n  Very Deep Convolutional Networks For Large-Scale Image Recognition\n  Karen Simonyan and Andrew Zisserman\n  arXiv technical report, 2015\n  PDF: http://arxiv.org/pdf/1409.1556.pdf\n  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf\n  CC-BY-4.0\n\nMore information can be obtained from the VGG website:\nwww.robots.ox.ac.uk/~vgg/research/very_deep/\n\nUsage:\n  with slim.arg_scope(vgg.vgg_arg_scope()):\n    outputs, end_points = vgg.vgg_a(inputs)\n\n  with slim.arg_scope(vgg.vgg_arg_scope()):\n    outputs, end_points = vgg.vgg_16(inputs)\n\n@@vgg_a\n@@vgg_16\n@@vgg_19\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\ndef vgg_arg_scope(weight_decay=0.0005):\n  \"\"\"Defines the VGG arg scope.\n\n  Args:\n    weight_decay: The l2 regularization coefficient.\n\n  Returns:\n    An arg_scope.\n  \"\"\"\n  with slim.arg_scope([slim.conv2d, slim.fully_connected],\n                      activation_fn=tf.nn.relu,\n                      
weights_regularizer=slim.l2_regularizer(weight_decay),\n                      biases_initializer=tf.zeros_initializer()):\n    with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:\n      return arg_sc\n\n\ndef vgg_a(inputs,\n          num_classes=1000,\n          is_training=True,\n          dropout_keep_prob=0.5,\n          spatial_squeeze=False,\n          scope='vgg_a'):\n  \"\"\"Oxford Net VGG 11-Layers version A Example.\n\n  Note: All the fully_connected layers have been transformed to conv2d layers.\n        To use in classification mode, resize input to 224x224.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether or not the model is being trained.\n    dropout_keep_prob: the probability that activations are kept in the dropout\n      layers during training.\n    spatial_squeeze: whether or not should squeeze the spatial dimensions of the\n      outputs. Useful to remove unnecessary dimensions for classification.\n    scope: Optional scope for the variables.\n\n  Returns:\n    the last op containing the log predictions and end_points dict.\n  \"\"\"\n  with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:\n    end_points_collection = sc.name + '_end_points'\n    # Collect outputs for conv2d, fully_connected and max_pool2d.\n    with slim.arg_scope([slim.conv2d, slim.max_pool2d],\n                        outputs_collections=end_points_collection):\n      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')\n      net = slim.max_pool2d(net, [2, 2], scope='pool1')\n      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')\n      net = slim.max_pool2d(net, [2, 2], scope='pool2')\n      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')\n      net = slim.max_pool2d(net, [2, 2], scope='pool3')\n      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')\n      net = slim.max_pool2d(net, [2, 2], 
scope='pool4')\n      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')\n      net = slim.max_pool2d(net, [2, 2], scope='pool5')\n      # Use conv2d instead of fully_connected layers.\n      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout6')\n      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout7')\n      net = slim.conv2d(net, num_classes, [1, 1],\n                        activation_fn=None,\n                        normalizer_fn=None,\n                        scope='fc8')\n      # Convert end_points_collection into a end_point dict.\n      end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n      if spatial_squeeze:\n        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')\n        end_points[sc.name + '/fc8'] = net\n      return net, end_points\nvgg_a.default_image_size = 224\n\n\ndef vgg_16(inputs,\n           num_classes=1000,\n           is_training=True,\n           dropout_keep_prob=0.5,\n           spatial_squeeze=False,\n           train_top_bn=None,  # ignore, just for consistency\n           scope='vgg_16'):\n  \"\"\"Oxford Net VGG 16-Layers version D Example.\n\n  Note: All the fully_connected layers have been transformed to conv2d layers.\n        To use in classification mode, resize input to 224x224.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether or not the model is being trained.\n    dropout_keep_prob: the probability that activations are kept in the dropout\n      layers during training.\n    spatial_squeeze: whether or not should squeeze the spatial dimensions of the\n      outputs. 
Useful to remove unnecessary dimensions for classification.\n    scope: Optional scope for the variables.\n\n  Returns:\n    the last op containing the log predictions and end_points dict.\n  \"\"\"\n  with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:\n    end_points_collection = sc.name + '_end_points'\n    # Collect outputs for conv2d, fully_connected and max_pool2d.\n    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],\n                        outputs_collections=end_points_collection):\n      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')\n      net = slim.max_pool2d(net, [2, 2], scope='pool1')\n      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')\n      net = slim.max_pool2d(net, [2, 2], scope='pool2')\n      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')\n      net = slim.max_pool2d(net, [2, 2], scope='pool3')\n      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')\n      net = slim.max_pool2d(net, [2, 2], scope='pool4')\n      # net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')\n      # rgirdhar: remove the relu from last layer\n      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')\n      net = slim.conv2d(net, 512, [3, 3], activation_fn=None,\n                        scope='conv5/conv5_3')\n      conv5_output = net\n      net = tf.nn.relu(net)\n      net = slim.max_pool2d(net, [2, 2], scope='pool5')\n      # Use conv2d instead of fully_connected layers.\n      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout6')\n      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout7')\n      net = slim.conv2d(net, num_classes, [1, 1],\n                        activation_fn=None,\n  
                      normalizer_fn=None,\n                        scope='fc8')\n      # Convert end_points_collection into a end_point dict.\n      end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n      end_points['vgg_16/conv5'] = conv5_output\n      if spatial_squeeze:\n        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')\n        end_points[sc.name + '/fc8'] = net\n      return net, end_points\nvgg_16.default_image_size = 224\n\n\ndef vgg_19(inputs,\n           num_classes=1000,\n           is_training=True,\n           dropout_keep_prob=0.5,\n           spatial_squeeze=False,\n           scope='vgg_19'):\n  \"\"\"Oxford Net VGG 19-Layers version E Example.\n\n  Note: All the fully_connected layers have been transformed to conv2d layers.\n        To use in classification mode, resize input to 224x224.\n\n  Args:\n    inputs: a tensor of size [batch_size, height, width, channels].\n    num_classes: number of predicted classes.\n    is_training: whether or not the model is being trained.\n    dropout_keep_prob: the probability that activations are kept in the dropout\n      layers during training.\n    spatial_squeeze: whether or not should squeeze the spatial dimensions of the\n      outputs. 
Useful to remove unnecessary dimensions for classification.\n    scope: Optional scope for the variables.\n\n  Returns:\n    the last op containing the log predictions and end_points dict.\n  \"\"\"\n  with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:\n    end_points_collection = sc.name + '_end_points'\n    # Collect outputs for conv2d, fully_connected and max_pool2d.\n    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],\n                        outputs_collections=end_points_collection):\n      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')\n      net = slim.max_pool2d(net, [2, 2], scope='pool1')\n      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')\n      net = slim.max_pool2d(net, [2, 2], scope='pool2')\n      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')\n      net = slim.max_pool2d(net, [2, 2], scope='pool3')\n      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')\n      net = slim.max_pool2d(net, [2, 2], scope='pool4')\n      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')\n      net = slim.max_pool2d(net, [2, 2], scope='pool5')\n      # Use conv2d instead of fully_connected layers.\n      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout6')\n      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')\n      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,\n                         scope='dropout7')\n      net = slim.conv2d(net, num_classes, [1, 1],\n                        activation_fn=None,\n                        normalizer_fn=None,\n                        scope='fc8')\n      # Convert end_points_collection into a end_point dict.\n      end_points = slim.utils.convert_collection_to_dict(end_points_collection)\n      if spatial_squeeze:\n        net = tf.squeeze(net, [1, 2], 
name='fc8/squeezed')\n        end_points[sc.name + '/fc8'] = net\n      return net, end_points\nvgg_19.default_image_size = 224\n\n# Alias\nvgg_d = vgg_16\nvgg_e = vgg_19\n"
  },
  {
    "path": "models/slim/nets/vgg_test.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Tests for slim.nets.vgg.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom nets import vgg\n\nslim = tf.contrib.slim\n\n\nclass VGGATest(tf.test.TestCase):\n\n  def testBuild(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_a(inputs, num_classes)\n      self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testFullyConvolutional(self):\n    batch_size = 1\n    height, width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False)\n      self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, 2, 2, num_classes])\n\n  def testEndPoints(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with 
self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = vgg.vgg_a(inputs, num_classes)\n      expected_names = ['vgg_a/conv1/conv1_1',\n                        'vgg_a/pool1',\n                        'vgg_a/conv2/conv2_1',\n                        'vgg_a/pool2',\n                        'vgg_a/conv3/conv3_1',\n                        'vgg_a/conv3/conv3_2',\n                        'vgg_a/pool3',\n                        'vgg_a/conv4/conv4_1',\n                        'vgg_a/conv4/conv4_2',\n                        'vgg_a/pool4',\n                        'vgg_a/conv5/conv5_1',\n                        'vgg_a/conv5/conv5_2',\n                        'vgg_a/pool5',\n                        'vgg_a/fc6',\n                        'vgg_a/fc7',\n                        'vgg_a/fc8'\n                       ]\n      self.assertSetEqual(set(end_points.keys()), set(expected_names))\n\n  def testModelVariables(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      vgg.vgg_a(inputs, num_classes)\n      expected_names = ['vgg_a/conv1/conv1_1/weights',\n                        'vgg_a/conv1/conv1_1/biases',\n                        'vgg_a/conv2/conv2_1/weights',\n                        'vgg_a/conv2/conv2_1/biases',\n                        'vgg_a/conv3/conv3_1/weights',\n                        'vgg_a/conv3/conv3_1/biases',\n                        'vgg_a/conv3/conv3_2/weights',\n                        'vgg_a/conv3/conv3_2/biases',\n                        'vgg_a/conv4/conv4_1/weights',\n                        'vgg_a/conv4/conv4_1/biases',\n                        'vgg_a/conv4/conv4_2/weights',\n                        'vgg_a/conv4/conv4_2/biases',\n                        'vgg_a/conv5/conv5_1/weights',\n                        'vgg_a/conv5/conv5_1/biases',\n                        
'vgg_a/conv5/conv5_2/weights',\n                        'vgg_a/conv5/conv5_2/biases',\n                        'vgg_a/fc6/weights',\n                        'vgg_a/fc6/biases',\n                        'vgg_a/fc7/weights',\n                        'vgg_a/fc7/biases',\n                        'vgg_a/fc8/weights',\n                        'vgg_a/fc8/biases',\n                       ]\n      model_variables = [v.op.name for v in slim.get_model_variables()]\n      self.assertSetEqual(set(model_variables), set(expected_names))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_a(eval_inputs, is_training=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      predictions = tf.argmax(logits, 1)\n      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 2\n    eval_batch_size = 1\n    train_height, train_width = 224, 224\n    eval_height, eval_width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      train_inputs = tf.random_uniform(\n          (train_batch_size, train_height, train_width, 3))\n      logits, _ = vgg.vgg_a(train_inputs)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [train_batch_size, num_classes])\n      tf.get_variable_scope().reuse_variables()\n      eval_inputs = tf.random_uniform(\n          (eval_batch_size, eval_height, eval_width, 3))\n      logits, _ = vgg.vgg_a(eval_inputs, is_training=False,\n                            spatial_squeeze=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [eval_batch_size, 2, 2, num_classes])\n      logits = tf.reduce_mean(logits, [1, 2])\n      predictions = tf.argmax(logits, 1)\n      
self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])\n\n  def testForward(self):\n    batch_size = 1\n    height, width = 224, 224\n    with self.test_session() as sess:\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_a(inputs)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits)\n      self.assertTrue(output.any())\n\n\nclass VGG16Test(tf.test.TestCase):\n\n  def testBuild(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_16(inputs, num_classes)\n      self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testFullyConvolutional(self):\n    batch_size = 1\n    height, width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)\n      self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, 2, 2, num_classes])\n\n  def testEndPoints(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = vgg.vgg_16(inputs, num_classes)\n      expected_names = ['vgg_16/conv1/conv1_1',\n                        'vgg_16/conv1/conv1_2',\n                        'vgg_16/pool1',\n                        'vgg_16/conv2/conv2_1',\n                        'vgg_16/conv2/conv2_2',\n                        'vgg_16/pool2',\n                        'vgg_16/conv3/conv3_1',\n                        'vgg_16/conv3/conv3_2',\n                       
 'vgg_16/conv3/conv3_3',\n                        'vgg_16/pool3',\n                        'vgg_16/conv4/conv4_1',\n                        'vgg_16/conv4/conv4_2',\n                        'vgg_16/conv4/conv4_3',\n                        'vgg_16/pool4',\n                        'vgg_16/conv5/conv5_1',\n                        'vgg_16/conv5/conv5_2',\n                        'vgg_16/conv5/conv5_3',\n                        'vgg_16/pool5',\n                        'vgg_16/fc6',\n                        'vgg_16/fc7',\n                        'vgg_16/fc8'\n                       ]\n      self.assertSetEqual(set(end_points.keys()), set(expected_names))\n\n  def testModelVariables(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      vgg.vgg_16(inputs, num_classes)\n      expected_names = ['vgg_16/conv1/conv1_1/weights',\n                        'vgg_16/conv1/conv1_1/biases',\n                        'vgg_16/conv1/conv1_2/weights',\n                        'vgg_16/conv1/conv1_2/biases',\n                        'vgg_16/conv2/conv2_1/weights',\n                        'vgg_16/conv2/conv2_1/biases',\n                        'vgg_16/conv2/conv2_2/weights',\n                        'vgg_16/conv2/conv2_2/biases',\n                        'vgg_16/conv3/conv3_1/weights',\n                        'vgg_16/conv3/conv3_1/biases',\n                        'vgg_16/conv3/conv3_2/weights',\n                        'vgg_16/conv3/conv3_2/biases',\n                        'vgg_16/conv3/conv3_3/weights',\n                        'vgg_16/conv3/conv3_3/biases',\n                        'vgg_16/conv4/conv4_1/weights',\n                        'vgg_16/conv4/conv4_1/biases',\n                        'vgg_16/conv4/conv4_2/weights',\n                        'vgg_16/conv4/conv4_2/biases',\n                        'vgg_16/conv4/conv4_3/weights',\n                        
'vgg_16/conv4/conv4_3/biases',\n                        'vgg_16/conv5/conv5_1/weights',\n                        'vgg_16/conv5/conv5_1/biases',\n                        'vgg_16/conv5/conv5_2/weights',\n                        'vgg_16/conv5/conv5_2/biases',\n                        'vgg_16/conv5/conv5_3/weights',\n                        'vgg_16/conv5/conv5_3/biases',\n                        'vgg_16/fc6/weights',\n                        'vgg_16/fc6/biases',\n                        'vgg_16/fc7/weights',\n                        'vgg_16/fc7/biases',\n                        'vgg_16/fc8/weights',\n                        'vgg_16/fc8/biases',\n                       ]\n      model_variables = [v.op.name for v in slim.get_model_variables()]\n      self.assertSetEqual(set(model_variables), set(expected_names))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_16(eval_inputs, is_training=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      predictions = tf.argmax(logits, 1)\n      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 2\n    eval_batch_size = 1\n    train_height, train_width = 224, 224\n    eval_height, eval_width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      train_inputs = tf.random_uniform(\n          (train_batch_size, train_height, train_width, 3))\n      logits, _ = vgg.vgg_16(train_inputs)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [train_batch_size, num_classes])\n      tf.get_variable_scope().reuse_variables()\n      eval_inputs = tf.random_uniform(\n          (eval_batch_size, eval_height, eval_width, 3))\n      logits, _ = 
vgg.vgg_16(eval_inputs, is_training=False,\n                             spatial_squeeze=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [eval_batch_size, 2, 2, num_classes])\n      logits = tf.reduce_mean(logits, [1, 2])\n      predictions = tf.argmax(logits, 1)\n      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])\n\n  def testForward(self):\n    batch_size = 1\n    height, width = 224, 224\n    with self.test_session() as sess:\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_16(inputs)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits)\n      self.assertTrue(output.any())\n\n\nclass VGG19Test(tf.test.TestCase):\n\n  def testBuild(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_19(inputs, num_classes)\n      self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n\n  def testFullyConvolutional(self):\n    batch_size = 1\n    height, width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False)\n      self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, 2, 2, num_classes])\n\n  def testEndPoints(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      _, end_points = vgg.vgg_19(inputs, num_classes)\n      expected_names = [\n          'vgg_19/conv1/conv1_1',\n          
'vgg_19/conv1/conv1_2',\n          'vgg_19/pool1',\n          'vgg_19/conv2/conv2_1',\n          'vgg_19/conv2/conv2_2',\n          'vgg_19/pool2',\n          'vgg_19/conv3/conv3_1',\n          'vgg_19/conv3/conv3_2',\n          'vgg_19/conv3/conv3_3',\n          'vgg_19/conv3/conv3_4',\n          'vgg_19/pool3',\n          'vgg_19/conv4/conv4_1',\n          'vgg_19/conv4/conv4_2',\n          'vgg_19/conv4/conv4_3',\n          'vgg_19/conv4/conv4_4',\n          'vgg_19/pool4',\n          'vgg_19/conv5/conv5_1',\n          'vgg_19/conv5/conv5_2',\n          'vgg_19/conv5/conv5_3',\n          'vgg_19/conv5/conv5_4',\n          'vgg_19/pool5',\n          'vgg_19/fc6',\n          'vgg_19/fc7',\n          'vgg_19/fc8'\n      ]\n      self.assertSetEqual(set(end_points.keys()), set(expected_names))\n\n  def testModelVariables(self):\n    batch_size = 5\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      vgg.vgg_19(inputs, num_classes)\n      expected_names = [\n          'vgg_19/conv1/conv1_1/weights',\n          'vgg_19/conv1/conv1_1/biases',\n          'vgg_19/conv1/conv1_2/weights',\n          'vgg_19/conv1/conv1_2/biases',\n          'vgg_19/conv2/conv2_1/weights',\n          'vgg_19/conv2/conv2_1/biases',\n          'vgg_19/conv2/conv2_2/weights',\n          'vgg_19/conv2/conv2_2/biases',\n          'vgg_19/conv3/conv3_1/weights',\n          'vgg_19/conv3/conv3_1/biases',\n          'vgg_19/conv3/conv3_2/weights',\n          'vgg_19/conv3/conv3_2/biases',\n          'vgg_19/conv3/conv3_3/weights',\n          'vgg_19/conv3/conv3_3/biases',\n          'vgg_19/conv3/conv3_4/weights',\n          'vgg_19/conv3/conv3_4/biases',\n          'vgg_19/conv4/conv4_1/weights',\n          'vgg_19/conv4/conv4_1/biases',\n          'vgg_19/conv4/conv4_2/weights',\n          'vgg_19/conv4/conv4_2/biases',\n          'vgg_19/conv4/conv4_3/weights',\n          
'vgg_19/conv4/conv4_3/biases',\n          'vgg_19/conv4/conv4_4/weights',\n          'vgg_19/conv4/conv4_4/biases',\n          'vgg_19/conv5/conv5_1/weights',\n          'vgg_19/conv5/conv5_1/biases',\n          'vgg_19/conv5/conv5_2/weights',\n          'vgg_19/conv5/conv5_2/biases',\n          'vgg_19/conv5/conv5_3/weights',\n          'vgg_19/conv5/conv5_3/biases',\n          'vgg_19/conv5/conv5_4/weights',\n          'vgg_19/conv5/conv5_4/biases',\n          'vgg_19/fc6/weights',\n          'vgg_19/fc6/biases',\n          'vgg_19/fc7/weights',\n          'vgg_19/fc7/biases',\n          'vgg_19/fc8/weights',\n          'vgg_19/fc8/biases',\n      ]\n      model_variables = [v.op.name for v in slim.get_model_variables()]\n      self.assertSetEqual(set(model_variables), set(expected_names))\n\n  def testEvaluation(self):\n    batch_size = 2\n    height, width = 224, 224\n    num_classes = 1000\n    with self.test_session():\n      eval_inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_19(eval_inputs, is_training=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [batch_size, num_classes])\n      predictions = tf.argmax(logits, 1)\n      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])\n\n  def testTrainEvalWithReuse(self):\n    train_batch_size = 2\n    eval_batch_size = 1\n    train_height, train_width = 224, 224\n    eval_height, eval_width = 256, 256\n    num_classes = 1000\n    with self.test_session():\n      train_inputs = tf.random_uniform(\n          (train_batch_size, train_height, train_width, 3))\n      logits, _ = vgg.vgg_19(train_inputs)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [train_batch_size, num_classes])\n      tf.get_variable_scope().reuse_variables()\n      eval_inputs = tf.random_uniform(\n          (eval_batch_size, eval_height, eval_width, 3))\n      logits, _ = vgg.vgg_19(eval_inputs, 
is_training=False,\n                             spatial_squeeze=False)\n      self.assertListEqual(logits.get_shape().as_list(),\n                           [eval_batch_size, 2, 2, num_classes])\n      logits = tf.reduce_mean(logits, [1, 2])\n      predictions = tf.argmax(logits, 1)\n      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])\n\n  def testForward(self):\n    batch_size = 1\n    height, width = 224, 224\n    with self.test_session() as sess:\n      inputs = tf.random_uniform((batch_size, height, width, 3))\n      logits, _ = vgg.vgg_19(inputs)\n      sess.run(tf.global_variables_initializer())\n      output = sess.run(logits)\n      self.assertTrue(output.any())\n\nif __name__ == '__main__':\n  tf.test.main()\n"
  },
  {
    "path": "models/slim/preprocessing/__init__.py",
    "content": "\n"
  },
  {
    "path": "models/slim/preprocessing/cifarnet_preprocessing.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides utilities to preprocess images in CIFAR-10.\n\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\n_PADDING = 4\n\nslim = tf.contrib.slim\n\n\ndef preprocess_for_train(image,\n                         output_height,\n                         output_width,\n                         padding=_PADDING):\n  \"\"\"Preprocesses the given image for training.\n\n  Note that the actual resizing scale is sampled from\n    [`resize_size_min`, `resize_size_max`].\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    padding: The amound of padding before and after each dimension of the image.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  tf.image_summary('image', tf.expand_dims(image, 0))\n\n  # Transform the image to floats.\n  image = tf.to_float(image)\n  if padding > 0:\n    image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])\n  # Randomly crop a [height, width] section of the image.\n  distorted_image = tf.random_crop(image,\n                                   [output_height, 
output_width, 3])\n\n  # Randomly flip the image horizontally.\n  distorted_image = tf.image.random_flip_left_right(distorted_image)\n\n  tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0))\n\n  # Because these operations are not commutative, consider randomizing\n  # the order their operation.\n  distorted_image = tf.image.random_brightness(distorted_image,\n                                               max_delta=63)\n  distorted_image = tf.image.random_contrast(distorted_image,\n                                             lower=0.2, upper=1.8)\n  # Subtract off the mean and divide by the variance of the pixels.\n  return tf.image.per_image_whitening(distorted_image)\n\n\ndef preprocess_for_eval(image, output_height, output_width):\n  \"\"\"Preprocesses the given image for evaluation.\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  tf.image_summary('image', tf.expand_dims(image, 0))\n  # Transform the image to floats.\n  image = tf.to_float(image)\n\n  # Resize and crop if needed.\n  resized_image = tf.image.resize_image_with_crop_or_pad(image,\n                                                         output_width,\n                                                         output_height)\n  tf.image_summary('resized_image', tf.expand_dims(resized_image, 0))\n\n  # Subtract off the mean and divide by the variance of the pixels.\n  return tf.image.per_image_whitening(resized_image)\n\n\ndef preprocess_image(image, output_height, output_width, is_training=False):\n  \"\"\"Preprocesses the given image.\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    is_training: `True` if we're 
preprocessing the image for training and\n      `False` otherwise.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  if is_training:\n    return preprocess_for_train(image, output_height, output_width)\n  else:\n    return preprocess_for_eval(image, output_height, output_width)\n"
  },
  {
    "path": "models/slim/preprocessing/inception_preprocessing.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides utilities to preprocess images for the Inception networks.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom tensorflow.python.ops import control_flow_ops\n\n\ndef apply_with_random_selector(x, func, num_cases):\n  \"\"\"Computes func(x, sel), with sel sampled from [0...num_cases-1].\n\n  Args:\n    x: input Tensor.\n    func: Python function to apply.\n    num_cases: Python int32, number of cases to sample sel from.\n\n  Returns:\n    The result of func(x, sel), where func receives the value of the\n    selector as a python integer, but sel is sampled dynamically.\n  \"\"\"\n  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)\n  # Pass the real x only to one of the func calls.\n  return control_flow_ops.merge([\n      func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)\n      for case in range(num_cases)])[0]\n\n\ndef distort_color(image, color_ordering=0, fast_mode=True, scope=None):\n  \"\"\"Distort the color of a Tensor image.\n\n  Each color distortion is non-commutative and thus ordering of the color ops\n  matters. 
Ideally we would randomly permute the ordering of the color ops.\n  Rather then adding that level of complication, we select a distinct ordering\n  of color ops for each preprocessing thread.\n\n  Args:\n    image: 3-D Tensor containing single image in [0, 1].\n    color_ordering: Python int, a type of distortion (valid values: 0-3).\n    fast_mode: Avoids slower ops (random_hue and random_contrast)\n    scope: Optional scope for name_scope.\n  Returns:\n    3-D Tensor color-distorted image on range [0, 1]\n  Raises:\n    ValueError: if color_ordering not in [0, 3]\n  \"\"\"\n  with tf.name_scope(scope, 'distort_color', [image]):\n    if fast_mode:\n      if color_ordering == 0:\n        image = tf.image.random_brightness(image, max_delta=32. / 255.)\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n      else:\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n        image = tf.image.random_brightness(image, max_delta=32. / 255.)\n    else:\n      if color_ordering == 0:\n        image = tf.image.random_brightness(image, max_delta=32. / 255.)\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n        image = tf.image.random_hue(image, max_delta=0.2)\n        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)\n      elif color_ordering == 1:\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n        image = tf.image.random_brightness(image, max_delta=32. / 255.)\n        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)\n        image = tf.image.random_hue(image, max_delta=0.2)\n      elif color_ordering == 2:\n        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)\n        image = tf.image.random_hue(image, max_delta=0.2)\n        image = tf.image.random_brightness(image, max_delta=32. 
/ 255.)\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n      elif color_ordering == 3:\n        image = tf.image.random_hue(image, max_delta=0.2)\n        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)\n        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)\n        image = tf.image.random_brightness(image, max_delta=32. / 255.)\n      else:\n        raise ValueError('color_ordering must be in [0, 3]')\n\n    # The random_* ops do not necessarily clamp.\n    return tf.clip_by_value(image, 0.0, 1.0)\n\n\ndef distorted_bounding_box_crop(image,\n                                bbox,\n                                min_object_covered=0.1,\n                                aspect_ratio_range=(0.75, 1.33),\n                                # area_range=(0.05, 1.0),\n                                area_range=(0.85, 1.0),\n                                max_attempts=100,\n                                scope=None):\n  \"\"\"Generates cropped_image using a one of the bboxes randomly distorted.\n\n  See `tf.image.sample_distorted_bounding_box` for more documentation.\n\n  Args:\n    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).\n    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]\n      where each coordinate is [0, 1) and the coordinates are arranged\n      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole\n      image.\n    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped\n      area of the image must contain at least this fraction of any bounding box\n      supplied.\n    aspect_ratio_range: An optional list of `floats`. The cropped area of the\n      image must have an aspect ratio = width / height within this range.\n    area_range: An optional list of `floats`. The cropped area of the image\n      must contain a fraction of the supplied image within in this range.\n    max_attempts: An optional `int`. 
Number of attempts at generating a cropped\n      region of the image of the specified constraints. After `max_attempts`\n      failures, return the entire image.\n    scope: Optional scope for name_scope.\n  Returns:\n    A tuple, a 3-D Tensor cropped_image and the distorted bbox\n  \"\"\"\n  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):\n    # Each bounding box has shape [1, num_boxes, box coords] and\n    # the coordinates are ordered [ymin, xmin, ymax, xmax].\n\n    # A large fraction of image datasets contain a human-annotated bounding\n    # box delineating the region of the image containing the object of interest.\n    # We choose to create a new bounding box for the object which is a randomly\n    # distorted version of the human-annotated bounding box that obeys an\n    # allowed range of aspect ratios, sizes and overlap with the human-annotated\n    # bounding box. If no box is supplied, then we assume the bounding box is\n    # the entire image.\n    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(\n        tf.shape(image),\n        bounding_boxes=bbox,\n        min_object_covered=min_object_covered,\n        aspect_ratio_range=aspect_ratio_range,\n        area_range=area_range,\n        max_attempts=max_attempts,\n        use_image_if_no_bounding_boxes=True)\n    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box\n\n    # Crop the image to the specified bounding box.\n    cropped_image = tf.slice(image, bbox_begin, bbox_size)\n    return cropped_image, distort_bbox\n\n\ndef preprocess_for_train(image, height, width, bbox,\n                         fast_mode=True,\n                         scope=None):\n  \"\"\"Distort one image for training a network.\n\n  Distorting images provides a useful technique for augmenting the data\n  set during training in order to make the network invariant to aspects\n  of the image that do not effect the label.\n\n  Additionally it would create 
image_summaries to display the different\n  transformations applied to the image.\n\n  Args:\n    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be\n      [0, 1], otherwise it would converted to tf.float32 assuming that the range\n      is [0, MAX], where MAX is largest positive representable number for\n      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).\n    height: integer\n    width: integer\n    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]\n      where each coordinate is [0, 1) and the coordinates are arranged\n      as [ymin, xmin, ymax, xmax].\n    fast_mode: Optional boolean, if True avoids slower transformations (i.e.\n      bi-cubic resizing, random_hue or random_contrast).\n    scope: Optional scope for name_scope.\n  Returns:\n    3-D float Tensor of distorted image used for training with range [-1, 1].\n  \"\"\"\n  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):\n    if bbox is None:\n      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],\n                         dtype=tf.float32,\n                         shape=[1, 1, 4])\n    if image.dtype != tf.float32:\n      image = tf.image.convert_image_dtype(image, dtype=tf.float32)\n    # Each bounding box has shape [1, num_boxes, box coords] and\n    # the coordinates are ordered [ymin, xmin, ymax, xmax].\n    # image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),\n    #                                               bbox)\n    # tf.summary.image('image_with_bounding_boxes', image_with_box)\n    \n    image_channels = image.get_shape().as_list()[-1]\n    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)\n    # Restore the shape since the dynamic slice based upon the bbox_size loses\n    # the third dimension.\n    distorted_image.set_shape([None, None, image_channels])\n    # image_with_distorted_box = tf.image.draw_bounding_boxes(\n    #     tf.expand_dims(image, 0), 
distorted_bbox)\n    # tf.summary.image('images_with_distorted_bounding_box',\n    #                  image_with_distorted_box)\n\n    # This resizing operation may distort the images because the aspect\n    # ratio is not respected. We select a resize method in a round robin\n    # fashion based on the thread number.\n    # Note that ResizeMethod contains 4 enumerated resizing methods.\n\n    # We select only 1 case for fast_mode bilinear.\n    num_resize_cases = 1 if fast_mode else 4\n    distorted_image = apply_with_random_selector(\n        distorted_image,\n        lambda x, method: tf.image.resize_images(x, [height, width], method=method),\n        num_cases=num_resize_cases)\n\n    # tf.summary.image('cropped_resized_image',\n    #                  tf.expand_dims(distorted_image, 0))\n\n    # Randomly flip the image horizontally.\n    distorted_image = tf.image.random_flip_left_right(distorted_image)\n\n    # Randomly distort the colors. There are 4 ways to do it.\n    # rgirdhar: Stop distorting colors\n    # distorted_image = apply_with_random_selector(\n    #     distorted_image,\n    #     lambda x, ordering: distort_color(x, ordering, fast_mode),\n    #     num_cases=4)\n\n    # tf.summary.image('final_distorted_image',\n    #                  tf.expand_dims(distorted_image, 0))\n    distorted_image -= 0.5\n    distorted_image *= 2.0\n    return distorted_image\n\n\ndef preprocess_for_eval(image, height, width,\n                        central_fraction=0.875, scope=None):\n  \"\"\"Prepare one image for evaluation.\n\n  If height and width are specified it would output an image with that size by\n  applying resize_bilinear.\n\n  If central_fraction is specified it would cropt the central fraction of the\n  input image.\n\n  Args:\n    image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be\n      [0, 1], otherwise it would converted to tf.float32 assuming that the range\n      is [0, MAX], where MAX is largest positive representable number for\n      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)\n    height: integer\n    width: integer\n    central_fraction: Optional Float, fraction of the image to crop.\n    scope: Optional scope for name_scope.\n  Returns:\n    3-D float Tensor of prepared image.\n  \"\"\"\n  with tf.name_scope(scope, 'eval_image', [image, height, width]):\n    if image.dtype != tf.float32:\n      image = tf.image.convert_image_dtype(image, dtype=tf.float32)\n    # Crop the central region of the image with an area containing 87.5% of\n    # the original image.\n    if central_fraction:\n      image = tf.image.central_crop(image, central_fraction=central_fraction)\n\n    if height and width:\n      # Resize the image to the specified height and width.\n      image = tf.expand_dims(image, 0)\n      image = tf.image.resize_bilinear(image, [height, width],\n                                       align_corners=False)\n      image = tf.squeeze(image, [0])\n    image -= 0.5\n    image *= 2.0\n    return image\n\n\ndef preprocess_image(image, height, width,\n                     is_training=False,\n                     resize_side_min=None,  # this and next are only cos VGG\n                                            # uses these. No effect here.\n                     resize_side_max=None,\n                     bbox=None,\n                     fast_mode=True):\n  \"\"\"Pre-process one image for training or evaluation.\n\n  Args:\n    image: 3-D Tensor [height, width, channels] with the image.\n    height: integer, image expected height.\n    width: integer, image expected width.\n    is_training: Boolean. 
If true it would transform an image for train,\n      otherwise it would transform it for evaluation.\n    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]\n      where each coordinate is [0, 1) and the coordinates are arranged as\n      [ymin, xmin, ymax, xmax].\n    fast_mode: Optional boolean, if True avoids slower transformations.\n\n  Returns:\n    3-D float Tensor containing an appropriately scaled image\n\n  Raises:\n    ValueError: if user does not provide bounding box\n  \"\"\"\n  if is_training:\n    return preprocess_for_train(image, height, width, bbox, fast_mode)\n  else:\n    return preprocess_for_eval(image, height, width)\n"
  },
  {
    "path": "models/slim/preprocessing/lenet_preprocessing.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides utilities for preprocessing.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n\ndef preprocess_image(image, output_height, output_width, is_training):\n  \"\"\"Preprocesses the given image.\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    is_training: `True` if we're preprocessing the image for training and\n      `False` otherwise.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  image = tf.to_float(image)\n  image = tf.image.resize_image_with_crop_or_pad(\n      image, output_width, output_height)\n  image = tf.sub(image, 128.0)\n  image = tf.div(image, 128.0)\n  return image\n"
  },
  {
    "path": "models/slim/preprocessing/preprocessing_factory.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains a factory for building various models.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\n\nfrom preprocessing import cifarnet_preprocessing\nfrom preprocessing import inception_preprocessing\nfrom preprocessing import lenet_preprocessing\nfrom preprocessing import vgg_preprocessing\n\nslim = tf.contrib.slim\n\n\ndef get_preprocessing(name, is_training=False):\n  \"\"\"Returns preprocessing_fn(image, height, width, **kwargs).\n\n  Args:\n    name: The name of the preprocessing function.\n    is_training: `True` if the model is being used for training and `False`\n      otherwise.\n\n  Returns:\n    preprocessing_fn: A function that preprocessing a single image (pre-batch).\n      It has the following signature:\n        image = preprocessing_fn(image, output_height, output_width, ...).\n\n  Raises:\n    ValueError: If Preprocessing `name` is not recognized.\n  \"\"\"\n  preprocessing_fn_map = {\n      'cifarnet': cifarnet_preprocessing,\n      'inception': inception_preprocessing,\n      'inception_v1': inception_preprocessing,\n      'inception_v2': inception_preprocessing,\n      'inception_v2_tsn': vgg_preprocessing,  # Its wts are copied from 
caffe\n      'inception_v3': inception_preprocessing,\n      'inception_v4': inception_preprocessing,\n      'inception_resnet_v2': inception_preprocessing,\n      'lenet': lenet_preprocessing,\n      'resnet_v1_50': vgg_preprocessing,\n      'resnet_v1_101': vgg_preprocessing,\n      'resnet_v1_152': vgg_preprocessing,\n      'vgg': vgg_preprocessing,\n      'vgg_a': vgg_preprocessing,\n      'vgg_16': vgg_preprocessing,\n      'vgg_19': vgg_preprocessing,\n  }\n\n  if name not in preprocessing_fn_map:\n    raise ValueError('Preprocessing name [%s] was not recognized' % name)\n\n  def preprocessing_fn(image, output_height, output_width, **kwargs):\n    # preprocess 4D images (with [frames_per_vid, ht, wd, c])\n    expanded_dim = False\n    if image.get_shape().ndims == 3:\n      expanded_dim = True\n      image = tf.expand_dims(image, 0)\n    res = tf.stack([preprocessing_fn_map[name].preprocess_image(\n      el, output_height, output_width, is_training=is_training, **kwargs)\n      for el in tf.unstack(image)])\n    if expanded_dim:\n      res = res[0]\n    return res\n\n  return preprocessing_fn\n"
  },
  {
    "path": "models/slim/preprocessing/vgg_preprocessing.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Provides utilities to preprocess images.\n\nThe preprocessing steps for VGG were introduced in the following technical\nreport:\n\n  Very Deep Convolutional Networks For Large-Scale Image Recognition\n  Karen Simonyan and Andrew Zisserman\n  arXiv technical report, 2015\n  PDF: http://arxiv.org/pdf/1409.1556.pdf\n  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf\n  CC-BY-4.0\n\nMore information can be obtained from the VGG website:\nwww.robots.ox.ac.uk/~vgg/research/very_deep/\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport tensorflow as tf\nimport numpy as np\n\nfrom tensorflow.python.ops import control_flow_ops\n\nslim = tf.contrib.slim\n\n# _R_MEAN = 123.68\n# _G_MEAN = 116.78\n# _B_MEAN = 103.94\n_MEAN = 128.0  # rgirdhar: changing to this for easier handling of label channel\n\n_RESIZE_SIDE_MIN = 512\n# _RESIZE_SIDE_MAX = 512\n_RESIZE_SIDE_MAX = 512  # for pose, I don't want to loose too much\n\n\ndef _crop(image, offset_height, offset_width, crop_height, crop_width):\n  \"\"\"Crops the given image using the provided offsets and sizes.\n\n  Note that the method doesn't assume we know the input image size but it does\n  assume we know the 
input image rank.\n\n  Args:\n    image: an image of shape [height, width, channels].\n    offset_height: a scalar tensor indicating the height offset.\n    offset_width: a scalar tensor indicating the width offset.\n    crop_height: the height of the cropped image.\n    crop_width: the width of the cropped image.\n\n  Returns:\n    the cropped (and resized) image.\n\n  Raises:\n    InvalidArgumentError: if the rank is not 3 or if the image dimensions are\n      less than the crop size.\n  \"\"\"\n  original_shape = tf.shape(image)\n\n  rank_assertion = tf.Assert(\n      tf.equal(tf.rank(image), 3),\n      ['Rank of image must be equal to 3.'])\n  cropped_shape = control_flow_ops.with_dependencies(\n      [rank_assertion],\n      tf.stack([crop_height, crop_width, original_shape[2]]))\n\n  size_assertion = tf.Assert(\n      tf.logical_and(\n          tf.greater_equal(original_shape[0], crop_height),\n          tf.greater_equal(original_shape[1], crop_width)),\n      ['Crop size greater than the image size.'])\n\n  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))\n\n  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to\n  # define the crop size.\n  image = control_flow_ops.with_dependencies(\n      [size_assertion],\n      tf.slice(image, offsets, cropped_shape))\n  return tf.reshape(image, cropped_shape)\n\n\ndef _random_crop(image_list, crop_height, crop_width, preproc_info):\n  \"\"\"Crops the given list of images.\n\n  The function applies the same crop to each image in the list. 
This can be\n  effectively applied when there are multiple image inputs of the same\n  dimension such as:\n\n    image, depths, normals = _random_crop([image, depths, normals], 120, 150)\n\n  Args:\n    image_list: a list of image tensors of the same dimension but possibly\n      varying channel.\n    crop_height: the new height.\n    crop_width: the new width.\n\n  Returns:\n    the image_list with cropped images.\n\n  Raises:\n    ValueError: if there are multiple image inputs provided with different size\n      or the images are smaller than the crop dimensions.\n  \"\"\"\n  if not image_list:\n    raise ValueError('Empty image_list.')\n\n  # Compute the rank assertions.\n  rank_assertions = []\n  for i in range(len(image_list)):\n    image_rank = tf.rank(image_list[i])\n    rank_assert = tf.Assert(\n        tf.equal(image_rank, 3),\n        ['Wrong rank for tensor  %s [expected] [actual]',\n         image_list[i].name, 3, image_rank])\n    rank_assertions.append(rank_assert)\n\n  image_shape = control_flow_ops.with_dependencies(\n      [rank_assertions[0]],\n      tf.shape(image_list[0]))\n  image_height = image_shape[0]\n  image_width = image_shape[1]\n  crop_size_assert = tf.Assert(\n      tf.logical_and(\n          tf.greater_equal(image_height, crop_height),\n          tf.greater_equal(image_width, crop_width)),\n      ['Crop size greater than the image size.'])\n\n  asserts = [rank_assertions[0], crop_size_assert]\n\n  for i in range(1, len(image_list)):\n    image = image_list[i]\n    asserts.append(rank_assertions[i])\n    shape = control_flow_ops.with_dependencies([rank_assertions[i]],\n                                               tf.shape(image))\n    height = shape[0]\n    width = shape[1]\n\n    height_assert = tf.Assert(\n        tf.equal(height, image_height),\n        ['Wrong height for tensor %s [expected][actual]',\n         image.name, height, image_height])\n    width_assert = tf.Assert(\n        tf.equal(width, image_width),\n        
['Wrong width for tensor %s [expected][actual]',\n         image.name, width, image_width])\n    asserts.extend([height_assert, width_assert])\n\n  # Create a random bounding box.\n  #\n  # Use tf.random_uniform and not numpy.random.rand as doing the former would\n  # generate random numbers at graph eval time, unlike the latter which\n  # generates random numbers at graph definition time.\n  max_offset_height = control_flow_ops.with_dependencies(\n      asserts, tf.reshape(image_height - crop_height + 1, []))\n  max_offset_width = control_flow_ops.with_dependencies(\n      asserts, tf.reshape(image_width - crop_width + 1, []))\n  offset_height = tf.random_uniform(\n      [], maxval=max_offset_height, dtype=tf.int32)\n  offset_width = tf.random_uniform(\n      [], maxval=max_offset_width, dtype=tf.int32)\n\n  preproc_info['crop_info'] = [\n    offset_height, offset_width, crop_height, crop_width]\n  return [_crop(image, offset_height, offset_width,\n                crop_height, crop_width) for image in image_list]\n\n\ndef _central_crop(image_list, crop_height, crop_width):\n  \"\"\"Performs central crops of the given image list.\n\n  Args:\n    image_list: a list of image tensors of the same dimension but possibly\n      varying channel.\n    crop_height: the height of the image following the crop.\n    crop_width: the width of the image following the crop.\n\n  Returns:\n    the list of cropped images.\n  \"\"\"\n  outputs = []\n  for image in image_list:\n    image_height = tf.shape(image)[0]\n    image_width = tf.shape(image)[1]\n\n    offset_height = (image_height - crop_height) / 2\n    offset_width = (image_width - crop_width) / 2\n\n    outputs.append(_crop(image, offset_height, offset_width,\n                         crop_height, crop_width))\n  return outputs\n\n\ndef _mean_image_subtraction(image, means):\n  \"\"\"Subtracts the given means from each image channel.\n\n  For example:\n    means = [123.68, 116.779, 103.939]\n    image = 
_mean_image_subtraction(image, means)\n\n  Note that the rank of `image` must be known.\n\n  Args:\n    image: a tensor of size [height, width, C].\n    means: a C-vector of values to subtract from each channel.\n\n  Returns:\n    the centered image.\n\n  Raises:\n    ValueError: If the rank of `image` is unknown, if `image` has a rank other\n      than three or if the number of channels in `image` doesn't match the\n      number of values in `means`.\n  \"\"\"\n  # if image.get_shape().ndims != 3:\n  #   raise ValueError('Input must be of size [height, width, C>0]')\n  num_channels = image.get_shape().as_list()[-1]\n  if len(means) != num_channels:\n    raise ValueError('len(means) must match the number of channels')\n\n  channels = tf.split(image, num_channels, 2)\n  for i in range(num_channels):\n    channels[i] -= means[i]\n  return tf.concat(channels, 2)\n\n\ndef _smallest_size_at_least(height, width, smallest_side):\n  \"\"\"Computes new shape with the smallest side equal to `smallest_side`.\n\n  Computes new shape with the smallest side equal to `smallest_side` while\n  preserving the original aspect ratio.\n\n  Args:\n    height: an int32 scalar tensor indicating the current height.\n    width: an int32 scalar tensor indicating the current width.\n    smallest_side: A python integer or scalar `Tensor` indicating the size of\n      the smallest side after resize.\n\n  Returns:\n    new_height: an int32 scalar tensor indicating the new height.\n    new_width: and int32 scalar tensor indicating the new width.\n  \"\"\"\n  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)\n\n  height = tf.to_float(height)\n  width = tf.to_float(width)\n  smallest_side = tf.to_float(smallest_side)\n\n  scale = tf.cond(tf.greater(height, width),\n                  lambda: smallest_side / width,\n                  lambda: smallest_side / height)\n  new_height = tf.to_int32(height * scale)\n  new_width = tf.to_int32(width * scale)\n  return new_height, 
new_width\n\n\ndef _aspect_preserving_resize(image, smallest_side):\n  \"\"\"Resize images preserving the original aspect ratio.\n\n  Args:\n    image: A 3-D image `Tensor`.\n    smallest_side: A python integer or scalar `Tensor` indicating the size of\n      the smallest side after resize.\n\n  Returns:\n    resized_image: A 3-D tensor containing the resized image.\n  \"\"\"\n  num_channels = image.get_shape().as_list()[-1]\n  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)\n\n  shape = tf.shape(image)\n  height = shape[0]\n  width = shape[1]\n  new_height, new_width = _smallest_size_at_least(height, width, smallest_side)\n  image = tf.expand_dims(image, 0)\n  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],\n                                           align_corners=False)\n  resized_image = tf.squeeze(resized_image)\n  resized_image.set_shape([None, None, num_channels])\n  return resized_image\n\n\ndef preprocess_for_train(image,\n                         output_height,\n                         output_width,\n                         resize_side_min=_RESIZE_SIDE_MIN,\n                         resize_side_max=_RESIZE_SIDE_MAX,\n                         preproc_info={}, modality='rgb'):\n  \"\"\"Preprocesses the given image for training.\n\n  Note that the actual resizing scale is sampled from\n    [`resize_size_min`, `resize_size_max`].\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    resize_side_min: The lower bound for the smallest side of the image for\n      aspect-preserving resizing.\n    resize_side_max: The upper bound for the smallest side of the image for\n      aspect-preserving resizing.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  num_channels = image.get_shape().as_list()[-1]\n  resize_side = tf.random_uniform(\n      [], 
minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)\n\n  image = _aspect_preserving_resize(image, resize_side)\n  preproc_info['image_shape'] = tf.shape(image)\n  image = _random_crop([image], output_height, output_width, preproc_info)[0]\n  image.set_shape([output_height, output_width, num_channels])\n  image = tf.to_float(image)\n  image, whether_flip = tf.cond(\n    tf.greater(tf.random_uniform((), 0, 1, tf.float32), 0.5),\n    lambda: tf.tuple([tf.image.flip_left_right(image), tf.constant(True)]),\n    lambda: tf.tuple([image, tf.constant(False)]))\n  if modality.startswith('flow'):\n    tf.logging.info('Subtracting 255-x from X-flow for flips. Flow input.')\n    assert(num_channels % 2 == 0)\n    flow_img_flip = image\n    alt_mat = np.ones([\n      image.get_shape().as_list()[-3],\n      image.get_shape().as_list()[-2],\n      num_channels])\n    alt_mat[..., np.arange(0, num_channels, 2)] *= -1\n    IMG_SCALER = 256.0\n    flow_img_flip = (flow_img_flip - IMG_SCALER/2) * alt_mat + IMG_SCALER/2\n    image = tf.cond(\n      whether_flip,\n      lambda: flow_img_flip,\n      lambda: image)\n  preproc_info['whether_flip'] = whether_flip\n  # tf.image.random_flip_left_right(image)\n\n  # return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])\n  return _mean_image_subtraction(image, [_MEAN] * num_channels)\n\n\ndef preprocess_for_eval(image, output_height, output_width, resize_side):\n  \"\"\"Preprocesses the given image for evaluation.\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    resize_side: The smallest side of the image for aspect-preserving resizing.\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  num_channels = image.get_shape().as_list()[-1]\n  image = _aspect_preserving_resize(image, resize_side)\n  image = _central_crop([image], output_height, output_width)[0]\n  
image.set_shape([output_height, output_width, num_channels])\n  image = tf.to_float(image)\n  return _mean_image_subtraction(image, [_MEAN] * num_channels)\n  # return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])\n\n\ndef preprocess_image(image, output_height, output_width, is_training=False,\n                     resize_side_min=_RESIZE_SIDE_MIN,\n                     resize_side_max=_RESIZE_SIDE_MAX,\n                     preproc_info={}, modality='rgb'):\n  \"\"\"Preprocesses the given image.\n\n  Args:\n    image: A `Tensor` representing an image of arbitrary size.\n    output_height: The height of the image after preprocessing.\n    output_width: The width of the image after preprocessing.\n    is_training: `True` if we're preprocessing the image for training and\n      `False` otherwise.\n    resize_side_min: The lower bound for the smallest side of the image for\n      aspect-preserving resizing. If `is_training` is `False`, then this value\n      is used for rescaling.\n    resize_side_max: The upper bound for the smallest side of the image for\n      aspect-preserving resizing. If `is_training` is `False`, this value is\n      ignored. 
Otherwise, the resize side is sampled from\n        [resize_size_min, resize_size_max].\n    preproc_info: will return all the information for the preprocessing,\n      including sizes, positions, flip or not etc, which can then be replayed\n      onto other images (specifically to be used for target heatmaps).\n      It should contain:\n        - whether_flip: Bool tensor: whether the image was flipped or not\n        - image_shape: [3,1] tensor: size of the original image after resize\n        - crop_info: [offset_ht, offset_wd, crop_ht, crop_wd]\n\n  Returns:\n    A preprocessed image.\n  \"\"\"\n  if is_training:\n    return preprocess_for_train(image, output_height, output_width,\n                                resize_side_min, resize_side_max,\n                                preproc_info, modality)\n  else:\n    return preprocess_for_eval(image, output_height, output_width,\n                               resize_side_min)\n"
  },
  {
    "path": "src/config.py",
    "content": "\"\"\"Config System\n\"\"\"\n\nimport os\nimport os.path as osp\nimport numpy as np\nfrom easydict import EasyDict as edict\n\n__C = edict()\n# Consumers can get config by:\n#   from fast_rcnn_config import cfg\ncfg = __C\n\n\n#\n# Input options\n#\n\n\n__C.INPUT = edict()\n\n# normal: normal image\n# rendered-pose: rendered pose on black bg\n# rendered-pose-on-image: rendered onto the image\n__C.INPUT.INPUT_IMAGE_FORMAT = 'normal'\n\n# pose renders can be 'rgb' or 'split-channel'\n__C.INPUT.INPUT_IMAGE_FORMAT_POSE_RENDER_TYPE = 'rgb'\n\n# input glimpse options\n__C.INPUT.POSE_GLIMPSE_CONTEXT_RATIO = 0.0  # ratio of glimpse area to pad around\n# set the following to true to resize the output to [IMAGE_SIZE, IMAGE_SIZE]\n# square\n__C.INPUT.POSE_GLIMPSE_RESIZE = False\n# list part sof the pose to keep in glimpse. Empty => all parts to keep\n__C.INPUT.POSE_GLIMPSE_PARTS_KEEP = []\n\n\n__C.INPUT.SPLIT_ID = 1  # for dataset with multiple splits (hmdb)\n\n# FOR VIDEO\n__C.INPUT.VIDEO = edict()\n__C.INPUT.VIDEO.MODALITY = 'rgb'  # rgb/flow5/flow10 etc\n\n#\n# Training options\n#\n\n__C.TRAIN = edict()\n\n# Minibatch size\n__C.TRAIN.BATCH_SIZE = 10\n\n__C.TRAIN.WEIGHT_DECAY = 0.0005\n\n# set to a positive value to clip the gradients at that l2 norm\n__C.TRAIN.CLIP_GRADIENTS = -1.0\n\n# the following should have been in the INPUT, but are here for historical\n# reasons\n__C.TRAIN.IMAGE_SIZE = 450  # final cropped image size\n__C.TRAIN.RESIZE_SIDE = 480  # resize the input image to this size for preproc\n## The RESIZE_SIDE is the size for the smallest side, so be careful,\n## MPII has images with extreme ratios\n## Note that if the difference RESIZE_SIDE to IMAGE_SIZE is too high,\n## most of the image being fed into the network will be small parts of the\n## image\n\n# This is the side of the heatmap before putting into queues\n# Ideally, resize it to the final target size so that there is no\n# need for a resize before computing loss. 
For inception-v2 with 450 input, the\n# output is 15x15\n__C.TRAIN.FINAL_POSE_HMAP_SIDE = 15\n\n__C.TRAIN.LABEL_SMOOTHING = False\n\n__C.TRAIN.MOVING_AVERAGE_VARIABLES = None\n\n__C.TRAIN.LEARNING_RATE = 0.01\n__C.TRAIN.LEARNING_RATE_DECAY_RATE = 0.33\n__C.TRAIN.END_LEARNING_RATE = 0.00001\n\n__C.TRAIN.NUM_STEPS_PER_DECAY = 0  # if this is not 0, the NUM_EPOCHS_PER_DECAY\n                                   # is ignored and this is used\n__C.TRAIN.NUM_EPOCHS_PER_DECAY = 40.0\n\n__C.TRAIN.LEARNING_RATE_DECAY_TYPE = 'exponential'\n\n\n__C.TRAIN.OPTIMIZER = 'momentum'\n__C.TRAIN.MOMENTUM = 0.9\n__C.TRAIN.ADAM_BETA1 = 0.9\n__C.TRAIN.ADAM_BETA2 = 0.999\n__C.TRAIN.OPT_EPSILON = 1.0\n\n__C.TRAIN.TRAINABLE_SCOPES = ''\n\n__C.TRAIN.MAX_NUMBER_OF_STEPS = 100000\n\n__C.TRAIN.LOG_EVERY_N_STEPS = 10\n\n__C.TRAIN.SAVE_SUMMARIES_SECS = 300\n\n__C.TRAIN.SAVE_INTERVAL_SECS = 1800\n\n__C.TRAIN.IGNORE_MISSING_VARS = True\n\n__C.TRAIN.CHECKPOINT_PATH = 'data/pretrained_models/inception_v3.ckpt'\n\n# __C.TRAIN.CHECKPOINT_EXCLUDE_SCOPES = 'InceptionV3/Logits,InceptionV3/AuxLogits,PoseLogits'\n__C.TRAIN.CHECKPOINT_EXCLUDE_SCOPES = ''\n\n__C.TRAIN.DATASET_SPLIT_NAME = 'trainval_train'\n\n# loss fn can be from the list or empty '', i.e. 
no loss on that modality\n__C.TRAIN.LOSS_FN_POSE = 'l2'  # can be 'l2'/'log-loss'/'sigmoid-log-loss'/'cosine-loss'\n__C.TRAIN.LOSS_FN_POSE_WT = 1.0\n__C.TRAIN.LOSS_FN_POSE_SAMPLED = False  # Harder loss, sample the negatives\n__C.TRAIN.LOSS_FN_ACTION = 'softmax-xentropy'  # can be 'softmax-xentropy'\n__C.TRAIN.LOSS_FN_ACTION_WT = 1.0\n\n__C.TRAIN.VAR_NAME_MAPPER = ''  # to be used when loading from npy checkpoints\n                                # see options in restore/var_name_mapper.py\n\n__C.TRAIN.VIDEO_FRAMES_PER_VIDEO = 1\n\n# If true, divide the video into segments and read\n# a random frame from that segment\n__C.TRAIN.READ_SEGMENT_STYLE = False\n\n__C.TRAIN.ITER_SIZE = 1  # accumulate gradients over this many iterations\n\n__C.TRAIN.OTHER_IMG_SUMMARIES_TO_ADD = ['PosePrelogitsBasedAttention']\n\n#\n# Testing options\n#\n\n__C.TEST = edict()\n\n__C.TEST.BATCH_SIZE = 10\n\n__C.TEST.DATASET_SPLIT_NAME = 'trainval_val'\n\n__C.TEST.MAX_NUM_BATCHES = None\n\n__C.TEST.CHECKPOINT_PATH = b''\n\n__C.TEST.MOVING_AVERAGE_DECAY = None\n\n__C.TEST.VIDEO_FRAMES_PER_VIDEO = 1  # single image dataset. Set 25 for hmdb\n\n__C.TEST.EVAL_METRIC = ''  # normal eval. Set ='mAP' to compute that.\n\n\n#\n# Network properties\n#\n\n__C.NET = edict()\n# The following replaces the action logits with one computed by weighting the\n# output using pose heatmaps\n__C.NET.USE_POSE_ATTENTION_LOGITS = False\n__C.NET.USE_POSE_ATTENTION_LOGITS_DIMS = [-1]  # by default use all parts\n# set following true to have a heatmap as the avg of all heatmaps\n__C.NET.USE_POSE_ATTENTION_LOGITS_AVGED_HMAP = False\n\n\n# The following will replace the action logits with one computed over the last\n# pose logits\n__C.NET.USE_POSE_LOGITS_DIRECTLY = False\n# set true to also have the actual logits concatenated to the output\n__C.NET.USE_POSE_LOGITS_DIRECTLY_PLUS_LOGITS = False\n# Another version, after talking to Deva on March 20, 2017. 
Concat before avg\n# pool and remove the extra layer.\n# The following by default contain the image logits\n__C.NET.USE_POSE_LOGITS_DIRECTLY_v2 = False\n__C.NET.USE_POSE_LOGITS_DIRECTLY_v2_EXTRA_LAYER = False\n\n# The following will replace the action logits with a one computed using an\n# unconstrained attention predictor based on the pose output\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION = False\n# REMOVED THIS TO DEPRECATE\n# # setting the following to true basically just reproduces the original system\n# # (doesnot use any attention). I just used it to debug that this can reproduce\n# # the original numbers (nothing else got screwed up)\n# __C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_DEBUG = False\n# set the following to more to have more layers predicting the unconstrained\n# attention map\n# DEPRECATING the following, commented out for now, will be removed later.\n# __C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_NLAYERS = 1\n# set True to enforce the attention map that is learnt to be passed  through a\n# spatial softmax\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SOFTMAX_ATT = False\n# Pass the attention through a relu\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RELU_ATT = False\n# 21 April 2017: This is not DEPRECATED because it didn't help, so it won't\n# work with code now. This was to simplify code for TopDownAttention endpoint\n# # Create an attention map for each class\n# adding it again on July 26, 2017 for NIPS17 rebuttal\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS = False\n# Train attention directly over image features\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT = False\n# Add the predicted pose to the logits features\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT = False\n# 2-layers over the pose logits\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT_2LAYER = False\n# Allow for Rank > 1 approximation. 
Other options might not work with this\n__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK = 1\n\n# Do attention on temporal pooling as well\n__C.NET.USE_TEMPORAL_ATT = False\n\n# Bilinear pooling baselines\n__C.NET.USE_COMPACT_BILINEAR_POOLING = False\n\n# Set which endpoint serves as the output for pose\n__C.NET.LAST_CONV_MAP_FOR_POSE = edict()\n__C.NET.LAST_CONV_MAP_FOR_POSE.inception_v2_tsn = 'InceptionV2_TSN/inception_5a'\n__C.NET.LAST_CONV_MAP_FOR_POSE.inception_v3 = 'Mixed_7c'\n__C.NET.LAST_CONV_MAP_FOR_POSE.resnet_v1_101 = 'resnet_v1_101/block4'\n__C.NET.LAST_CONV_MAP_FOR_POSE.vgg_16 = 'vgg_16/conv5'\n\n\n# Train the top BN. Useful when training flow/multi-channel inputs other than\n# RGB. In case of ResNet, this means \"train only top_bn\", and keep others\n# fixed.\n__C.NET.TRAIN_TOP_BN = False\n# Dropout\n# -1 (<0) => Use the network default. Else, use this value\n__C.NET.DROPOUT = -1.0\n\n#\n# MISC\n#\n\n# For reproducibility\n__C.RNG_SEED = 42\n\n# A small number that's used many times\n__C.EPS = 1e-14\n\n# Root directory of project\n__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))\n\n# Data directory\n__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))\n\n# Model directory\n__C.EXP_DIR = 'expt_outputs/'\n\n__C.DATASET_NAME = 'mpii'\n\n__C.DATASET_DIR = 'data/mpii/mpii_tfrecords'\n\n# Set the following if using the train_test files from non-std location\n__C.DATASET_LIST_DIR = ''\n\n__C.MODEL_NAME = 'inception_v3'\n\n__C.NUM_READERS = 4\n\n__C.NUM_PREPROCESSING_THREADS = 4\n\n__C.GPUS = '2'\n\n__C.HEATMAP_MARKER_WD_RATIO = 0.1\n\n__C.MAX_INPUT_IMAGE_SIZE = 512  # to avoid arbitrarily huge input images\n\n# ['one-label'/'multi-label%d']\n__C.INPUT_FILE_STYLE_LABEL = ''\n\n\ndef get_output_dir(config_file_name):\n    \"\"\"Return the directory where experimental artifacts are placed.\n    If the directory does not exist, it is created.\n\n    A canonical path is built using the name from an imdb and a network\n    (if not 
None).\n    \"\"\"\n    outdir = osp.abspath(osp.join(__C.EXP_DIR, osp.basename(config_file_name)))\n    if not os.path.exists(outdir):\n        os.makedirs(outdir)\n    return outdir\n\ndef _merge_a_into_b(a, b):\n    \"\"\"Merge config dictionary a into config dictionary b, clobbering the\n    options in b whenever they are also specified in a.\n    \"\"\"\n    if type(a) is not edict:\n        return\n\n    for k, v in a.iteritems():\n        # a must specify keys that are in b\n        if not b.has_key(k):\n            raise KeyError('{} is not a valid config key'.format(k))\n\n        # the types must match, too\n        old_type = type(b[k])\n        if old_type is not type(v):\n            if isinstance(b[k], np.ndarray):\n                v = np.array(v, dtype=b[k].dtype)\n            else:\n                raise ValueError(('Type mismatch ({} vs. {}) '\n                                'for config key: {}').format(type(b[k]),\n                                                            type(v), k))\n\n        # recursively merge dicts\n        if type(v) is edict:\n            try:\n                _merge_a_into_b(a[k], b[k])\n            except:\n                print('Error under config key: {}'.format(k))\n                raise\n        else:\n            b[k] = v\n\ndef cfg_from_file(filename):\n    \"\"\"Load a config file and merge it into the default options.\"\"\"\n    import yaml\n    with open(filename, 'r') as f:\n        yaml_cfg = edict(yaml.load(f))\n\n    _merge_a_into_b(yaml_cfg, __C)\n\ndef cfg_from_list(cfg_list):\n    \"\"\"Set config keys via list (e.g., from command line).\"\"\"\n    from ast import literal_eval\n    assert len(cfg_list) % 2 == 0\n    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):\n        key_list = k.split('.')\n        d = __C\n        for subkey in key_list[:-1]:\n            assert d.has_key(subkey)\n            d = d[subkey]\n        subkey = key_list[-1]\n        assert d.has_key(subkey)\n        try:\n          
  value = literal_eval(v)\n        except:\n            # handle the case when v is a string literal\n            value = v\n        assert type(value) == type(d[subkey]), \\\n            'type {} does not match original type {}'.format(\n            type(value), type(d[subkey]))\n        d[subkey] = value\n"
  },
  {
    "path": "src/custom_ops/Makefile",
    "content": "BOOST_DIR := /home/rgirdhar/Software/basic/boost/install2/\nBOOST_LIB_DIR := $(BOOST_DIR)/lib\nBOOST_INC_DIR := $(BOOST_DIR)/include\nTF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')\nLDFLAGS := -Wl,-rpath,$(BOOST_LIB_DIR)  # this ensures it will look in the correct BOOST directory for libs (not the system path)\n\nall: pose_to_heatmap.so zero_out_channels.so render_pose.so render_objects.so\n\npose_to_heatmap.so: pose_to_heatmap.cc\n\tg++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` pose_to_heatmap.cc -o pose_to_heatmap.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread\n\nzero_out_channels.so: zero_out_channels.cc\n\tg++ -std=c++11 $(LDFLAGS) -shared zero_out_channels.cc -o zero_out_channels.so -fPIC -I $(TF_INC) -O2\n\nrender_pose.so: render_pose.cc pose_utils.hpp\n\tg++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` render_pose.cc -o render_pose.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread\n\nrender_objects.so: render_objects.cc\n\tg++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` render_objects.cc -o render_objects.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread\n"
  },
  {
    "path": "src/custom_ops/__init__.py",
    "content": ""
  },
  {
    "path": "src/custom_ops/custom_ops_factory.py",
    "content": "import os\nimport json\nfrom collections import OrderedDict\nimport numpy as np\nimport tensorflow as tf\n\ncur_path = os.path.realpath(__file__)\nROOT_PATH = os.path.dirname(cur_path)\n\n# add any new ops under the following\npose_to_heatmap_fn = tf.load_op_library(\n  os.path.join(ROOT_PATH, 'pose_to_heatmap.so')).pose_to_heatmap\nzero_out_channels_fn = tf.load_op_library(\n  os.path.join(ROOT_PATH, 'zero_out_channels.so')).zero_out_channels\nrender_pose_fn = tf.load_op_library(\n  os.path.join(ROOT_PATH, 'render_pose.so')).render_pose\nrender_objects_fn = tf.load_op_library(\n  os.path.join(ROOT_PATH, 'render_objects.so')).render_objects\n\ndef pose_to_heatmap(*args, **kwargs):\n  with tf.variable_scope('pose_to_heatmap_pyWrapper'):\n    pose_img, pose_valid = pose_to_heatmap_fn(*args, **kwargs)\n    out_channels = kwargs['out_channels']\n    pose_img.set_shape((None, None, out_channels))\n    pose_valid.set_shape((out_channels,))\n    pose_img *= 255.0\n    pose_img = tf.cast(pose_img, tf.uint8)\n  return pose_img, pose_valid\n\ndef zero_out_channels(*args, **kwargs):\n  with tf.variable_scope('zero_out_channels_pyWrapper'):\n    return zero_out_channels_fn(*args, **kwargs)\n\ndef render_pose(*args, **kwargs):\n  with tf.variable_scope('render_pose_pyWrapper'):\n    out_channels = 3\n    if kwargs['out_type'] == 'rgb':\n      kwargs['out_type'] = 1\n      out_channels = 3\n    elif kwargs['out_type'] == 'split-channel':\n      kwargs['out_type'] = 2\n      out_channels = 18  # number of limbs\n    img = render_pose_fn(*args, **kwargs)\n    img *= 255.0\n    img = tf.cast(img, tf.uint8)\n    img.set_shape((None, None, out_channels))\n  return img\n\n# from render_pose.cc\nmpii_to_coco = OrderedDict([\n  (9, 0),\n  (8, 1),\n  (12, 2),\n  (11, 3),\n  (10, 4),\n  (13, 5),\n  (14, 6),\n  (15, 7),\n  (2, 8),\n  (1, 9),\n  (0, 10),\n  (3, 11),\n  (4, 12),\n  (5, 13),\n])\ndef read_json_pose_fn(fpath):\n  try:\n    with open(fpath, 'r') as fin:\n      
data = json.load(fin)\n  except:\n    print('Unable to open file {}'.format(fpath))\n    return -np.ones((16*3,)).astype('int64')\n  res = []\n  for body in data['bodies']:\n    mpii_joints = -np.ones((16, 3))\n    joints = np.array(body['joints'])\n    joints = np.reshape(joints, (-1, 3))\n    joints[joints[..., :] <= 0] = -1\n    mpii_joints[np.array(mpii_to_coco.keys()), :] = \\\n      joints[np.array(mpii_to_coco.values()), :]\n    res += mpii_joints.reshape((-1,)).tolist()\n  res = np.array(res).astype('int64')\n  return res\n\ndef read_json_pose(*args):\n  return tf.py_func(read_json_pose_fn, args, tf.int64)\n\ndef render_objects(*args, **kwargs):\n  with tf.variable_scope('render_objects_pyWrapper'):\n    img = render_objects_fn(*args, **kwargs)\n    img *= 255.0\n    img = tf.cast(img, tf.uint8)\n    img.set_shape((None, None, kwargs['out_channels']))\n  return img\n\ndef extract_glimpse(image, pose_label, orig_im_ht, orig_im_wd,\n                    out_side, pad_ratio, parts_keep):\n  # pose label is a [3x16xn,] vector\n  # for now just take the first pose and crop out the human\n  with tf.name_scope('ExtractGlimpse'):\n    pose_label = pose_label[:16*3]\n    pose_label = tf.reshape(pose_label, [16, 3])\n    if len(parts_keep) > 0:\n      pose_label = tf.gather(pose_label, parts_keep)\n    if len(parts_keep) == 1:\n      # now only one point, but need at least two to make a crop region\n      delta = tf.to_int64(\n        [tf.to_float(tf.shape(image)[-2]) * 0.1,\n         tf.to_float(tf.shape(image)[-3]) * 0.1, 0])\n      pose_label = tf.stack([\n        pose_label[0] - delta, pose_label[0] + delta])\n    pose_label_x = tf.to_float(pose_label[:, 0]) * \\\n        tf.to_float(tf.shape(image)[-2]) / tf.to_float(orig_im_wd)\n    pose_label_y = tf.to_float(pose_label[:, 1]) * \\\n        tf.to_float(tf.shape(image)[-3]) / tf.to_float(orig_im_ht)\n    pose_label = tf.stack([pose_label_y, pose_label_x])\n    mx_pts = tf.to_int32(tf.reduce_max(pose_label, 
axis=1))\n    mn_pts = tf.to_int32(tf.reduce_min(\n      tf.where(tf.greater_equal(pose_label, 0), pose_label,\n               tf.ones(pose_label.get_shape()) * 999999), axis=1))\n    delta_0 = tf.to_int32(tf.to_float((mx_pts[0] - mn_pts[0])) * pad_ratio)\n    delta_1 = tf.to_int32(tf.to_float((mx_pts[1] - mn_pts[1])) * pad_ratio)\n    mx_pts = mx_pts + [delta_0, delta_1]\n    mn_pts = mn_pts - [delta_0, delta_1]\n\n    offset_ht = tf.maximum(mn_pts[0], 0)\n    offset_wd = tf.maximum(mn_pts[1], 0)\n    target_ht = tf.minimum(mx_pts[0]-offset_ht, tf.shape(image)[-3]-offset_ht-1)\n    target_wd = tf.minimum(mx_pts[1]-offset_wd, tf.shape(image)[-2]-offset_wd-1)\n    # image = tf.Print(image, [offset_ht, offset_wd, target_ht, target_wd,\n    #                          tf.shape(image)], \"stuff:\")\n    image = tf.cond(tf.logical_and(\n      tf.greater(mx_pts[1], mn_pts[1]),\n      tf.greater(mx_pts[0], mn_pts[0])),\n      lambda: tf.image.crop_to_bounding_box(\n        image, offset_ht, offset_wd, target_ht, target_wd),\n      lambda: image)\n    if out_side > 0:\n      image = tf.image.resize_images(\n        image, [out_side, out_side])\n    return image\n\ndef read_sparse_label_fn(sparse_label, nclasses):\n  \"\"\"sparse_label is a string and return a 1D vector with the dense label\n  \"\"\"\n  res = np.zeros((nclasses,), dtype='int32')\n  res[np.array([int(el.split(':')[0]) for el in sparse_label.split(',')])] = \\\n      np.array([int(el.split(':')[1]) for el in sparse_label.split(',')])\n  res[res < 0] = 0  # get rid of -1 label for now\n  return res\n\ndef read_sparse_label(*args):\n  return tf.py_func(read_sparse_label_fn, args, tf.int32)\n"
  },
  {
    "path": "src/custom_ops/pose_to_heatmap.cc",
    "content": "#include \"tensorflow/core/framework/op.h\"\n#include \"tensorflow/core/framework/shape_inference.h\"\n#include \"tensorflow/core/framework/op_kernel.h\"\n\n#include <iostream>\n#include <tuple>\n\n#include <opencv2/opencv.hpp>\n\nusing namespace tensorflow;\nusing namespace std;\n\nREGISTER_OP(\"PoseToHeatmap\")\n  .Attr(\"out_channels: int = 16\")\n  .Attr(\"marker_wd_ratio: float = 0.1\")\n  .Attr(\"do_gauss_blur: bool = True\")\n  .Input(\"pose_label: int64\")\n  .Input(\"im_ht: int64\")\n  .Input(\"im_wd: int64\")\n  .Input(\"out_wd: int64\")  // out_height decided using this and aspect ratio of image\n  .Output(\"heatmap: float\")\n  .Output(\"is_valid: bool\");  // a bit for each channel, if that pose label is valid or not\n\nclass PoseToHeatmapOp : public OpKernel {\n public:\n  explicit PoseToHeatmapOp(OpKernelConstruction* context) : OpKernel(context) {\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"out_channels\", &out_channels_));\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"marker_wd_ratio\", &marker_wd_ratio_));\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"do_gauss_blur\", &do_gauss_blur_));\n  }\n\n  void Compute(OpKernelContext* context) override {\n    // Grab the input tensor\n    const Tensor& pose_label_tensor = context->input(0);\n    auto pose_label = pose_label_tensor.flat<long long>();\n    const Tensor& im_ht_tensor = context->input(1);\n    auto im_ht = im_ht_tensor.flat<long long>()(0);\n    const Tensor& im_wd_tensor = context->input(2);\n    auto im_wd = im_wd_tensor.flat<long long>()(0);\n    const Tensor& out_wd_tensor = context->input(3);\n    auto out_wd = out_wd_tensor.flat<long long>()(0);\n    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);\n\n    // The pose label should be 16 keypoints, with X,Y,is_visible\n    int num_keypoints = out_channels_;\n    assert(pose_label.size() % (3 * num_keypoints) == 0);\n    int n_rects = pose_label.size() / (3 * num_keypoints);\n\n    // 
Create output tensors\n    TensorShape out_shape {out_ht, out_wd, out_channels_};\n    Tensor* output_tensor = NULL;\n    OP_REQUIRES_OK(\n        context, \n        context->allocate_output(\n          0,\n          out_shape,\n          &output_tensor));\n    auto output = output_tensor->tensor<float, 3>();\n    TensorShape out_shape_valid {out_channels_};\n    Tensor* output_tensor_valid = NULL;\n    OP_REQUIRES_OK(\n        context, \n        context->allocate_output(\n          1,\n          out_shape_valid,\n          &output_tensor_valid));\n    auto output_valid = output_tensor_valid->tensor<bool, 1>();\n\n    int elts_per_pose = num_keypoints * 3;\n    for (int i = 0; i < num_keypoints; i++) {\n      cv::Mat channel(out_ht, out_wd, CV_32FC1, 0.0);\n      output_valid(i) = false;\n      for (int rid = 0; rid < n_rects; rid++) {  // for each rectangle\n        int x = pose_label(rid * elts_per_pose + i * 3) * out_wd / im_wd;\n        int y = pose_label(rid * elts_per_pose + i * 3 + 1) * out_ht / im_ht;\n        int is_visible = pose_label(rid * elts_per_pose + i * 3 + 2);  // ignore this\n        if (pose_label(rid * elts_per_pose + i * 3) >= 0 &&\n            pose_label(rid * elts_per_pose + i * 3 + 1) >= 0) {\n          output_valid(i) = true;\n          circle(channel, cv::Point(x, y),\n                 (int) out_wd * marker_wd_ratio_,\n                 cv::Scalar(1.0, 1.0, 1.0), -1);\n          if (do_gauss_blur_)\n            GaussianBlur(channel, channel, cv::Size(7, 7), 0);\n        }\n      }\n      for (int r = 0; r < channel.rows; r++) {\n        for (int c = 0; c < channel.cols; c++) {\n          output(r, c, i) = channel.at<float>(r, c);\n        }\n      }\n    }\n  }\n  \n private:\n  int out_channels_;\n  float marker_wd_ratio_;\n  bool do_gauss_blur_;\n};\n\nREGISTER_KERNEL_BUILDER(Name(\"PoseToHeatmap\").Device(DEVICE_CPU), PoseToHeatmapOp);\n"
  },
  {
    "path": "src/custom_ops/pose_utils.hpp",
    "content": "#include <opencv2/opencv.hpp>\n\n#include <tuple>\n#include <iostream>\n#include <sstream>\n\n// Very important to add the following #define\n// boost json parser depends on boost::spirit\n// which is not thread safe by default.\n// It was giving Segmentation Faults.\n// Also, this means I need to compile with -lboost_thread\n// ref: http://stackoverflow.com/a/22089792/1492614\n// This was tested to work fine with multi-threaded training\n#define BOOST_SPIRIT_THREADSAFE\n#include <boost/property_tree/ptree.hpp>\n#include <boost/property_tree/json_parser.hpp>\n\nusing namespace std;\nusing namespace cv;\nnamespace pt = boost::property_tree;\n\n\nvector<float> joint_color {1, 0, 0,\n                           1, 0.33, 0,\n                           1, 0.66, 0,\n                           1, 1, 0,\n                           0.66, 1, 0,\n                           0.33, 1, 0,\n                           0, 1, 0,\n                           0, 1, 0.33,\n                           0, 1, 0.66,\n                           0, 1, 1,\n                           0, 0.66, 1,\n                           0, 0.33, 1,\n                           0, 0, 1,\n                           0.33, 0, 1,\n                           0.66, 0, 1,\n                           1, 0, 1,\n                           1, 0, 0.66,\n                           1, 0, 0.33};\n//                           1, 1, 1};\nvector<int> limbSeq {2, 3,\n                     2, 6,\n                     3, 4,\n                     4, 5,\n                     6, 7,\n                     7, 8,\n                     2, 9,\n                     9, 10,\n                     10, 11,\n                     2, 12,\n                     12, 13,\n                     13, 14,\n                     2, 1,\n                     1, 15,\n                     15, 17,\n                     1, 16,\n                     16, 18,\n                     3, 17};\n//                     6, 18};\n\n#define RENDER_POSE_OUT_TYPE_RGB 
1\n#define RENDER_POSE_OUT_TYPE_SPLITCHANNEL 2\nMat render_pose(vector<vector<tuple<float,float,float>>> poses,\n    int out_ht, int out_wd,\n    int max_ht, int max_wd, int marker_wd,\n    int out_type=RENDER_POSE_OUT_TYPE_RGB) {\n  int nLimbs = limbSeq.size() / 2;\n  int nchannels = 3;\n  if (out_type == RENDER_POSE_OUT_TYPE_RGB) {\n    nchannels = 3;\n  } else if (out_type == RENDER_POSE_OUT_TYPE_SPLITCHANNEL) {\n    nchannels = nLimbs;\n  } else {\n    cerr << \"render_pose: Unknown output type.\" << endl;\n  }\n  Mat output(out_ht, out_wd, CV_32FC(nchannels), 0.0);\n  vector<Mat> output_channels;\n  if (nchannels != 3) {\n    split(output, output_channels);\n  }\n  // assert(limbSeq.size() / 2 == joint_color.size() / 3);\n  for (int body_id = 0; body_id < poses.size(); body_id++) {\n    for (int i = 0; i < nLimbs; i++) {\n      float scal_ht = out_ht * 1.0 / max_ht;\n      float scal_wd = out_wd * 1.0 / max_wd;\n      tuple<float, float, float> pt1 = poses[body_id][limbSeq[2*i]-1];\n      tuple<float, float, float> pt2 = poses[body_id][limbSeq[2*i+1]-1];\n      float pt1_conf = get<2>(pt1);\n      float pt2_conf = get<2>(pt2);\n      if (pt1_conf < 0.1 || pt2_conf < 0.1) {\n        continue;\n      }\n      Mat render_img;\n      Scalar color;\n      if (nchannels == 3) {\n        render_img = output;\n        color = CV_RGB(joint_color[i*3], joint_color[i*3+1], joint_color[i*3+2]);\n      } else {\n        render_img = output_channels[i];\n        color = Scalar(1);\n      }\n      line(\n          render_img,\n          Point(get<0>(pt1) * scal_wd, get<1>(pt1) * scal_ht),\n          Point(get<0>(pt2) * scal_wd, get<1>(pt2) * scal_ht),\n          color, marker_wd);\n    }\n  }\n  if (nchannels != 3) {\n    merge(output_channels, output);\n  }\n  return output;\n}\n\n\nvector<vector<tuple<float,float,float>>> read_pose_xml(string xml_str, int &pose_dim) {\n  vector<vector<tuple<float,float,float>>> poses;\n  if (xml_str.size() > 0) {\n    stringstream 
ss(xml_str);\n    pt::ptree root;\n    pt::read_json(ss, root);\n    for (pt::ptree::value_type &body : root.get_child(\"bodies\")) {\n      vector<float> elts;\n      for (pt::ptree::value_type &joints : body.second.get_child(\"joints\")) {\n        elts.push_back((float) stof(joints.second.data()));\n      }\n      pose_dim = elts.size() / 3;  // x,y,score format\n      if (pose_dim * 3 != elts.size()) {\n        cerr << \"Invalid number of numbers in pose dim (\"\n          << pose_dim * 3 << \" vs \" << elts.size() << endl;\n        poses.clear();\n        break;\n      }\n      vector<tuple<float,float,float>> pose;\n      for (int i = 0; i < pose_dim; i++) {\n        pose.push_back(make_tuple(elts[i*3], elts[i*3+1], elts[i*3+2]));\n      }\n      poses.push_back(pose);\n    }\n  } else {\n    cerr << \"json_to_pose: Empty string passed in.\" << endl;\n  }\n  return poses;\n}\n"
  },
  {
    "path": "src/custom_ops/render_objects.cc",
    "content": "#include \"tensorflow/core/framework/op.h\"\n#include \"tensorflow/core/framework/shape_inference.h\"\n#include \"tensorflow/core/framework/op_kernel.h\"\n\n#include <iostream>\n#include <tuple>\n\n#include <opencv2/opencv.hpp>\n\nusing namespace tensorflow;\nusing namespace std;\n\nREGISTER_OP(\"RenderObjects\")\n  .Attr(\"out_channels: int = 80\")\n  .Input(\"objects_label: string\")\n  .Input(\"im_ht: int64\")\n  .Input(\"im_wd: int64\")\n  .Input(\"out_wd: int64\")  // out_height decided using this and aspect ratio of image\n  .Output(\"image: float\");\n\n\nvoid read_detections(\n    string objects_label,\n    vector<tuple<int,float,float,float,float,float>> &detections) {\n  istringstream ss(objects_label);\n  int ob_label, id;  // ignore the id\n  float conf, xmin, ymin, xmax, ymax;\n  detections.clear();\n  while (ss >> id >> ob_label >> conf >> xmin >> ymin >> xmax >> ymax) {\n    detections.push_back(make_tuple(ob_label, conf, xmin, ymin, xmax, ymax));\n  }\n}\n\n\nclass RenderObjectsOp : public OpKernel {\n public:\n  explicit RenderObjectsOp(OpKernelConstruction* context) : OpKernel(context) {\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"out_channels\", &out_channels_));\n  }\n\n  void Compute(OpKernelContext* context) override {\n    // Grab the input tensor\n    const Tensor& objects_label_tensor = context->input(0);\n    auto objects_label = objects_label_tensor.flat<string>()(0);\n    const Tensor& im_ht_tensor = context->input(1);\n    auto im_ht = im_ht_tensor.flat<long long>()(0);\n    const Tensor& im_wd_tensor = context->input(2);\n    auto im_wd = im_wd_tensor.flat<long long>()(0);\n    const Tensor& out_wd_tensor = context->input(3);\n    auto out_wd = out_wd_tensor.flat<long long>()(0);\n    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);\n\n    // Create output tensors\n    TensorShape out_shape {out_ht, out_wd, out_channels_};\n    Tensor* output_tensor = NULL;\n    OP_REQUIRES_OK(\n        context,\n        
context->allocate_output(\n          0,\n          out_shape,\n          &output_tensor));\n    auto output = output_tensor->tensor<float, 3>();\n    vector<tuple<int,float,float,float,float,float>> detections;\n    read_detections(objects_label, detections);\n    for (int i = 0; i < out_wd; i++) {\n      for (int j = 0; j < out_ht; j++) {\n        for (int k = 0; k < out_channels_; k++) {\n          output(j, i, k) = 0;\n        }\n      }\n    }\n\n    if (out_channels_ != 3) {  // i.e. not doing a RGB output\n      for (unsigned int i = 0; i < detections.size(); i++) {\n        int xmin = get<2>(detections[i]) * out_wd;\n        int ymin = get<3>(detections[i]) * out_ht;\n        int xmax = get<4>(detections[i]) * out_wd;\n        int ymax = get<5>(detections[i]) * out_ht;\n        int ob_label = get<0>(detections[i]);\n        float conf = get<1>(detections[i]);\n        for (int c = max(0, (int) xmin); c < min(xmax, (int) out_wd); c++) {\n          for (int r = max(0, (int) ymin); r < min(ymax, (int) out_ht); r++) {\n            output(r, c, ob_label) = conf;\n          }\n        }\n      }\n    } else {\n      cerr << \"render_objects: unable to render RGB currently.\" << endl;\n    }\n  }\n\n private:\n  int out_channels_;\n};\n\nREGISTER_KERNEL_BUILDER(Name(\"RenderObjects\").Device(DEVICE_CPU), RenderObjectsOp);\n"
  },
  {
    "path": "src/custom_ops/render_pose.cc",
    "content": "#include \"tensorflow/core/framework/op.h\"\n#include \"tensorflow/core/framework/shape_inference.h\"\n#include \"tensorflow/core/framework/op_kernel.h\"\n\n#include <iostream>\n#include <tuple>\n\n#include <opencv2/opencv.hpp>\n\n#include \"pose_utils.hpp\"\n\nusing namespace tensorflow;\nusing namespace std;\nnamespace pt = boost::property_tree;\n\n\nREGISTER_OP(\"RenderPose\")\n  .Attr(\"marker_wd_ratio: float = 0.01\")  // ratio of output image width\n  .Attr(\"out_type: int = 1\")  // RENDER_POSE_OUT_TYPE_RGB or RENDER_POSE_OUT_TYPE_SPLITCHANNEL\n  .Input(\"pose_label: int64\")\n  .Input(\"im_ht: int64\")\n  .Input(\"im_wd: int64\")\n  .Input(\"out_wd: int64\")\n  .Output(\"image: float\");\n\nclass RenderPoseOp : public OpKernel {\n public:\n  explicit RenderPoseOp(OpKernelConstruction* context) : OpKernel(context) {\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"marker_wd_ratio\", &marker_wd_ratio_));\n    OP_REQUIRES_OK(\n        context, context->GetAttr(\"out_type\", &out_type_));\n  }\n\n  void Compute(OpKernelContext* context) override {\n    // Grab the input tensor\n    const Tensor& pose_label_tensor = context->input(0);\n    auto pose_label = pose_label_tensor.flat<long long>();\n    const Tensor& im_ht_tensor = context->input(1);\n    auto im_ht = im_ht_tensor.flat<long long>()(0);\n    const Tensor& im_wd_tensor = context->input(2);\n    auto im_wd = im_wd_tensor.flat<long long>()(0);\n    const Tensor& out_wd_tensor = context->input(3);\n    auto out_wd = out_wd_tensor.flat<long long>()(0);\n    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);\n\n    int num_keypoints = 16;  // MPII poses\n    assert(pose_label.size() % (3 * num_keypoints) == 0);\n    int n_people = pose_label.size() / (3 * num_keypoints);\n    vector<vector<tuple<float,float,float>>> poses;\n    int elts_per_pose = 3 * num_keypoints;\n    for (int i = 0; i < n_people; i++) {\n      vector<tuple<float,float,float>> person;\n      for (int j = 0; j < 
num_keypoints; j++) {\n        int x = pose_label(elts_per_pose * i + 3 * j);\n        int y = pose_label(elts_per_pose * i + 3 * j + 1);\n        int is_visible = pose_label(elts_per_pose * i + 3 * j + 2);\n        // TODO (rgirdhar): Maybe this needs be fixed\n        if (x == -1 && y == -1) {\n          is_visible = 0;\n        } else {\n          is_visible = 1;\n        }\n        person.push_back(make_tuple(x, y, is_visible));\n      }\n      poses.push_back(convert_pose_mpii_to_coco(person));\n    }\n\n    cv::Mat render = render_pose(\n        poses, out_ht, out_wd,\n        im_ht, im_wd, out_wd * marker_wd_ratio_,\n        out_type_);\n    // Create an output tensor\n    TensorShape out_shape {out_ht, out_wd, render.channels()};\n    Tensor* output_tensor = NULL;\n    OP_REQUIRES_OK(\n        context,\n        context->allocate_output(\n          0,\n          out_shape,\n          &output_tensor));\n    auto output = output_tensor->tensor<float, 3>();\n\n    for (int i = 0; i < render.rows; i++) {\n      for (int j = 0; j < render.cols; j++) {\n        float *pixel = render.ptr<float>(i, j);\n        for (int k = 0; k < render.channels(); k++) {\n          output(i, j, k) = pixel[render.channels()-k-1];\n        }\n      }\n    }\n  }\n\n private:\n\n  vector<tuple<float,float,float>> convert_pose_mpii_to_coco(\n      vector<tuple<float,float,float>> poses) {\n    // Using the coco definition from https://github.com/CMU-Perceptual-Computing-Lab/caffe_rtpose\n    // Using the MPII definition from http://human-pose.mpi-inf.mpg.de/#download\n    vector<tuple<float,float,float>> res;\n    auto dummy = make_tuple(0, 0, 0);  // for the parts I don't have in MPII\n    map<int, int> coco_to_mpii = {\n      {0, 9},  // Nose, head_top (approx)\n      {1, 8},\n      {2, 12},\n      {3, 11},\n      {4, 10},\n      {5, 13},\n      {6, 14},\n      {7, 15},\n      {8, 2},\n      {9, 1},\n      {10, 0},\n      {11, 3},\n      {12, 4},\n      {13, 5},\n      {14, -1},\n   
   {15, -1},\n      {16, -1},\n      {17, -1},\n      {18, -1}\n    };\n    for (int i = 0; i < coco_to_mpii.size(); i++) {\n      if (coco_to_mpii[i] == -1) {\n        res.push_back(dummy);\n      } else {\n        res.push_back(poses[coco_to_mpii[i]]);\n      }\n    }\n    return res;\n  }\n\n  float marker_wd_ratio_;\n  int out_type_;\n};\n\nREGISTER_KERNEL_BUILDER(Name(\"RenderPose\").Device(DEVICE_CPU), RenderPoseOp);\n"
  },
  {
    "path": "src/custom_ops/test/pose_to_heatmap_op_test.py",
    "content": "import tensorflow as tf\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom custom_ops.custom_ops_factory import pose_to_heatmap\n\nwith tf.Session(''):\n  pose = [50, 50, 1] * 3 +\\\n      [0, 0, 1] * 2 +\\\n      [-1, -1, 1] * 11\n  pose += [90, 90, 1] * 3 +\\\n      [0, 0, 1] * 2 +\\\n      [-1, -1, 1] * 11\n\n  T, T_valid = pose_to_heatmap(\n    pose,\n    100,\n    200,\n    100,\n    out_channels=16\n  )\n  A = T.eval()\n  A_valid = T_valid.eval()\n  plt.imsave('temp.jpg', np.mean(A, axis=-1))\n  print A_valid\n  import pdb\n  pdb.set_trace()\n  a = 1\n\n"
  },
  {
    "path": "src/custom_ops/test/render_objects_op_test.py",
    "content": "import tensorflow as tf\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom custom_ops.custom_ops_factory import render_objects\n\nwith tf.Session(''):\n  T = render_objects(\n    '1 1 0.743129 0.031770 0.151354 0.448363 0.994178\\n'\n    '1 1 0.813451 0.517574 0.303005 0.957526 0.975016',\n    100,\n    200,\n    100,\n    out_channels=80\n  )\n  A = T.eval()\n  plt.imsave('temp.jpg', np.mean(A, axis=-1))\n  import pdb\n  pdb.set_trace()\n  a = 1\n\n"
  },
  {
    "path": "src/custom_ops/test/zero_out_channels_op_test.py",
    "content": "import tensorflow as tf\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom custom_ops.custom_ops_factory import zero_out_channels\n\nwith tf.Session(''):\n  A = np.ones((1, 3, 3, 5))\n  channels = [True, False, True, True, True]\n  B = zero_out_channels(A, channels)\n  print B\n  C = B.eval()\n  assert(np.all(C[:, :, :, 0] == 1))\n  assert(np.all(C[:, :, :, 1] == 0))\n  assert(np.all(C[:, :, :, 2] == 1))\n  assert(np.all(C[:, :, :, 3] == 1))\n  import pdb\n  pdb.set_trace()\n  a = 1\n\n"
  },
  {
    "path": "src/custom_ops/zero_out_channels.cc",
    "content": "#include \"tensorflow/core/framework/op.h\"\n#include \"tensorflow/core/framework/shape_inference.h\"\n#include \"tensorflow/core/framework/op_kernel.h\"\n\nusing namespace tensorflow;\n\nREGISTER_OP(\"ZeroOutChannels\")\n  .Attr(\"T: {float32, float64, int32, int64}\")\n  .Input(\"to_zero: T\")  // must be 4-dim images\n  .Input(\"channels: bool\")  // list of true/false, false=>zero out that channel\n  .Output(\"zeroed: T\")\n  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {\n    c->set_output(0, c->input(0));\n    return Status::OK();\n  });\n\ntemplate <typename T>\nclass ZeroOutChannelsOp : public OpKernel {\n public:\n  explicit ZeroOutChannelsOp(OpKernelConstruction* context) : OpKernel(context) {}\n\n  void Compute(OpKernelContext* context) override {\n    // Grab the input tensor\n    const Tensor& input_tensor = context->input(0);\n    const Tensor& input_tensor_channel = context->input(1);\n    auto input = input_tensor.tensor<T, 4>();\n    auto input_channel = input_tensor_channel.flat<bool>();\n\n    assert(input_tensor.shape().dims() == 4);\n    int num_channels = input_tensor.shape().dim_size(3);\n    assert(num_channels == input_tensor_channel.shape().dim_size(0));\n    Tensor *output = NULL;\n    OP_REQUIRES_OK(\n        context,\n        context->allocate_output(0, input_tensor.shape(), &output));\n    auto output_flat = output->tensor<T, 4>();\n    for (int i = 0; i < input_tensor.shape().dim_size(0); i++) {\n      for (int j = 0; j < input_tensor.shape().dim_size(1); j++) {\n        for (int k = 0; k < input_tensor.shape().dim_size(2); k++) {\n          for (int l = 0; l < input_tensor.shape().dim_size(3); l++) {\n            if (input_channel(l) == false) {\n              output_flat(i, j, k, l) = 0;\n            } else {\n              output_flat(i, j, k, l) = input(i, j, k, l);\n            }\n          }\n        }\n      }\n    }\n  }\n};\n\nREGISTER_KERNEL_BUILDER(\n    Name(\"ZeroOutChannels\")\n    
.Device(DEVICE_CPU)\n    .TypeConstraint<double>(\"T\"),\n    ZeroOutChannelsOp<double>);\nREGISTER_KERNEL_BUILDER(\n    Name(\"ZeroOutChannels\")\n    .Device(DEVICE_CPU)\n    .TypeConstraint<float>(\"T\"),\n    ZeroOutChannelsOp<float>);\nREGISTER_KERNEL_BUILDER(\n    Name(\"ZeroOutChannels\")\n    .Device(DEVICE_CPU)\n    .TypeConstraint<int>(\"T\"),\n    ZeroOutChannelsOp<int>);\nREGISTER_KERNEL_BUILDER(\n    Name(\"ZeroOutChannels\")\n    .Device(DEVICE_CPU)\n    .TypeConstraint<long long>(\"T\"),\n    ZeroOutChannelsOp<long long>);\n"
  },
  {
    "path": "src/datasets/__init__.py",
    "content": ""
  },
  {
    "path": "src/datasets/charades.py",
    "content": "\"\"\"Provides data for the HMDB51 dataset.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets.video_data_utils import gen_dataset\nimport tensorflow as tf\n\n_CHARADES_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/001_Charades/v1/Lists/train_test_lists/'\n_CHARADES_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/004_Charades/Processed/002_Pose_CPM_v2/'\n\ndef get_split(split_name, dataset_dir,\n              file_pattern=None,\n              reader=None, **kwargs):\n\n  _NUM_CLASSES = 157\n  # There are no pose labels, but need to keep this to load models from MPII\n  # trained\n  # Also, now the processing can still avoided by having no loss on pose\n  _NUM_POSE_KEYPOINTS = 16\n  # Need to do this otherwise the lambda function defined below will not work\n  # It evaluates the kwargs['..'] also when evaluated\n  if 'dataset_list_dir' not in kwargs:\n    dataset_list_dir = _CHARADES_TRINITY_LIST_DIR\n  else:\n    dataset_list_dir = kwargs['dataset_list_dir']\n  _LIST_FN = lambda split, id: \\\n      '%s/%s_split%d.txt' % (\n        dataset_list_dir,\n        split, id)\n\n  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS\n  kwargs['num_classes'] = _NUM_CLASSES\n  kwargs['list_fn'] = _LIST_FN\n  with open(_LIST_FN(split_name, kwargs['split_id']), 'r') as fin:\n    ncols = len(fin.readline().strip().split())\n  if ncols == 4:\n    input_file_style = '4-col'\n  elif ncols == 3:\n    input_file_style = '3-col'  # since video level testing with mAP\n  else:\n    raise ValueError('Invalid file style')\n  tf.logging.info('Using input_file_style {}'.format(input_file_style))\n\n  # need to remove some things from kwargs (if they exist) before passing on\n  kwargs.pop('dataset_list_dir', [])\n  return gen_dataset(split_name, dataset_dir, file_pattern,\n                     reader,\n                     
pose_dataset_dir=_CHARADES_TRINITY_POSE_LABEL_DIR,\n                     input_file_style=input_file_style,\n                     **kwargs), _NUM_POSE_KEYPOINTS\n"
  },
  {
    "path": "src/datasets/dataset_factory.py",
    "content": "\"\"\"A factory-pattern class which returns classification image/label pairs.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets import mpii\nfrom datasets import hmdb51\nfrom datasets import charades\nfrom datasets import hico\nfrom datasets import jhmdb21\n\ndatasets_map = {\n  'mpii': mpii,\n  'hmdb51': hmdb51,\n  'charades': charades,\n  'hico': hico,\n  'jhmdb21': jhmdb21\n}\n\n\ndef get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None,\n                **kwargs):  # added by rgirdhar: allow other options\n  \"\"\"Given a dataset name and a split_name returns a Dataset.\n\n  Args:\n    name: String, the name of the dataset.\n    split_name: A train/test split name.\n    dataset_dir: The directory where the dataset files are stored.\n    file_pattern: The file pattern to use for matching the dataset source files.\n    reader: The subclass of tf.ReaderBase. If left as `None`, then the default\n      reader defined by each dataset is used.\n\n  Returns:\n    A `Dataset` class.\n\n  Raises:\n    ValueError: If the dataset `name` is unknown.\n  \"\"\"\n  if name not in datasets_map:\n    raise ValueError('Name of dataset unknown %s' % name)\n  return datasets_map[name].get_split(\n      split_name,\n      dataset_dir,\n      file_pattern,\n      reader, **kwargs)\n"
  },
  {
    "path": "src/datasets/dataset_utils.py",
    "content": "# Copyright 2016 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\"\"\"Contains utilities for downloading and converting datasets.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport sys\nimport tarfile\n\nfrom six.moves import urllib\nimport tensorflow as tf\n\nLABELS_FILENAME = 'labels.txt'\n\n\ndef int64_feature(values):\n  \"\"\"Returns a TF-Feature of int64s.\n\n  Args:\n    values: A scalar or list of values.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  if not isinstance(values, (tuple, list)):\n    values = [values]\n  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))\n\n\ndef bytes_feature(values):\n  \"\"\"Returns a TF-Feature of bytes.\n\n  Args:\n    values: A string.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))\n\n\ndef image_to_tfexample(image_data, image_format, height, width, class_id):\n  return tf.train.Example(features=tf.train.Features(feature={\n      'image/encoded': bytes_feature(image_data),\n      'image/format': bytes_feature(image_format),\n      'image/class/label': int64_feature(class_id),\n      'image/height': int64_feature(height),\n      'image/width': int64_feature(width),\n  }))\n\n\ndef 
download_and_uncompress_tarball(tarball_url, dataset_dir):\n  \"\"\"Downloads the `tarball_url` and uncompresses it locally.\n\n  Args:\n    tarball_url: The URL of a tarball file.\n    dataset_dir: The directory where the temporary files are stored.\n  \"\"\"\n  filename = tarball_url.split('/')[-1]\n  filepath = os.path.join(dataset_dir, filename)\n\n  def _progress(count, block_size, total_size):\n    sys.stdout.write('\\r>> Downloading %s %.1f%%' % (\n        filename, float(count * block_size) / float(total_size) * 100.0))\n    sys.stdout.flush()\n  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)\n  print()\n  statinfo = os.stat(filepath)\n  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')\n  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)\n\n\ndef write_label_file(labels_to_class_names, dataset_dir,\n                     filename=LABELS_FILENAME):\n  \"\"\"Writes a file with the list of class names.\n\n  Args:\n    labels_to_class_names: A map of (integer) labels to class names.\n    dataset_dir: The directory in which the labels file should be written.\n    filename: The filename where the class names are written.\n  \"\"\"\n  labels_filename = os.path.join(dataset_dir, filename)\n  with tf.gfile.Open(labels_filename, 'w') as f:\n    for label in labels_to_class_names:\n      class_name = labels_to_class_names[label]\n      f.write('%d:%s\\n' % (label, class_name))\n\n\ndef has_labels(dataset_dir, filename=LABELS_FILENAME):\n  \"\"\"Specifies whether or not the dataset directory contains a label map file.\n\n  Args:\n    dataset_dir: The directory in which the labels file is found.\n    filename: The filename where the class names are written.\n\n  Returns:\n    `True` if the labels file exists and `False` otherwise.\n  \"\"\"\n  return tf.gfile.Exists(os.path.join(dataset_dir, filename))\n\n\ndef read_label_file(dataset_dir, filename=LABELS_FILENAME):\n  \"\"\"Reads the labels file and returns a 
mapping from ID to class name.\n\n  Args:\n    dataset_dir: The directory in which the labels file is found.\n    filename: The filename where the class names are written.\n\n  Returns:\n    A map from a label (integer) to class name.\n  \"\"\"\n  labels_filename = os.path.join(dataset_dir, filename)\n  with tf.gfile.Open(labels_filename, 'r') as f:\n    lines = f.read().decode()\n  lines = lines.split('\\n')\n  lines = filter(None, lines)\n\n  labels_to_class_names = {}\n  for line in lines:\n    index = line.index(':')\n    labels_to_class_names[int(line[:index])] = line[index+1:]\n  return labels_to_class_names\n"
  },
  {
    "path": "src/datasets/hico.py",
    "content": "\"\"\"Provides data for the HMDB51 dataset.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets.video_data_utils import gen_dataset\n\n_HICO_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/005_HICO/data_videoFormat/001_Basic/train_test_lists'\n_HICO_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Image/003_HICO/data_videoFormat/001_Basic/features/001_CPMPose/'\n_HICO_DATASET_DIR = '/scratch/rgirdhar/Datasets/Image/003_HICO/data_videoFormat/001_Basic/frames'\n\ndef get_split(split_name, dataset_dir,\n              file_pattern=None,\n              reader=None, **kwargs):\n\n  _NUM_CLASSES = 600\n  # There are no pose labels, but need to keep this to load models from MPII\n  # trained\n  # Also, now the processing can still avoided by having no loss on pose\n  _NUM_POSE_KEYPOINTS = 16\n  if 'dataset_list_dir' not in kwargs:\n    dataset_list_dir = _HICO_TRINITY_LIST_DIR\n  else:\n    dataset_list_dir = kwargs['dataset_list_dir']\n  _LIST_FN = lambda split, id: \\\n      '%s/%s_split%d.txt' % (\n        dataset_list_dir,\n        split, id)\n\n  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS\n  kwargs['num_classes'] = _NUM_CLASSES\n  kwargs['list_fn'] = _LIST_FN\n  input_file_style = '3-col'\n  kwargs.pop('dataset_list_dir', [])\n  return gen_dataset(split_name, dataset_dir,\n                     file_pattern, reader,\n                     pose_dataset_dir=_HICO_TRINITY_POSE_LABEL_DIR,\n                     input_file_style=input_file_style,\n                     **kwargs), _NUM_POSE_KEYPOINTS\n"
  },
  {
    "path": "src/datasets/hmdb51.py",
    "content": "\"\"\"Provides data for the HMDB51 dataset.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets.video_data_utils import gen_dataset\n\n_HMDB51_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/001_NetVLADVideo/raw/HMDB51/lists/train_test_lists2'\n_HMDB51_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/002_HMDB51/processed/features/002_CPM_Pose/'\n_HMDB51_TRINITY_OBJECTS_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/002_HMDB51/processed/features/001_YOLO9K_cocoDets_denseFilledIn'\n\ndef get_split(split_name, dataset_dir,\n              file_pattern=None,\n              reader=None, **kwargs):\n              # dataset_list_dir=_HMDB51_TRINITY_LIST_DIR,\n              # modality='rgb', num_samples=1,\n              # split_id=1, **kwargs):\n\n  _NUM_CLASSES = 51\n  # There are no pose labels, but need to keep this to load models from MPII\n  # trained\n  # Also, now the processing can still avoided by having no loss on pose\n  _NUM_POSE_KEYPOINTS = 16\n  _LIST_FN = lambda split, id: \\\n      '%s/%s_split%d.txt' % (\n        kwargs['dataset_list_dir'] if 'dataset_list_dir' in kwargs\n        else _HMDB51_TRINITY_LIST_DIR,\n        split, id)\n\n  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS\n  kwargs['num_classes'] = _NUM_CLASSES\n  kwargs['list_fn'] = _LIST_FN\n  return gen_dataset(split_name, dataset_dir, file_pattern,\n                     reader,\n                     pose_dataset_dir=_HMDB51_TRINITY_POSE_LABEL_DIR,\n                     objects_dataset_dir=_HMDB51_TRINITY_OBJECTS_LABEL_DIR,\n                     **kwargs), _NUM_POSE_KEYPOINTS\n                     # modality, num_samples, split_id,\n                     # _NUM_CLASSES, _LIST_FN, **kwargs), _NUM_POSE_KEYPOINTS\n"
  },
  {
    "path": "src/datasets/image_read_utils.py",
    "content": "import tensorflow as tf\n\n# TODO: move this to the main train script if useful. Not a good idea to have this inside.\ntf.app.flags.DEFINE_string(\n    'pose_style', 'heatmap',\n    'Select style for pose to be rendered [heatmap/render].')\nFLAGS = tf.app.flags.FLAGS\n\nIM_HT = 256\nIM_WD = 340\n\ndef _get_frame_sublist(start_frame, duration, num_samples, num_consec_frames,\n                       randomFromSegmentStyle=None):\n  # follow segmental architecture\n  res = []\n  step = tf.cast((duration - tf.constant(num_consec_frames)) / \n                 (tf.constant(num_samples)), 'int32')\n  step = tf.maximum(step, 1)\n  cur_end_point = 0\n  if randomFromSegmentStyle is None:\n    if num_samples == 1:\n      randomFromSegmentStyle = True  # because otherwise would not make sense\n    else:\n      randomFromSegmentStyle = False\n  # start_frame = tf.Print(start_frame, [start_frame], 'Using start frame: ')\n  # The following will be printed as many times as the number of read threads\n  if randomFromSegmentStyle:\n    tf.logging.info('Reading in random segment style')\n  else:\n    tf.logging.info('IMP NOTE:: Reading uniform frames')\n  for i in range(num_samples):\n    if randomFromSegmentStyle:\n      res.append(tf.random_uniform([1],\n                                   tf.minimum(start_frame + step * i,\n                                              duration-num_consec_frames-1),\n                                   tf.minimum(start_frame + step * (i+1),\n                                              duration-num_consec_frames),\n                                   dtype='int32')[0])\n    else:\n      res.append(tf.minimum(start_frame + step * i, duration - 1))\n  # To debug\n  # res[0] = tf.Print(res[0], res, 'Offsets:' )\n  [el.set_shape(()) for el in res]\n  return res\n\ndef _get_frame_sublist_SAME_AS_CAFFE(\n  start_frame, duration, num_samples, num_consec_frames,\n  randomFromSegmentStyle=None):\n  # follow segmental architecture\n  res = 
[]\n  avg_duration = tf.cast(duration / tf.constant(num_samples), 'int32')\n  cur_end_point = 0\n  if randomFromSegmentStyle is None:\n    if num_samples == 1:\n      randomFromSegmentStyle = True  # because otherwise would not make sense\n    else:\n      randomFromSegmentStyle = False\n  # start_frame = tf.Print(start_frame, [start_frame], 'Using start frame: ')\n  # The following will be printed as many times as the number of read threads\n  if randomFromSegmentStyle:\n    tf.logging.info('Reading in random segment style')\n  else:\n    tf.logging.info('IMP NOTE:: Reading uniform frames')\n  for i in range(num_samples):\n    if randomFromSegmentStyle:\n      offset = tf.random_uniform([1], 0, avg_duration-num_consec_frames+1,\n                                 dtype=tf.int32)\n      T = tf.cond(tf.greater_equal(avg_duration, num_consec_frames),\n                  lambda: offset + i * avg_duration,\n                  lambda: tf.constant([1]))\n      res.append(T[0])\n    else:\n      T = tf.cond(tf.greater_equal(avg_duration, num_consec_frames),\n                  lambda: (\n                    avg_duration-num_consec_frames+1)/2 + i*avg_duration,\n                  lambda: tf.constant([1]))\n      res.append(T[0])\n  # To debug\n  # res[0] = tf.Print(res[0], res, 'Offsets:' )\n  return res\n\ndef _read_from_disk_spatial(fpath, nframes, num_samples=25, start_frame=0,\n                            file_prefix='', file_zero_padding=4, file_index=1,\n                            dataset_dir='', frame_sublist=None,\n                            randomFromSegmentStyle=None):\n    if frame_sublist is None:\n      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples, 1,\n                                        randomFromSegmentStyle)\n    allimgs = []\n    with tf.variable_scope('read_rgb_video'):\n        for i in range(num_samples):\n            with tf.variable_scope('read_rgb_image'):\n                prefix = file_prefix + '_' if file_prefix else ''\n   
             impath = tf.string_join([\n                    tf.constant(dataset_dir + '/'),\n                    fpath, tf.constant('/'),\n                    prefix,\n                    tf.as_string(frame_sublist[i] + file_index,\n                      width=file_zero_padding, fill='0'),\n                    tf.constant('.jpg')])\n                # To debug\n                # impath = tf.Print(impath, [impath], message='Reading image:')\n                img_str = tf.read_file(impath)\n            allimgs.append(img_str)\n    return allimgs\n\n\ndef _read_from_disk_temporal(\n    fpath, nframes, num_samples=25,\n    optical_flow_frames=10, start_frame=0,\n    file_prefix='', file_zero_padding=4, file_index=1,\n    dataset_dir='', frame_sublist=None, randomFromSegmentStyle=None):\n    if frame_sublist is None:\n      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples,\n                                         optical_flow_frames,\n                                         randomFromSegmentStyle)\n    allimgs = []\n    with tf.variable_scope('read_flow_video'):\n        for i in range(num_samples):\n            with tf.variable_scope('read_flow_image'):\n              flow_img = []\n              for j in range(optical_flow_frames):\n                # To protect for small videos, avoid overshooting the filelist\n                frame_id = frame_sublist[i] + j\n                frame_id = tf.cond(\n                  tf.greater(frame_id, nframes-2),\n                  lambda: nframes-2,\n                  lambda: frame_id)\n\n                with tf.variable_scope('read_flow_channels'):\n                  for dr in ['x', 'y']:\n                    prefix = file_prefix + '_' if file_prefix else ''\n                    impath = tf.string_join([\n                        tf.constant(dataset_dir + '/'),\n                        fpath, tf.constant('/'),\n                        prefix, '%s_' % dr,\n                        tf.as_string(frame_id + 
file_index,\n                          width=file_zero_padding, fill='0'),\n                        tf.constant('.jpg')])\n                    # impath = tf.Print(impath, [impath], \"Read file: \")\n                    img_str = tf.read_file(impath)\n                    flow_img.append(img_str)\n              allimgs.append(flow_img)\n    return allimgs\n\n\ndef _read_from_disk_pose(\n    fpath, nframes, num_samples=25,\n    pose_frames=5, start_frame=0,\n    file_prefix='', file_zero_padding=4, file_index=1,\n    dataset_dir='', frame_sublist=None, randomFromSegmentStyle=None,\n    file_ext='.jpg'):\n    from custom_ops.custom_ops_factory import read_file_safe\n    if frame_sublist is None:\n      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples,\n                                         pose_frames,\n                                         randomFromSegmentStyle)\n    allimgs = []\n    with tf.variable_scope('read_pose_video'):\n      for i in range(num_samples):\n        with tf.variable_scope('read_pose_image'):\n          pose_img = []\n          for j in range(pose_frames):\n            # To protect for small videos, avoid overshooting the filelist\n            frame_id = frame_sublist[i] + j\n            frame_id = tf.cond(\n              tf.greater(frame_id, nframes-1),  # there are nframes-1 flow\n              lambda: nframes-1,\n              lambda: frame_id)\n\n            prefix = file_prefix + '_' if file_prefix else ''\n            impath = tf.string_join([\n              tf.constant(dataset_dir + '/'),\n              fpath, tf.constant('/'),\n              prefix,\n              tf.as_string(frame_id + file_index,\n              width=file_zero_padding, fill='0'),\n              tf.constant(file_ext)])\n            # img_str = tf.read_file(impath)\n            img_str = read_file_safe(impath)\n            pose_img.append(img_str)\n          allimgs.append(pose_img)\n    return allimgs\n\n\ndef decode_rgb(img_str):\n  with 
tf.variable_scope('decode_rgb_frame'):\n    img = tf.image.decode_jpeg(img_str, channels=3)\n    # Always convert before resize, this is a bug in TF\n    # https://github.com/tensorflow/tensorflow/issues/1763\n    # IMPORTANT NOTE: The original netvlad model was trained with the convert\n    # happening after the resize, and hence it's trained with the large values.\n    # It still works if I do that, but I'm training a new netvlad RGB model\n    # with the current setup.\n    img = tf.image.convert_image_dtype(img, dtype=tf.float32)\n  return [img]\n\n\ndef decode_flow(img_str, perImageChannels=1):\n  # IMPORTANT NOTE: I am now resizing the flow frames before running through\n  # the preprocessing. I was not doing that earlier (in the master). This leads\n  # to the 66 number to drop to 63 on HMDB. But it should be fixable by\n  # re-training with this setup\n  with tf.variable_scope('decode_flow_frame'):\n    img = tf.concat([tf.image.decode_jpeg(el, channels=perImageChannels)\n      for el in tf.unstack(img_str)], axis=2)\n    # Always convert before resize, this is a bug in TF\n    # https://github.com/tensorflow/tensorflow/issues/1763\n    img = tf.image.convert_image_dtype(img, dtype=tf.float32)\n  return [img]\n\n\ndef decode_poseJson(img_str, perImageChannels=1):\n  from custom_ops.custom_ops_factory import json_to_pose\n  with tf.variable_scope('decode_poseJson_frame'):\n    pose_style = FLAGS.pose_style\n    img = tf.concat([json_to_pose(\n      el, out_height=IM_HT, out_width=IM_WD,\n      marker_wid=5 if pose_style=='render' else 20,\n      out_style=pose_style)\n      for el in img_str], axis=2)\n    # img = tf.image.resize_images(img, [IM_HT, IM_WD]) # not any faster\n    # TODO: remove the following checks once sure\n    # with tf.control_dependencies(\n    #   [tf.assert_less_equal(img, tf.constant(1.5)),\n    #    tf.assert_greater_equal(img, tf.constant(-0.5))]):\n    #   img = tf.identity(img)\n    # img = tf.image.convert_image_dtype(img, 
dtype=tf.float32)\n  return [img]\n\n\ndef _decode_from_string(img_str, modality):\n  if modality == 'rgb':\n    img = decode_rgb(img_str)\n  elif modality.startswith('flow'):\n    img = decode_flow(img_str)\n  elif modality.startswith('rgb+flow'):\n    with tf.name_scope('decode_rgbNflow'):\n      img_rgb = decode_rgb(img_str[..., 0])\n      img_flow = decode_flow(img_str[..., 1:])\n      img = [img_rgb[0], img_flow[0]]\n  elif modality.startswith('posejson'):\n    img = decode_poseJson(img_str)\n  elif modality.startswith('pose'):\n    img = decode_flow(img_str, perImageChannels=3)\n  im_ht = tf.reduce_max([tf.shape(el)[-3] for el in img])\n  im_wd = tf.reduce_max([tf.shape(el)[-2] for el in img])\n  img = [tf.image.resize_images(el, [IM_HT, IM_WD]) for el in img]\n  return img, im_ht, im_wd\n"
  },
  {
    "path": "src/datasets/jhmdb21.py",
    "content": "\"\"\"Provides data for the JHMDB21 dataset.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datasets.video_data_utils import gen_dataset\n\n_JHMDB21_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/002_JHMDB/Processed/Lists/train_test_lists/'\n_JHMDB21_TRINITY_POSE_LABEL_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/002_JHMDB/Processed/Features/001_CPM_Pose/'\n_JHMDB21_TRINITY_OBJECTS_LABEL_DIR = ''\n\ndef get_split(split_name, dataset_dir,\n              file_pattern=None,\n              reader=None, **kwargs):\n              # dataset_list_dir=_JHMDB21_TRINITY_LIST_DIR,\n              # modality='rgb', num_samples=1,\n              # split_id=1, **kwargs):\n\n  _NUM_CLASSES = 21\n  # There are no pose labels, but need to keep this to load models from MPII\n  # trained\n  # Also, now the processing can still avoided by having no loss on pose\n  _NUM_POSE_KEYPOINTS = 16\n  _LIST_FN = lambda split, id: \\\n      '%s/%s_split%d.txt' % (\n        kwargs['dataset_list_dir'] if 'dataset_list_dir' in kwargs\n        else _JHMDB21_TRINITY_LIST_DIR,\n        split, id)\n\n  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS\n  kwargs['num_classes'] = _NUM_CLASSES\n  kwargs['list_fn'] = _LIST_FN\n  return gen_dataset(split_name, dataset_dir, file_pattern,\n                     reader,\n                     pose_dataset_dir=_JHMDB21_TRINITY_POSE_LABEL_DIR,\n                     objects_dataset_dir=_JHMDB21_TRINITY_OBJECTS_LABEL_DIR,\n                     **kwargs), _NUM_POSE_KEYPOINTS\n                     # modality, num_samples, split_id,\n                     # _NUM_CLASSES, _LIST_FN, **kwargs), _NUM_POSE_KEYPOINTS\n"
  },
  {
    "path": "src/datasets/mpii.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport glob\nimport tensorflow as tf\n\nslim = tf.contrib.slim\n\n_FILE_PATTERN = 'mpii_%s_*.tfrecord'\n\nSPLITS_TO_SIZES = {'trainval_train': 8219, 'trainval_val': 6988,\n                   'trainval': 15207,  # 8219 + 6988\n                   'test': 5709}\n\n_NUM_CLASSES = 393  # activities\n\n_NUM_POSE_KEYPOINTS = 16\n\n_ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A color image of varying size.',\n    'label': 'A pose representation, [x1,y1,is_visible1,...]',\n}\n\ndef _tfrecord_file_pattern_to_list(pattern):\n  res = glob.glob(pattern)\n  return sorted(res)\n\n\ndef get_split(split_name, dataset_dir, file_pattern=None, reader=None):\n  \"\"\"Gets a dataset tuple with instructions for reading flowers.\n\n  Args:\n    split_name: A train/validation split name.\n    dataset_dir: The base directory of the dataset sources.\n    file_pattern: The file pattern to use when matching the dataset sources.\n      It is assumed that the pattern contains a '%s' string so that the split\n      name can be inserted.\n    reader: The TensorFlow reader type.\n\n  Returns:\n    A `Dataset` namedtuple.\n\n  Raises:\n    ValueError: if `split_name` is not a valid train/validation split.\n  \"\"\"\n  if split_name not in SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' 
% split_name)\n\n  if not file_pattern:\n    file_pattern = _FILE_PATTERN\n  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)\n  # The following is important to ensure the files are read in order, because\n  # otherwise test time output can be generated in any random order\n  file_pattern = _tfrecord_file_pattern_to_list(file_pattern)\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if reader is None:\n    reader = tf.TFRecordReader\n\n  keys_to_features = {\n      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),\n      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),\n      'image/class/pose': tf.VarLenFeature(dtype=tf.int64),\n      'image/class/action_label': tf.FixedLenFeature(\n        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),\n      'image/height': tf.FixedLenFeature(\n        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),\n      'image/width': tf.FixedLenFeature(\n        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),\n  }\n\n  items_to_handlers = {\n      'image': slim.tfexample_decoder.Image(),\n      'pose': slim.tfexample_decoder.Tensor('image/class/pose'),\n      'action_label': slim.tfexample_decoder.Tensor('image/class/action_label'),\n      'im_ht': slim.tfexample_decoder.Tensor('image/height'),\n      'im_wd': slim.tfexample_decoder.Tensor('image/width'),\n  }\n\n  decoder = slim.tfexample_decoder.TFExampleDecoder(\n      keys_to_features, items_to_handlers)\n\n  labels_to_names = None\n\n  return slim.dataset.Dataset(\n      data_sources=file_pattern,\n      reader=reader,\n      decoder=decoder,\n      num_samples=SPLITS_TO_SIZES[split_name],\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      num_classes=_NUM_CLASSES,\n      labels_to_names=labels_to_names), _NUM_POSE_KEYPOINTS\n"
  },
  {
    "path": "src/datasets/video_data_utils.py",
    "content": "\"\"\"Provides data for the UCF101 dataset.\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport numpy as np\nimport tensorflow as tf\nimport sys\n\nfrom datasets import dataset_utils\nfrom datasets.image_read_utils import _read_from_disk_spatial, \\\n    _decode_from_string, _read_from_disk_temporal, _get_frame_sublist, \\\n    _read_from_disk_pose\nfrom tensorflow.python.platform import tf_logging as logging\nfrom custom_ops.custom_ops_factory import read_json_pose, read_sparse_label\n\nslim = tf.contrib.slim\n\n\nclass PreReadTextLineReader(tf.ReaderBase):\n  @staticmethod\n  def read(lines_queue):\n    # just return the line from this queue.\n    # The queue will be randomized if training and not if not.\n    # Standard tf.TextLineReader will open the file and return line by line, we\n    # don't want that, but want to randomize the whole file. Hence, this solves\n    # this by first reading the whole file into the queue and then just picking\n    # stuff from the queue.\n    video_information = lines_queue.dequeue()\n    return [video_information, video_information]  # make the video_info as the\n                                                   # key for this datapoint as\n                                                   # well\n\n\ndef decode_train_file_line(line, input_file_style='3-col',\n                           input_file_style_label='one-label'):\n  start_frame = 0\n  if input_file_style == '3-col':\n    fpath, nframes, label = tf.decode_csv(\n        line, record_defaults=[[''], [-1], ['']],\n        field_delim=' ')\n  elif input_file_style == '4-col':\n    fpath, start_frame, nframes, label = tf.decode_csv(\n        line, record_defaults=[[''], [-1], [-1], ['']],\n        field_delim=' ')\n  else:\n    raise ValueError('Unknown input file style: {0}'.format(\n      input_file_style))\n\n  if input_file_style_label == 'one-label':\n    label 
= tf.string_to_number(label, out_type=tf.int32)\n    label.set_shape(())\n  elif input_file_style_label.startswith('multi-label'):\n    nclasses = int(input_file_style_label[len('multi-label'):])\n    label = read_sparse_label(label, nclasses)\n    label.set_shape((nclasses,))\n  return fpath, start_frame, nframes, label\n\n\ndef getReaderFn(num_samples, modality='rgb', dataset_dir='',\n                randomFromSegmentStyle=None,\n                input_file_style='3-col',\n                input_file_style_label='one-label'):\n  def readerFn():\n    class reader_func(tf.ReaderBase):\n      @staticmethod\n      # def read(filename_queue):\n      def read(value):\n        # value = filename_queue.dequeue()\n        fpath, start_frame, nframes, label = decode_train_file_line(\n          value, input_file_style, input_file_style_label)\n        # TODO(rgirdhar): Release the file_prefix='', file_zero_padding=4,\n        # file_index=1 options to the bash script\n        # TODO: Fix the optical_flow_frame number...\n        optical_flow_frames = 1\n        frame_sublist = _get_frame_sublist(0, nframes, num_samples,\n                                           optical_flow_frames,\n                                           randomFromSegmentStyle=randomFromSegmentStyle)\n        # frame_sublist = tf.Print(frame_sublist, frame_sublist, \"frame sublist:\")\n        if modality == 'rgb':\n          assert(len(dataset_dir) >= 1)\n          image_buffer = _read_from_disk_spatial(\n              fpath, nframes, num_samples=num_samples,\n              start_frame=start_frame,\n              file_prefix='image', file_zero_padding=5, file_index=1,\n              dataset_dir=dataset_dir[0],\n              frame_sublist=frame_sublist,\n              randomFromSegmentStyle=randomFromSegmentStyle)\n        elif modality.startswith('flow'):\n          assert(len(dataset_dir) >= 1)\n          optical_flow_frames = int(modality[4:])\n          image_buffer = _read_from_disk_temporal(\n    
          fpath, nframes, num_samples=num_samples,\n              start_frame=start_frame,\n              optical_flow_frames=optical_flow_frames,\n              file_prefix='flow', file_zero_padding=5, file_index=1,\n              dataset_dir=dataset_dir[0],\n              frame_sublist=frame_sublist,\n              randomFromSegmentStyle=randomFromSegmentStyle)\n        elif modality.startswith('rgb+flow'):\n          assert(len(dataset_dir) >= 2)\n          # in this case, fix the step for both the streams to ensure correspondence\n          optical_flow_frames = int(modality[8:])\n          rgb_image_buffer = _read_from_disk_spatial(\n              fpath, nframes, num_samples=num_samples,\n              start_frame=start_frame,\n              file_prefix='image', file_zero_padding=5, file_index=1,\n              dataset_dir=dataset_dir[0],\n              frame_sublist=frame_sublist)\n          flow_image_buffer = _read_from_disk_temporal(\n              fpath, nframes, num_samples=num_samples,\n              start_frame=start_frame,\n              optical_flow_frames=optical_flow_frames,\n              file_prefix='flow', file_zero_padding=5, file_index=1,\n              dataset_dir=dataset_dir[1],\n              frame_sublist=frame_sublist)\n          image_buffer = zip(rgb_image_buffer, flow_image_buffer)\n          image_buffer = [[el[0]] + el[1] for el in image_buffer]\n        elif modality.startswith('pose'):\n          assert(len(dataset_dir) >= 1)\n          if modality.startswith('posejson'):\n            pose_frames = int(modality[8:])\n            file_ext = '.json'\n          elif modality.startswith('pose'):\n            pose_frames = int(modality[4:])\n            file_ext = '.jpg'\n          image_buffer = _read_from_disk_pose(\n              fpath, nframes, num_samples=num_samples,\n              start_frame=start_frame,\n              pose_frames=pose_frames,\n              file_prefix='image', file_zero_padding=5, file_index=1,\n              
dataset_dir=dataset_dir[0],\n              frame_sublist=frame_sublist,\n              randomFromSegmentStyle=randomFromSegmentStyle,\n              file_ext=file_ext)\n        else:\n          logging.error('Unknown modality %s\\n' % modality)\n          raise ValueError()\n        return [image_buffer, label, fpath, frame_sublist, start_frame]\n    return reader_func\n  return readerFn\n\n\ndef decoderFn(\n  reader, num_samples=1, modality='rgb', dataset_dir='',\n  randomFromSegmentStyle=True, num_pose_keypoints=16,\n  pose_dataset_dir=None,\n  num_object_catagories=80, objects_dataset_dir=None):\n  class decoder_func(slim.data_decoder.DataDecoder):\n    @staticmethod\n    def list_items():\n      return ['image', 'action_label', 'pose', 'im_ht', 'im_wd', 'objects']\n\n    @staticmethod\n    def decode(data, items):\n      out = {}\n      # Arguments:\n      # data: Can be 3-col or 4-col CSV. A 3-col would look like \"filepath\n      # nframes class_id\", 4-col will be similar for Charades like dataset\n      # items: The different items to be returned.\n      with tf.name_scope('decode_video'):\n        if modality == 'rgb' or \\\n           modality.startswith('flow') or \\\n           modality.startswith('rgb+flow') or \\\n           modality.startswith('pose'):\n          image_buffer, label, fpath, frame_sublist, start_frame = reader.read(data)\n          # stacking required due to the way queues in main train loop work\n          # image_buffer = tf.stack([tf.stack(_decode_from_string(el, modality)) for\n          #                 el in image_buffer])\n          image_lst = []\n          image_hts = []\n          image_wds = []\n          for im_buf in image_buffer:\n            temp = _decode_from_string(im_buf, modality)\n            image_lst += temp[0]\n            image_hts.append(temp[1])\n            image_wds.append(temp[2])\n          image_buffer = tf.stack(image_lst)\n          im_ht = tf.reduce_max(image_hts)\n          im_wd = 
tf.reduce_max(image_wds)\n          # image_buffer = tf.stack([\n          #   _decode_from_string(el, modality)[0] for el in image_buffer])\n        else:\n          logging.error('Unknown modality %s\\n' % modality)\n        # since my code gives a 0-1 image, change it back\n        out['image'] = tf.cast(image_buffer * 255.0, tf.uint8)\n        if 'pose' in items:\n          if pose_dataset_dir is None:\n            out['pose'] = [-tf.ones([num_pose_keypoints * 3,], dtype=tf.int64)]\n          else:\n            out['pose'] = [read_json_pose(tf.string_join([\n              pose_dataset_dir, '/', fpath, '/',\n              'image_',\n              tf.as_string(frame_sublist_i+1, width=5, fill='0'),\n              '.json'])) for frame_sublist_i in tf.unstack(frame_sublist)]\n        if 'objects' in items:\n          if objects_dataset_dir is None:\n            out['objects'] = []\n          else:\n            out['objects'] = [tf.read_file(tf.string_join([\n              objects_dataset_dir, '/', fpath, '/',\n              'image_',\n              tf.as_string(frame_sublist_i+1, width=5, fill='0'),\n              '.txt'])) for frame_sublist_i in tf.unstack(frame_sublist)]\n        out['action_label'] = label\n        # The following is the original image size on disk,\n        # on which pose etc would have been computed\n        out['im_wd'] = tf.cast(im_wd, tf.int64)\n        out['im_ht'] = tf.cast(im_ht, tf.int64)\n        return [out[el] for el in items]\n  return decoder_func\n\n\ndef count_frames_file(fpath, frameLevel=True):\n  res = 0\n  with open(fpath, 'r') as fin:\n    for line in fin:\n      if frameLevel:\n        res += int(line.split()[1])\n      else:\n        res += 1\n  return res\n\n\ndef gen_dataset(split_name, dataset_dir, file_pattern=None,\n                reader=None,\n                pose_dataset_dir=None,\n                objects_dataset_dir=None,\n                modality='rgb', num_samples=1,\n                split_id=1, num_classes=0, 
list_fn=None,\n                input_file_style='3-col',\n                randomFromSegmentStyle=None, num_pose_keypoints=16,\n                num_object_catagories=80,\n                input_file_style_label='one-label'):\n  \"\"\"\n  input_file_style_label: ['one-label'/'multi-label%d' % integer]\n  \"\"\"\n  SPLITS_TO_SIZES = {\n    'train': count_frames_file(list_fn('train', split_id), frameLevel=(num_samples==1)),\n    'test': count_frames_file(list_fn('test', split_id), frameLevel=(num_samples==1)),\n  }\n  if split_name not in SPLITS_TO_SIZES:\n    raise ValueError('split name %s was not recognized.' % split_name)\n\n  _ITEMS_TO_DESCRIPTIONS = {\n    'image': 'A [? x ? x 3] color image.',\n    'label': 'A single integer between 0 and %d' % num_classes,\n  }\n  LIST_FILE = list_fn(split_name, split_id)\n  logging.info('Using file %s' % LIST_FILE)\n  with open(LIST_FILE, 'r') as fin:\n    data_sources = fin.read().splitlines()  # don't randomize here, in testing\n                                            # I'll run without randomizing, and\n                                            # the queue is going to randomize\n                                            # automatically anyway\n\n  # Allowing None in the signature so that dataset_factory can use the default.\n  if not reader:\n    reader = getReaderFn(num_samples, modality, [dataset_dir],\n                         randomFromSegmentStyle, input_file_style,\n                         input_file_style_label)\n\n  labels_to_names = None\n  # if dataset_utils.has_labels(dataset_dir):\n  #   labels_to_names = dataset_utils.read_label_file(dataset_dir)\n\n  return slim.dataset.Dataset(\n      data_sources=data_sources,\n      reader=lambda: PreReadTextLineReader,\n      decoder=decoderFn(reader(), num_samples, modality, [dataset_dir],\n                        randomFromSegmentStyle, num_pose_keypoints,\n                        pose_dataset_dir,\n                        num_object_catagories,\n                 
       objects_dataset_dir),\n      num_samples=SPLITS_TO_SIZES[split_name],\n      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,\n      num_classes=num_classes,\n      labels_to_names=labels_to_names)\n"
  },
  {
    "path": "src/eval/__init__.py",
    "content": ""
  },
  {
    "path": "src/eval/cap_eval_utils.py",
    "content": "# --------------------------------------------------------\n# Written by Saurabh Gupta\n# Modified by Ishan Misra\n# rgirdhar: Obtained on March-09-2017 from\n# https://github.com/imisra/latent-noise-icnm/blob/master/cap_eval_utils.py\n# --------------------------------------------------------\nimport numpy as np\nfrom scipy.interpolate import interp1d\n\nfrom IPython.core.debugger import Tracer\nimport code\n\ndef calc_pr_ovr(counts, out, K):\n  \"\"\"\n  [P, R, score, ap] = calc_pr_ovr(counts, out, K)\n  Input    :\n    counts : number of occurrences of this word in the ith image\n    out    : score for this image\n    K      : number of references\n  Output   :\n    P, R   : precision and recall\n    score  : score which corresponds to the particular precision and recall\n    ap     : average precision\n  \"\"\"\n  K = np.float64(K)\n  tog = np.hstack((counts[:,np.newaxis].astype(np.float64), out[:, np.newaxis].astype(np.float64)))\n  ind = np.argsort(out)\n  ind = ind[::-1]\n  score = np.array([tog[i,1] for i in ind])\n  sortcounts = np.array([tog[i,0] for i in ind])\n\n  tp = sortcounts*(1.-1./K);\n  fp = sortcounts.copy();\n  for i in xrange(sortcounts.shape[0]):\n    if sortcounts[i] > 1:\n      fp[i] = 0.;\n    elif sortcounts[i] == 0:\n      fp[i] = 1.;\n    elif sortcounts[i] == 1:\n      fp[i] = 1./K;\n  \n  P = np.cumsum(tp)/(np.cumsum(tp) + np.cumsum(fp));\n\n  # c = accumarray(sortcounts(:)+1, 1);\n  c = [np.sum(np.array(sortcounts) == i) for i in xrange(int(max(sortcounts)+1))]\n  ind = np.array(range(0, len(c)));\n  numinst = ind*c*(K-1.)/K;\n  numinst = np.sum(numinst, axis = 0)\n  R = np.cumsum(tp)/numinst\n  \n  ap = voc_ap(R,P)\n  return P, R, score, ap\n\n\ndef calc_pr_ovr_noref(counts, out):\n  \"\"\"\n  [P, R, score, ap] = calc_pr_ovr(counts, out, K)\n  Input    :\n    counts : number of occurrences of this word in the ith image\n    out    : score for this image\n    K      : number of references\n  Output   :\n    P, R   : 
precision and recall\n    score  : score which corresponds to the particular precision and recall\n    ap     : average precision\n  \"\"\" \n  #binarize counts\n  counts = np.array(counts > 0, dtype=np.float32);\n  tog = np.hstack((counts[:,np.newaxis].astype(np.float64), out[:, np.newaxis].astype(np.float64)))\n  ind = np.argsort(out)\n  ind = ind[::-1]\n  score = np.array([tog[i,1] for i in ind])\n  sortcounts = np.array([tog[i,0] for i in ind])\n\n  tp = sortcounts;\n  fp = sortcounts.copy();\n  for i in xrange(sortcounts.shape[0]):\n    if sortcounts[i] >= 1:\n      fp[i] = 0.;\n    elif sortcounts[i] < 1:\n      fp[i] = 1.;\n  P = np.cumsum(tp)/(np.cumsum(tp) + np.cumsum(fp));\n\n  numinst = np.sum(counts);\n\n  R = np.cumsum(tp)/numinst\n\n  ap = voc_ap(R,P)\n  return P, R, score, ap\n\n\ndef voc_ap(rec, prec):\n  \"\"\"\n  ap = voc_ap(rec, prec)\n  Computes the AP under the precision recall curve.\n  \"\"\"\n\n  rec = rec.reshape(rec.size,1); prec = prec.reshape(prec.size,1)\n  z = np.zeros((1,1)); o = np.ones((1,1));\n  mrec = np.vstack((z, rec, o))\n  mpre = np.vstack((z, prec, z))\n  for i in range(len(mpre)-2, -1, -1):\n    mpre[i] = max(mpre[i], mpre[i+1])\n\n  I = np.where(mrec[1:] != mrec[0:-1])[0]+1;\n  ap = 0;\n  for i in I:\n    ap = ap + (mrec[i] - mrec[i-1])*mpre[i];\n  return ap\n\ndef compute_precision_score_mapping(thresh, prec, score):\n  ind = np.argsort(thresh);\n  thresh = thresh[ind];\n  prec = prec[ind];\n  for i in xrange(1, len(prec)):\n    prec[i] = max(prec[i], prec[i-1]);\n  \n  indexes = np.unique(thresh, return_index=True)[1]\n  indexes = np.sort(indexes);\n  thresh = thresh[indexes]\n  prec = prec[indexes]\n  \n  thresh = np.vstack((min(-1000, min(thresh)-1), thresh[:, np.newaxis], max(1000, max(thresh)+1)));\n  prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]));\n  \n  f = interp1d(thresh[:,0], prec[:,0])\n  val = f(score)\n  return val\n\ndef human_agreement(gt, K):\n  \"\"\"\n  function [prec, recall] = 
human_agreement(gt, K)\n  \"\"\"\n  c = np.zeros((K+1,1), dtype=np.float64)\n  # namespace = globals().copy()\n  # namespace.update(locals())\n  # code.interact(local=namespace)\n\n  for i in xrange(len(gt)):\n    if gt[i]<K+1:\n      c[gt[i]] += 1;\n  #maxRun = len(gt);  \n  # if len(gt) > K+1:\n  #   print 'warning: '\n  #   maxRun = K+1;\n  # for i in xrange(maxRun):\n  #   c[gt[i]] += 1;\n  \n  c = c/np.sum(c);\n  ind = np.array(range(len(c)))[:, np.newaxis]\n\n  n_tp = sum(ind*(ind-1)*c)/K;\n  n_fp = c[1]/K;\n  numinst = np.sum(c * (K-1) * ind) / K;\n  prec = n_tp / (n_tp+n_fp);\n  recall = n_tp / numinst;\n  \n  \n  return prec, recall\n\n#follows from http://arxiv.org/pdf/1312.4894v2.pdf (Sec 4.2)\ndef compute_warpstyle_pr(gtLabel, predMat, topK):\n  assert gtLabel.shape == predMat.shape, 'gt {}; pred {}'.format(gtLabel.shape, predMat.shape)\n  gtLabel = gtLabel.astype(np.float64)\n  predMat = predMat.astype(np.float64)\n  numTags = gtLabel.shape[1];\n  numIm = gtLabel.shape[0];\n\n  #first look at topK predictions per image\n  topPreds = np.zeros_like(predMat);\n  for imInd in range(numIm):\n    topKInds = im_utils.maxk(predMat[imInd,...], topK);\n    topPreds[imInd, topKInds] = 1;\n  # tb.print_stack();namespace = globals().copy();namespace.update(locals());code.interact(local=namespace)\n  gtLabel = (gtLabel > 0).astype(np.float64)\n  topPreds = (topPreds > 0).astype(np.float64)\n  corrMat = np.logical_and(gtLabel, topPreds).astype(np.float64)\n  nc_per_tag = corrMat.sum(axis=0).astype(np.float64);\n  ng_per_tag = gtLabel.sum(axis=0).astype(np.float64);\n  np_per_tag = topPreds.sum(axis=0).astype(np.float64);\n  #mean per-class\n  perclass_recall = 0.0;\n  perclass_precision = 0.0;\n  eps = 1e-6;\n  for t in range(numTags):\n    cr = nc_per_tag[t]/(ng_per_tag[t]+eps);\n    cp = nc_per_tag[t]/(np_per_tag[t]+eps);\n    perclass_precision += cp;\n    perclass_recall += cr;\n  perclass_precision = (1.0/numTags) * perclass_precision;\n  perclass_recall = 
(1.0/numTags) * perclass_recall;\n\n  #overall\n  overall_recall = nc_per_tag.sum()/(ng_per_tag.sum()+eps);\n  overall_precision = nc_per_tag.sum()/(np_per_tag.sum()+eps);\n  return perclass_precision, perclass_recall, overall_precision, overall_recall;\n\ndef print_benchmark_latex(evalFile, vocab = None, sortBy = \"words\", \\\n  printWords = False, printPos = True, printAgg = False, possOrder=None):\n  #evalFile has the following ['details', 'agg', 'vocab', 'imdb'] \n  evalData = sg_utils.load_variables(evalFile);\n  if vocab==None:\n    vocab = evalData['vocab'];\n  if 'details' in evalData:\n    details = evalData['details'];\n  else:\n    details = evalData;\n  ap = details['ap'];\n  prec_at_human_rec = details['prec_at_human_rec'];\n  human_prec = details['prec_at_human_rec'];\n  words = vocab['words'];\n  ind = 0;\n  if possOrder is None:\n    possOrder = ['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']\n  print ' '.join(possOrder);\n  for pos in possOrder:\n    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]\n    ind = np.asarray(ind,dtype=np.int32)\n    if any( np.isnan(ap[0,ind] )):\n       #print 'nan numbers ... skipping them for mean'\n       print 'nan numbers ... setting them to zero for mean stats'\n       ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;\n    print '%.1f &'%(100*np.mean(ap[0,ind])),\n  print '%.1f & &'%(100*np.mean(ap[0, :]))\n  for pos in possOrder:\n    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]\n    ind = np.asarray(ind,dtype=np.int32)\n    if any( np.isnan(prec_at_human_rec[0,ind] )) or \\\n       any( np.isnan(human_prec[0,ind] )) :\n       #print 'nan numbers ... skipping them for mean'\n       print 'nan numbers ... 
setting them to zero for mean stats'\n       prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;\n       human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;\n    print '%.1f &'%(100*np.mean(prec_at_human_rec[0,ind])),\n  print '%.1f \\\\\\\\'%(100*np.mean(prec_at_human_rec[0, :]))\n  \n\n\n\ndef print_benchmark_plain(evalFile, vocab = None, \\\n  sortBy = \"words\", printWords = False, printPos = True, printAgg = False):\n  #evalFile has the following ['details', 'agg', 'vocab', 'imdb'] \n  evalData = sg_utils.load_variables(evalFile);\n  if vocab==None:\n    vocab = evalData['vocab'];\n  if 'details' in evalData:\n    details = evalData['details'];\n  else:\n    details = evalData;\n  ap = details['ap'];\n  prec_at_human_rec = details['prec_at_human_rec'];\n  human_prec = details['prec_at_human_rec'];\n  words = vocab['words'];\n  ind = 0;\n\n  if sortBy == \"words\":\n    srtInds = np.argsort(words);\n  elif sortBy == \"ap\":\n    srtInds = np.argsort(ap);\n    srtInds = srtInds[0];\n    srtInds = srtInds[::-1];\n  if printWords == True:\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:^50s}\".format(\"Word metrics\")\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:>15s} {:>8s} {:>6s} :     {:^5s}     {:^5s}\". \\\n      format(\"Words\",\"POS\",\"Counts\",\"mAP\", \"p@H\")\n    for i in srtInds:\n      print \"{:>15s} {:>8s} {:6d} :     {:5.2f}     {:5.2f}\". \\\n        format(words[i], vocab['poss'][i], vocab['counts'][i], 100*np.mean(ap[0, i]), 100*np.mean(prec_at_human_rec[0, i]));\n\n  if printPos:\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:^50s}\".format(\"POS metrics\")\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:>15s} :     {:^5s}     {:^5s}     {:^5s}\". 
\\\n    format(\"POS\", \"mAP\", \"p@H\", \"h\")\n\n    for pos in list(set(vocab['poss'])):\n      ind = [i for i,x in enumerate(vocab['poss']) if pos == x]\n      ind = np.asarray(ind)\n      if any( np.isnan(ap[0,ind] )) or \\\n         any( np.isnan(prec_at_human_rec[0,ind] )) or \\\n         any( np.isnan(human_prec[0,ind] )) :\n         print 'nan numbers ... setting them to zero for mean stats'\n         ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;\n         prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;\n         human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;\n      print \"{:>11s} [{:4d}]:     {:5.2f}     {:5.2f}     {:5.2f}\". \\\n        format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \\\n        100*np.mean(human_prec[0, ind]))\n\n  if printAgg:\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:^50s}\".format(\"Agg metrics\")\n    print \"{:>50s}\".format(\"-\"*50)\n    print \"{:>15s} :     {:^5s}     {:^5s}     {:^5s}\". \\\n      format(\"agg\", \"mAP\", \"p@H\", \"h\")\n    pos = 'all';\n    ind = srtInds;\n    ind = np.asarray(ind);\n    if any( np.isnan(ap[0,ind] )) or \\\n         any( np.isnan(prec_at_human_rec[0,ind] )) or \\\n         any( np.isnan(human_prec[0,ind] )) :\n         print 'nan numbers ... setting them to zero for mean stats'\n         ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;\n         prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;\n         human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;\n    print \"{:>11s} [{:^4d}]     :     {:^5.2f}     {:5.2f}     {:5.2f}\". \\\n      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \\\n        100*np.mean(human_prec[0, ind]))\n"
  },
  {
    "path": "src/eval/utils.py",
    "content": "from eval.cap_eval_utils import calc_pr_ovr_noref\nimport numpy as np\n\ndef compute_map(all_logits, all_labels):\n  num_classes = all_logits.shape[1]\n  APs = []\n  for cid in range(num_classes):\n    this_logits = all_logits[:, cid]\n    this_labels = (all_labels == cid).astype('float32')\n    if np.sum(this_labels) == 0:\n      print('No positive videos for class {}. Ignoring...'.format(cid))\n      continue\n    _, _, _, ap = calc_pr_ovr_noref(this_labels, this_logits)\n    APs.append(ap)\n  mAP = np.mean(APs)\n  return mAP, APs\n"
  },
  {
    "path": "src/eval.py",
    "content": "\"\"\"Generic evaluation script that evaluates a model using a given dataset.\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\nimport argparse\nimport sys\nimport tensorflow as tf\nimport pprint\nimport os\nimport math\nimport cv2\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom tqdm import tqdm\nimport pdb\n\nfrom datasets import dataset_factory\nsys.path.append('../models/slim')\nfrom nets import nets_factory\nfrom preprocessing import preprocessing_factory\nfrom config import cfg, cfg_from_file, cfg_from_list, get_output_dir\nfrom eval.utils import compute_map\nfrom preprocess_pipeline import get_input\n\nslim = tf.contrib.slim\n\ndef parse_args():\n  \"\"\"\n  Parse input arguments\n  \"\"\"\n  parser = argparse.ArgumentParser(description='Train a keypoint regressor.')\n  parser.add_argument('--cfg', dest='cfg_file',\n                      help='optional config file',\n                      default=None, type=str)\n  parser.add_argument('--gpu', dest='gpu',\n                      help='GPU to use for running this.',\n                      default='0', type=str)\n  parser.add_argument('--save', dest='save', action='store_const',\n                      const=True, default=False,\n                      help='Set to save the features. Works only in mAP mode. '\n                           '(Set in cfg).')\n  parser.add_argument('--outfpath', default=None,\n                      help='(Optional) Give a custom path to save the features. '\n                           'By def. picks a path in ckpt directory.')\n  parser.add_argument('--preprocs', default=[], nargs='*',\n                      help='Set additional preprocs to do when testing. Eg. '\n                           'can put \\'flips\\'. This will flip images before '\n                           'pushing through the network. 
Can be useful for '\n                           'late fusion of multiple features.')\n  parser.add_argument('--ept', dest='ept', nargs='+', type=str, default=[],\n                      help='Optional end point to store. '\n                           'By def store the softmax logits.')\n  parser.add_argument('--split_name', default=None, type=str,\n                      help='Set to change the dataset split to run on. '\n                           'Eg, \\'train\\' or \\'test\\'.')\n  parser.add_argument('--frames_per_video', default=None, type=int,\n                      help='Set to change the '\n                           'cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO.')\n  parser.add_argument('--dataset_list_dir', default=None, type=str,\n                      help='Set to change the train_test_lists dir.')\n  args = parser.parse_args()\n  if args.cfg_file is not None:\n    cfg_from_file(args.cfg_file)\n\n  # Change config for some options\n  if args.split_name is not None:\n    cfg.TEST.DATASET_SPLIT_NAME = args.split_name\n  if args.frames_per_video is not None:\n    cfg.TEST.VIDEO_FRAMES_PER_VIDEO = args.frames_per_video\n  if args.outfpath is not None:\n    args.save = True\n  return args, cfg\n\n\ndef mkdir_p(dpath):\n  try:\n    os.makedirs(dpath)\n  except:\n    pass\n\n\ndef main():\n  args, cfg = parse_args()\n  train_dir = get_output_dir('default' if args.cfg_file is None\n                             else args.cfg_file)\n  os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu\n\n  print('Using Config:')\n  pprint.pprint(cfg)\n\n  tf.logging.set_verbosity(tf.logging.INFO)\n  with tf.Graph().as_default():\n    tf_global_step = slim.get_or_create_global_step()\n\n    ######################\n    # Select the dataset #\n    ######################\n    kwargs = {}\n    if cfg.TEST.VIDEO_FRAMES_PER_VIDEO > 1:\n      kwargs['num_samples'] = cfg.TEST.VIDEO_FRAMES_PER_VIDEO\n      kwargs['modality'] = cfg.INPUT.VIDEO.MODALITY\n      kwargs['split_id'] = cfg.INPUT.SPLIT_ID\n    if 
args.dataset_list_dir is not None:\n      kwargs['dataset_list_dir'] = args.dataset_list_dir\n    elif cfg.DATASET_LIST_DIR != '':\n      kwargs['dataset_list_dir'] = cfg.DATASET_LIST_DIR\n    if cfg.INPUT_FILE_STYLE_LABEL != '':\n      kwargs['input_file_style_label'] = cfg.INPUT_FILE_STYLE_LABEL\n    dataset, num_pose_keypoints = dataset_factory.get_dataset(\n        cfg.DATASET_NAME, cfg.TEST.DATASET_SPLIT_NAME, cfg.DATASET_DIR,\n        **kwargs)\n\n    ####################\n    # Select the model #\n    ####################\n    network_fn = nets_factory.get_network_fn(\n        cfg.MODEL_NAME,\n        num_classes=dataset.num_classes,\n        num_pose_keypoints=num_pose_keypoints,\n        is_training=False,\n        cfg=cfg)\n\n    ##############################################################\n    # Create a dataset provider that loads data from the dataset #\n    ##############################################################\n    provider = slim.dataset_data_provider.DatasetDataProvider(\n        dataset,\n        shuffle=False,\n        num_epochs=1,\n        common_queue_capacity=2 * cfg.TEST.BATCH_SIZE,\n        common_queue_min=cfg.TEST.BATCH_SIZE)\n    [image, action_label] = get_input(provider, cfg,\n                                      ['image', 'action_label'])\n    # label -= FLAGS.labels_offset\n\n    #####################################\n    # Select the preprocessing function #\n    #####################################\n    preprocessing_name = cfg.MODEL_NAME\n    image_preprocessing_fn = preprocessing_factory.get_preprocessing(\n        preprocessing_name,\n        is_training=False)\n\n    eval_image_size = cfg.TRAIN.IMAGE_SIZE or network_fn.default_image_size\n\n    image = image_preprocessing_fn(\n      image, eval_image_size, eval_image_size,\n      resize_side_min=cfg.TRAIN.RESIZE_SIDE,\n      resize_side_max=cfg.TRAIN.RESIZE_SIDE)\n\n    # additional preprocessing as required\n    if 'flips' in args.preprocs:\n      
tf.logging.info('Flipping all images while testing!')\n      image = tf.stack([\n        tf.image.flip_left_right(el) for el in tf.unstack(image)])\n\n    images, action_labels = tf.train.batch(\n      [image, action_label],\n      batch_size=cfg.TEST.BATCH_SIZE,\n      # following is because if there are more, the order of batch can be\n      # different due to different speed... so avoid that\n      # http://stackoverflow.com/questions/35001027/does-batching-queue-tf-train-batch-not-preserve-order#comment57731040_35001027\n      # num_threads=1 if args.save else cfg.NUM_PREPROCESSING_THREADS,\n      num_threads=1,  # The above was too unsafe as sometimes I forgot --save\n                      # and it would just randomize the whole thing.\n                      # This is very important so\n                      # shifting to this by default. Better safe than sorry.\n      allow_smaller_final_batch=True if cfg.TEST.VIDEO_FRAMES_PER_VIDEO == 1\n                                else False,  # because otherwise we need to\n                                             # average logits over the frames,\n                                             # and that needs first dimensions\n                                             # to be fully defined\n      capacity=5 * cfg.TEST.BATCH_SIZE)\n\n    ####################\n    # Define the model #\n    ####################\n    logits, end_points = network_fn(images)\n    end_points['images'] = images\n\n    if cfg.TEST.MOVING_AVERAGE_DECAY:\n      variable_averages = tf.train.ExponentialMovingAverage(\n          cfg.TEST.MOVING_AVERAGE_DECAY, tf_global_step)\n      variables_to_restore = variable_averages.variables_to_restore(\n          slim.get_model_variables())\n      variables_to_restore[tf_global_step.op.name] = tf_global_step\n    else:\n      variables_to_restore = slim.get_variables_to_restore()\n\n    predictions = tf.argmax(logits, 1)\n    if cfg.TRAIN.LOSS_FN_ACTION.startswith('multi-label'):\n      logits = 
tf.sigmoid(logits)\n    else:\n      logits = tf.nn.softmax(logits, -1)\n    labels = tf.squeeze(action_labels)\n    end_points['labels'] = labels\n\n    # Define the metrics:\n    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({\n        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),\n        # 'Recall@5': slim.metrics.streaming_recall_at_k(\n        #     logits, labels, 5),\n    })\n\n    # Print the summaries to screen.\n    for name, value in names_to_values.iteritems():\n      summary_name = 'eval/%s' % name\n      op = tf.summary.scalar(summary_name, value, collections=[])\n      op = tf.Print(op, [value], summary_name)\n      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)\n\n    # TODO(sguada) use num_epochs=1\n    if cfg.TEST.MAX_NUM_BATCHES:\n      num_batches = cfg.TEST.MAX_NUM_BATCHES\n    else:\n      # This ensures that we make a single pass over all of the data.\n      num_batches = math.ceil(dataset.num_samples / float(cfg.TEST.BATCH_SIZE))\n\n    # just test the latest trained model\n    checkpoint_path = cfg.TEST.CHECKPOINT_PATH or train_dir\n    if tf.gfile.IsDirectory(checkpoint_path):\n      checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)\n    else:\n      checkpoint_path = checkpoint_path\n    checkpoint_step = int(checkpoint_path.split('-')[-1])\n\n    tf.logging.info('Evaluating %s' % checkpoint_path)\n\n    config = tf.ConfigProto()\n    config.gpu_options.allow_growth = True\n    config.allow_soft_placement = True\n    summary_writer = tf.summary.FileWriter(logdir=train_dir)\n\n    if cfg.TEST.EVAL_METRIC == 'mAP' or args.save or args.ept:\n      from tensorflow.python.training import supervisor\n      from tensorflow.python.framework import ops\n      import h5py\n      saver = tf.train.Saver(variables_to_restore)\n      sv = supervisor.Supervisor(graph=ops.get_default_graph(),\n                                 logdir=None,\n                                 summary_op=None,\n       
                          summary_writer=summary_writer,\n                                 global_step=None,\n                                 saver=None)\n      all_labels = []\n      end_points['logits'] = logits\n      end_points_to_save = args.ept + ['logits']\n      end_points_to_save = list(set(end_points_to_save))\n      all_feats = dict([(ename, []) for ename in end_points_to_save])\n      with sv.managed_session(\n          '', start_standard_services=False,\n          config=config) as sess:\n        saver.restore(sess, checkpoint_path)\n        sv.start_queue_runners(sess)\n        for j in tqdm(range(int(math.ceil(num_batches)))):\n          feats = sess.run([\n            action_labels,\n            [end_points[ename] for ename in end_points_to_save]])\n          all_labels.append(feats[0])\n          for ept_id, ename in enumerate(end_points_to_save):\n            all_feats[ename].append(feats[1][ept_id])\n      APs = []\n      all_labels = np.concatenate(all_labels)\n      if args.save or args.ept:\n        res_outdir = os.path.join(train_dir, 'Features/')\n        mkdir_p(res_outdir)\n        outfpath = args.outfpath or os.path.join(\n          res_outdir, 'features_ckpt_{}_{}.h5'.format(\n          cfg.TEST.DATASET_SPLIT_NAME,\n          checkpoint_step))\n        print('Saving the features/logits/labels to {}'.format(outfpath))\n        with h5py.File(outfpath, 'a') as fout:\n          for ename in end_points_to_save:\n            if ename in fout:\n              tf.logging.warning('Deleting {} from output HDF5 to write the '\n                                 'new features.'.format(ename))\n              del fout[ename]\n            if ename == 'labels':\n              feat_to_save = np.array(all_feats[ename])\n            else:\n              feat_to_save = np.concatenate(all_feats[ename])\n            try:\n              fout.create_dataset(\n                ename, data=feat_to_save,\n                compression='gzip', compression_opts=9)\n     
       except:\n              pdb.set_trace()  # manually deal with it and continue\n          if 'labels' in fout:\n            del fout['labels']\n          fout.create_dataset(\n            'labels', data=all_labels,\n            compression='gzip', compression_opts=9)\n\n      if args.ept:\n        tf.logging.info('Evaluation had --ept passed in. '\n                        'This indicates script was used for feature '\n                        'extraction. Hence, not performing any evaluation.')\n        return\n      # Evaluation code\n      all_logits = np.concatenate(all_feats['logits'])\n      acc = np.mean(\n        all_logits.argmax(axis=1) == all_labels)\n      mAP = compute_map(all_logits, all_labels)[0]\n      print('Mean AP: {}'.format(mAP))\n      print('Accuracy: {}'.format(acc))\n      summary_writer.add_summary(tf.Summary(value=[\n        tf.Summary.Value(\n          tag='mAP/{}'.format(cfg.TEST.DATASET_SPLIT_NAME),\n          simple_value=mAP)]),\n        global_step=checkpoint_step)\n      summary_writer.add_summary(tf.Summary(value=[\n        tf.Summary.Value(\n          tag='Accuracy/{}'.format(cfg.TEST.DATASET_SPLIT_NAME),\n          simple_value=acc)]),\n        global_step=checkpoint_step)\n    else:\n      slim.evaluation.evaluate_once(\n        master='',\n        checkpoint_path=checkpoint_path,\n        logdir=train_dir,\n        num_evals=num_batches,\n        eval_op=names_to_updates.values(),\n        variables_to_restore=variables_to_restore,\n        session_config=config)\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "src/loss.py",
    "content": "import tensorflow as tf\nslim = tf.contrib.slim\n\ndef gen_losses(\n  labels_action, logits_action, loss_type_action, num_action_classes,\n  action_loss_wt,\n  labels_pose, logits_pose, loss_type_pose, labels_pose_valid, pose_loss_wt,\n  end_points, cfg):\n\n  with tf.name_scope('LossFn'):\n    if loss_type_pose and logits_pose.get_shape().as_list()[-1] > 0:\n      with tf.name_scope('PoseLoss'):\n        # Loss over the pose\n        if labels_pose.get_shape().as_list() != \\\n           logits_pose.get_shape().as_list():\n          tf.logging.info('Sizes of logits {} and labels {} are different. '\n                          'Change the cfg.FINAL_POSE_HMAP_SIDE to avoid '\n                          'a resize operation.'.format(\n                            logits_pose.get_shape().as_list(),\n                            labels_pose.get_shape().as_list()))\n          labels_pose = tf.image.resize_images(\n            labels_pose, logits_pose.get_shape().as_list()[-3:-1])\n        # ignore the unknown channels, set those channels to 0 to incur no loss\n\n        # Following needs defining the gradient for this...\n        # labels_pose = zero_out_channels(labels_pose, labels_pose_valid)\n        # logits_pose = zero_out_channels(logits_pose, labels_pose_valid)\n\n        with tf.name_scope('ValidPoseLoss'):\n          channels_valid = tf.unstack(labels_pose_valid, axis=-1)\n          channels_logits = tf.unstack(logits_pose, axis=-1)\n          channels_labels = tf.unstack(labels_pose, axis=-1)\n          loss_elements = []\n          pose_loss_mask = []\n          for v, lbl, lgt in zip(channels_valid, channels_logits, channels_labels):\n            if cfg.TRAIN.LOSS_FN_POSE_SAMPLED:\n              # To make it harder\n              neg_areas = tf.equal(lgt, 0)\n              pos_areas = tf.greater(lgt, 0)\n              total_area = lgt.shape.num_elements()\n              pos_area_ratio = tf.reduce_sum(tf.to_float(pos_areas)) / total_area\n          
    # select that much of neg area\n              neg_areas_selected = tf.to_float(tf.less(tf.random_uniform(\n                tf.shape(lgt), 0, 1.0),\n                pos_area_ratio)) * tf.to_float(neg_areas)\n              # keep all positive pixels\n              mask = tf.greater(neg_areas_selected + tf.to_float(\n                tf.greater(lbl, 0)), 0)\n              mask = tf.to_float(mask)\n              lgt = lgt * mask  # just do loss over this subset\n              lbl = lbl * mask\n              loss_val = 0.5 * tf.reduce_mean(tf.square(lbl - lgt), axis=[1,2])\n            else:\n              mask = tf.ones(tf.shape(lgt))\n              loss_val = 0.5 * tf.reduce_sum(\n                tf.square(lbl - lgt), axis=[1,2]) / tf.reduce_sum(mask)\n            pose_loss_mask.append(tf.expand_dims(mask, -1))\n            if loss_type_pose == 'l2':\n              L = tf.reduce_mean(tf.where(\n                v,\n                loss_val,\n                [0] * v.get_shape().as_list()[0]))\n            elif loss_type_pose == '':\n              L = 0\n            else:\n              raise ValueError('Invalid loss {}'.format(loss_type_pose))\n            loss_elements.append(L)\n        end_points['PoseLossMask'] = tf.concat(pose_loss_mask, axis=-1)\n        tot_loss = tf.reduce_sum(loss_elements, name='ValidPoseEucLoss')\n        tf.losses.add_loss(tot_loss * pose_loss_wt)\n\n    with tf.name_scope('ActionLoss'):\n      # TODO (rgirdhar): Add the option of having -1 label, so ignore that one\n      if loss_type_action == 'softmax-xentropy':\n        tf.losses.softmax_cross_entropy(\n          onehot_labels=slim.one_hot_encoding(\n            labels_action,\n            num_action_classes),\n          logits=logits_action,\n          weights=action_loss_wt)\n      elif loss_type_action == 'l2':\n        tf.losses.mean_squared_error(\n          labels=slim.one_hot_encoding(\n            labels_action,\n            num_action_classes),\n          
predictions=logits_action,\n          weights=action_loss_wt)\n      elif loss_type_action == 'multi-label':\n        labels_action = tf.to_float(labels_action)\n        # labels_action = tf.Print(\n        #   labels_action, [labels_action, tf.reduce_sum(labels_action, 1)],\n        #   \"Label action:\")\n        loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(\n          targets=labels_action,\n          logits=logits_action,\n          pos_weight=10))\n        tf.losses.add_loss(loss)\n      elif loss_type_action == 'multi-label-2':\n        tf.losses.sigmoid_cross_entropy(\n          multi_class_labels=labels_action,\n          logits=logits_action)\n      elif loss_type_action == '':\n        tf.logging.info('No loss on action')\n      else:\n        raise ValueError('Unrecognized loss {}'.format(loss_type_action))\n"
  },
  {
    "path": "src/preprocess_pipeline.py",
    "content": "import tensorflow as tf\nfrom custom_ops.custom_ops_factory import pose_to_heatmap, render_pose, \\\n    render_objects, extract_glimpse\n\ndef _resize_if_needed(image, max_wd):\n  with tf.name_scope('LimitMaxSizeOriginalImage'):\n    im_ht = tf.shape(image)[-3]\n    im_wd = tf.shape(image)[-2]\n    new_ht = tf.cast(im_ht, tf.float32) * (\n      tf.cast(max_wd, tf.float32) / tf.cast(im_wd, tf.float32))\n    new_ht = tf.cast(new_ht, tf.int64)\n    image = tf.cond(\n      tf.greater(im_wd, max_wd),\n      lambda: tf.image.resize_images(\n        image, tf.cast([new_ht, max_wd], tf.int32)),\n      lambda: tf.cast(image, tf.float32))\n    image = tf.cast(image, tf.uint8)\n  return image\n\n\ndef _replay_augmentation(H, aug_info):\n  # use the augmentation info from the original image to identically transform\n  # the heatmap H\n  with tf.name_scope('ReplayAugmentation'):\n    ## 1. Crop\n    H_wd = tf.shape(H)[-2]\n    H_ht = tf.shape(H)[-3]\n    num_channels = tf.shape(H)[-1]\n    orig_wd = aug_info['image_shape'][-2]\n    orig_ht = aug_info['image_shape'][-3]\n    ratio_x = tf.to_float(H_wd) / tf.to_float(orig_wd)\n    ratio_y = tf.to_float(H_ht) / tf.to_float(orig_ht)\n    start_points = [tf.to_float(aug_info['crop_info'][0]) * ratio_y,\n                    tf.to_float(aug_info['crop_info'][1]) * ratio_x]\n    edge_sides = [tf.to_float(aug_info['crop_info'][2]) * ratio_y,\n                  tf.to_float(aug_info['crop_info'][3]) * ratio_x]\n    H = tf.slice(H,\n                 tf.concat([tf.to_int32(start_points), [0,]], axis=-1),\n                 tf.concat([tf.to_int32(edge_sides), [num_channels,]], axis=-1))\n    ## 2. 
Flip\n    H = tf.cond(\n      aug_info['whether_flip'],\n      lambda: tf.image.flip_left_right(H),\n      lambda: H)\n  return H\n\n\ndef _get_other_items(provider, stuff, existing_items, new_items):\n  res = []\n  for item in new_items:\n    if item in existing_items:\n      res.append(stuff[existing_items.index(item)])\n    else:\n      res.append(provider.get([item])[0])\n  return res\n\n\ndef get_input(provider, cfg, items):\n  stuff = provider.get(items)\n  if 'image' in items:\n    img_pos = items.index('image')\n    image = stuff[img_pos]\n    # MPII has some huge images, which makes further processing too slow.\n    # So, make image smaller if needed\n    # IMP NOTE: Do not change the orig_im_ht or orig_im_wd, they are used for plotting\n    # the pose and the pose is defined w.r.t to the original image size\n    # Pose Label format: [16x3xn,] : x1,y1,score/isvisible...\n    # if x1 and y1 are both -1, that point is not visible/labeled\n    image = _resize_if_needed(image, cfg.MAX_INPUT_IMAGE_SIZE)\n    if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-pose') or \\\n       cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('pose-glimpse'):\n      pose_label, orig_im_ht, orig_im_wd = _get_other_items(\n        provider, stuff, items, ['pose', 'im_ht', 'im_wd'])\n      # pose_label = tf.Print(pose_label, [pose_label], \"Pose Label: \")\n      pose_label_was_list = True\n      if not isinstance(pose_label, list):\n        pose_label_was_list = False\n        pose_label = [pose_label]\n\n      if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-pose'):\n        rendered_pose = tf.stack([render_pose(\n          pose_label[i], orig_im_ht, orig_im_wd,\n          # TODO: the following tf.shape is going to read the image irrespective\n          # of whether needed or not to compute shape. However the code isn't\n          # slow so not worrying about it at the moment. 
But fix it.\n          tf.cast(tf.shape(image)[-2], tf.int64),\n          out_type=cfg.INPUT.INPUT_IMAGE_FORMAT_POSE_RENDER_TYPE) for\n          i in range(len(pose_label))])\n        rendered_pose = tf.image.resize_images(\n          rendered_pose, tf.shape(image)[-3:-1])\n        if not pose_label_was_list:\n          rendered_pose = rendered_pose[0]\n      else:\n        image_glimpse = tf.stack([extract_glimpse(\n          image, pose_label[i], orig_im_ht, orig_im_wd,\n          cfg.TRAIN.IMAGE_SIZE if cfg.INPUT.POSE_GLIMPSE_RESIZE else -1,\n          cfg.INPUT.POSE_GLIMPSE_CONTEXT_RATIO,\n          cfg.INPUT.POSE_GLIMPSE_PARTS_KEEP) for\n          i in range(len(pose_label))])\n\n\n    if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-objects'):\n      objects_label, orig_im_ht, orig_im_wd = _get_other_items(\n        provider, stuff, items, ['objects', 'im_ht', 'im_wd'])\n      # pose_label = tf.Print(pose_label, [pose_label], \"Pose Label: \")\n      rendered_objects = tf.stack([render_objects(\n        objects_label[i], orig_im_ht, orig_im_wd,\n        cfg.TRAIN.IMAGE_SIZE, out_channels=80) for\n        i in range(len(objects_label))])\n\n    # Final output\n    if cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-pose':\n      image = rendered_pose\n      # debugging\n      # image = tf.tile(tf.reduce_mean(\n      #   image, axis=-1, keep_dims=True), [1, 1, 1, 3])\n    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-pose-on-image':\n      image = tf.cast(tf.to_float(image) * 0.5 + \\\n                      tf.to_float(rendered_pose) * 0.5, tf.uint8)\n    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-objects':\n      image = rendered_objects\n      # To debug\n      # image = tf.cast(\n      #   tf.to_float(image) * 0.0 + \\\n      #   tf.to_float(tf.image.resize_images(\n      #     tf.reduce_mean(rendered_objects, axis=-1, keep_dims=True),\n      #     tf.shape(image)[-3:-1])) * 1.0,\n      #   tf.uint8)\n    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 
'pose-glimpse':\n      image = image_glimpse\n    stuff[img_pos] = image\n  return stuff\n\n\ndef train_preprocess_pipeline(provider, cfg, network_fn, num_pose_keypoints,\n                              image_preprocessing_fn):\n\n  [image, pose_label, orig_im_ht, orig_im_wd, action_label] = get_input(\n    provider, cfg, ['image', 'pose', 'im_ht', 'im_wd', 'action_label'])\n  # for consistency between video and image datasets, convert image datasets to\n  # 1-frame videos\n  if image.get_shape().ndims == 3:\n    image = tf.expand_dims(image, 0)\n    pose_label = [pose_label]\n  train_image_size = cfg.TRAIN.IMAGE_SIZE or network_fn.default_image_size\n\n  # joint preprocessing\n  combined_preproc_flag = False\n  with tf.name_scope('CombinedPreproc'):\n    if num_pose_keypoints > 0 and not cfg.TRAIN.LOSS_FN_POSE == '':\n      combined_preproc_flag = True\n      all_pose_label_hmap = []\n      all_pose_label_valid = []\n      for pl in pose_label:\n        pose_label_hmap, pose_label_valid = pose_to_heatmap(\n          pl, orig_im_ht, orig_im_wd,\n          # small enough for preproc, big enough to see\n          max(200, cfg.TRAIN.FINAL_POSE_HMAP_SIDE),\n          out_channels=num_pose_keypoints,\n          # if needed, do using a conv layer with fixed kernel\n          # would be faster on GPU\n          do_gauss_blur=False,\n          marker_wd_ratio=cfg.HEATMAP_MARKER_WD_RATIO)  # larger => large targets\n        all_pose_label_hmap.append(pose_label_hmap)\n        all_pose_label_valid.append(pose_label_valid)\n      # concat on last axis for now (for preproc), will stack it (like the\n      # valid labels) after that.\n      pose_label_hmap = tf.concat(all_pose_label_hmap, axis=-1)\n      pose_label_valid = tf.stack(all_pose_label_valid)\n\n    # rgirdhar NOTE: This is the most expensive CPU part. 
My perf was super\n    # slow with the output image sizes being 450x, because it'd first resize\n    # the smallest dimension to 512 or so, and then take a 450 crop from that.\n    # Doing that over RGB+heatmap channels was super slow, and is fixed when\n    # using small sizes (now, 256 & 224 works well). Another issue was the\n    # number of INTRA and INTER PARALLELIZATION THREADS, set in the train.py\n    # which sped up a lot. Also saves from the machines getting stuck by\n    # controlling the number of threads while giving better performance. For\n    # me, the Inter=12 and Intra=4 worked well.\n    preproc_info = {}\n    # since images is 4D vector, need to reshape to pass it through preproc\n    frames_per_video = image.get_shape().as_list()[0]\n    image = tf.concat(tf.unstack(image), axis=-1)\n    image = image_preprocessing_fn(\n      image,\n      train_image_size,\n      train_image_size,\n      resize_side_min=cfg.TRAIN.RESIZE_SIDE,\n      resize_side_max=cfg.TRAIN.RESIZE_SIDE,\n      preproc_info=preproc_info,\n      modality=cfg.INPUT.VIDEO.MODALITY)  # works for image too, rgb by def\n    image = tf.stack(tf.split(\n      image, frames_per_video,\n      axis=image.get_shape().ndims-1))\n    if combined_preproc_flag:\n      pose_label_hmap = _replay_augmentation(pose_label_hmap, preproc_info)\n      pose_label_hmap = tf.image.convert_image_dtype(pose_label_hmap,\n                                                     tf.float32)\n\n      # undo any value scaling that happened while preproc\n      pose_label_hmap -= tf.reduce_min(pose_label_hmap)\n      pose_label_hmap /= (tf.reduce_max(pose_label_hmap) + cfg.EPS)\n      # reduce the size of heatmaps to reduce memory usage in queues\n      pose_label_hmap = tf.image.resize_images(\n        pose_label_hmap,\n        [cfg.TRAIN.FINAL_POSE_HMAP_SIDE,\n         cfg.TRAIN.FINAL_POSE_HMAP_SIDE])\n      pose_label_hmap.set_shape([\n        pose_label_hmap.get_shape().as_list()[0],\n        
pose_label_hmap.get_shape().as_list()[1],\n        num_pose_keypoints * frames_per_video])\n      pose_label_hmap = tf.stack(tf.split(\n        pose_label_hmap, frames_per_video,\n        axis=pose_label_hmap.get_shape().ndims-1))\n    else:\n      pose_label_hmap = tf.zeros((0,))  # dummy value, not used\n      pose_label_valid = tf.zeros((0,))  # dummy value, not used\n\n  return image, pose_label_hmap, pose_label_valid, action_label\n"
  },
  {
    "path": "src/restore/__init__.py",
    "content": ""
  },
  {
    "path": "src/restore/model_restorer.py",
    "content": "import numpy as np\nimport h5py\n\nfrom tensorflow.contrib import slim\nfrom tensorflow.python.platform import tf_logging as logging\nfrom tensorflow.python import pywrap_tensorflow\nimport tensorflow as tf\nimport var_name_mapper\n\n\ndef restore_model(checkpoint_path,\n                  variables_to_restore,\n                  ignore_missing_vars=False,\n                  var_name_mapper_type=None):\n  all_ops = []\n  checkpoint_variables = variables_to_restore\n  if checkpoint_path.endswith('.npy'):\n    vars_to_restore_names = [\n      el.name for el in checkpoint_variables]\n    key_name_mapper = var_name_mapper.map(var_name_mapper_type)\n    init_weights = np.load(checkpoint_path).item()\n    init_weights_final = {}\n    vars_restored = []\n    for key in init_weights.keys():\n      for subkey in init_weights[key].keys():\n        final_key_name = key_name_mapper(\n          key + '/' + subkey)\n        if final_key_name not in vars_to_restore_names:\n          logging.info('Not using %s from npy' % final_key_name)\n          continue\n        target_shape = slim.get_model_variables(\n          final_key_name)[0].get_shape().as_list()\n        pretrained_wts = init_weights[key][subkey].copy()\n        target_shape_squeezed = np.delete(\n          target_shape, np.where(np.array(target_shape) == 1))\n        pretrained_shape_squeezed = np.delete(\n          pretrained_wts.shape, np.where(np.array(pretrained_wts.shape) == 1))\n\n        go_ahead = False  # whether or not I'll be able to copy these weights\n        if np.any(target_shape_squeezed !=\n                  pretrained_shape_squeezed):\n          logging.info('Shape mismatch var: %s from npy [%s vs %s]. 
' % (\n                       final_key_name, target_shape,\n                       pretrained_wts.shape))\n          if pretrained_shape_squeezed[-2] != target_shape_squeezed[-2]:\n            logging.info('Trying repeating channels to make it similar.')\n            pretrained_wts = np.repeat(\n              np.mean(pretrained_wts, axis=-2, keepdims=True),\n              repeats=target_shape_squeezed[-2],\n              axis=-2)\n            if np.all(target_shape_squeezed == pretrained_wts.shape):\n              logging.info('Success! Copying the hacked weights.')\n              go_ahead = True\n            else:\n              logging.info('Couldnot match the weights still.')\n        else:\n          go_ahead = True\n        if go_ahead:\n          init_weights_final[final_key_name] = \\\n            pretrained_wts\n          vars_restored.append(final_key_name)\n    init_weights = init_weights_final\n    for v in vars_to_restore_names:\n      if v not in vars_restored:\n        logging.fatal('No weights found for %s' % v)\n        if not ignore_missing_vars:\n          raise ValueError()\n    all_ops.append(slim.assign_from_values_fn(init_weights))\n  else:\n    all_ops.append(assign_from_checkpoint_fn(\n      checkpoint_path, checkpoint_variables,\n      ignore_missing_vars=ignore_missing_vars,\n      resize_variables=True))\n  def combined(sess):\n    for op in all_ops:\n      op(sess)\n  return combined\n\n\ndef assign_from_checkpoint_fn(model_path, var_list, ignore_missing_vars=False,\n                              reshape_variables=False, resize_variables=False):\n  \"\"\"Modified function from\n  https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/framework/python/ops/variables.py\n  Mod by rgirdhar to allow for repeating the channels dimension in case a layer\n  does not match. It's useful for setting the first layer in flow models for\n  videos. 
Does this only when resize_variables is True.\n  \"\"\"\n  \"\"\"Returns a function that assigns specific variables from a checkpoint.\n\n  If ignore_missing_vars is True and no variables are found in the checkpoint\n  it returns None.\n\n  Args:\n    model_path: The full path to the model checkpoint. To get latest checkpoint\n        use `model_path = tf.train.latest_checkpoint(checkpoint_dir)`\n    var_list: A list of `Variable` objects or a dictionary mapping names in the\n        checkpoint to the corresponding variables to initialize. If empty or\n        `None`, it would return `no_op(), None`.\n    ignore_missing_vars: Boolean, if True it would ignore variables missing in\n        the checkpoint with a warning instead of failing.\n    reshape_variables: Boolean, if True it would automatically reshape variables\n        which are of different shape then the ones stored in the checkpoint but\n        which have the same number of elements.\n    resize_variables: Boolean, if True it would repeat the channels to match\n        the target variable dimensions\n\n  Returns:\n    A function that takes a single argument, a `tf.Session`, that applies the\n    assignment operation. 
If no matching variables were found in the checkpoint\n    then `None` is returned.\n\n  Raises:\n    ValueError: If var_list is empty.\n  \"\"\"\n  if not var_list:\n    raise ValueError('var_list cannot be empty')\n  reader = pywrap_tensorflow.NewCheckpointReader(model_path)\n  if isinstance(var_list, dict):\n    var_dict = var_list\n  else:\n    var_dict = {var.op.name: var for var in var_list}\n  available_vars = {}\n  for var in var_dict:\n    if reader.has_tensor(var):\n      go_ahead = False\n      V = reader.get_tensor(var)\n      ckpt_shape = list(V.shape)\n      target_shape = var_dict[var].get_shape().as_list()\n      if np.all(ckpt_shape == target_shape):\n        go_ahead = True\n      else:\n        if resize_variables:\n          logging.warning('Resizing to assign to variable {} to {} from {}'.format(\n            var, var_dict[var].get_shape().as_list(),\n            V.shape))\n          V = np.repeat(\n            np.mean(V, axis=-2, keepdims=True),\n            repeats=target_shape[-2],\n            axis=-2)\n          ckpt_shape = list(V.shape)\n          if np.all(ckpt_shape == target_shape):\n            logging.info('Was able to match shape, so restoring the var :-)')\n            go_ahead = True\n          else:\n            logging.error('Was not able to match shape, not restoring it!!!')\n            go_ahead = False\n        else:\n          logging.error('Found a shape mismatch. 
Set resize_var to true to '\n                        'do a hacky shape copy.')\n      if go_ahead:\n        available_vars[var] = V\n    else:\n      logging.warning(\n          'Variable %s missing in checkpoint %s', var, model_path)\n      if not ignore_missing_vars:\n        raise ValueError()\n  return slim.assign_from_values_fn(available_vars)\n\n\ndef get_special_assigns(special_assign_vars):\n  init_wts = {}\n  special_assign_vars = special_assign_vars.split(',')\n  for i in range(len(special_assign_vars) / 2):\n    var_name = special_assign_vars[2*i]\n    file_path = special_assign_vars[2*i+1]\n    with h5py.File(file_path, 'r') as fin:\n      init_wts[var_name] = fin['feat'].value\n    logging.info('Special Assign: %s with a %s array' % (\n      var_name, init_wts[var_name].shape))\n  return slim.assign_from_values_fn(init_wts)\n"
  },
  {
    "path": "src/restore/var_name_mapper.py",
    "content": "def map(var_name_mapping):\n  map_fn = lambda x: x\n  if var_name_mapping == 'placenet365-vgg':\n    map_fn = placenet365_vgg_fn\n  elif var_name_mapping == 'cuhk-action-vgg':\n    map_fn = cuhk_action_vgg\n  elif var_name_mapping == 'cuhk-action-tsn':\n    map_fn = cuhk_action_tsn\n  elif var_name_mapping == 'xiaolonw_action_vgg_hmdb':\n    map_fn = xiaolonw_action_vgg_hmdb\n  else:\n    raise ValueError('Invalid var name mapping')\n  return map_fn\n\n\ndef placenet365_vgg_fn(var_name):\n  final_name = var_name\n  if final_name.split('/')[0].startswith('conv'):\n    final_name = \\\n      final_name.split('/')[0].split('_')[0] + '/' + final_name\n  elif final_name.split('/')[0] == 'fc8a':\n    final_name = final_name.replace('fc8a', 'fc8')\n  return 'vgg_16/' + final_name + ':0'\n\n\ndef cuhk_action_vgg(var_name):\n  final_name = var_name\n  if final_name.split('/')[0].startswith('conv'):\n    final_name = \\\n      final_name.split('/')[0].split('_')[0] + '/' + final_name\n  elif final_name.split('/')[0].startswith('fc8'):\n    final_name = final_name.replace(final_name.split('/')[0], 'fc8')\n  return 'vgg_16/' + final_name + ':0'\n\n\ndef xiaolonw_action_vgg_hmdb(var_name):\n  final_name = var_name\n  if final_name.split('/')[0].startswith('conv'):\n    final_name = \\\n      final_name.split('/')[0].split('_')[0] + '/' + final_name\n  elif final_name.split('/')[0] == 'fc8_hmdb':\n    final_name = final_name.replace('fc8_hmdb', 'fc8')\n  return 'vgg_16/' + final_name + ':0'\n\n\ndef cuhk_action_tsn(var_name):\n  final_name = var_name\n  var_name = final_name.split('/')[-1]\n  if final_name.split('/')[0].endswith('_bn'):\n    if var_name == 'scale':\n      var_name = 'gamma'\n    elif var_name == 'shift':\n      var_name = 'beta'\n    elif var_name == 'mean':\n      var_name = 'moving_mean'\n    elif var_name == 'variance':\n      var_name = 'moving_variance'\n    final_name = \\\n      final_name.split('/')[0][:-3] + '/BatchNorm/' + var_name\n  
elif final_name.split('/')[0] == 'fc-action':\n    final_name = 'Logits/Conv/' + var_name\n  else:\n    final_name = final_name.split('/')[0] + '/Conv/' + var_name\n  block_name = final_name.split('/')[0]\n  pos = None\n  if block_name.startswith('inception'):\n    pos = len('inception_xx')\n  elif block_name.startswith('conv'):\n    pos = len('convx')\n  if pos is not None:\n    final_name = final_name[:pos] + '/' + final_name[pos+1:]\n  return 'InceptionV2_TSN/' + final_name + ':0'\n"
  },
  {
    "path": "src/train.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport argparse\nimport tensorflow as tf\nimport sys\nimport pprint\nimport os\nimport time\nimport numpy as np\nfrom datetime import datetime\n\nfrom tensorflow.python.ops import control_flow_ops\nfrom tensorflow.python.client import timeline\nfrom datasets import dataset_factory\nsys.path.append('../models/slim')\nfrom deployment import model_deploy\nfrom nets import nets_factory\nfrom preprocessing import preprocessing_factory\n\nfrom config import cfg, cfg_from_file, cfg_from_list, get_output_dir\nfrom restore import model_restorer\nfrom loss import gen_losses\nfrom preprocess_pipeline import train_preprocess_pipeline\n\nslim = tf.contrib.slim\n\ndef _configure_learning_rate(num_samples_per_epoch, num_clones, global_step):\n  \"\"\"Configures the learning rate.\n\n  Args:\n    num_samples_per_epoch: The number of samples in each epoch of training.\n    global_step: The global_step tensor.\n\n  Returns:\n    A `Tensor` representing the learning rate.\n\n  Raises:\n    ValueError: if\n  \"\"\"\n  if cfg.TRAIN.NUM_STEPS_PER_DECAY > 0:\n    decay_steps = cfg.TRAIN.NUM_STEPS_PER_DECAY\n    tf.logging.info('Using {} steps for decay. 
Ignoring any epoch setting for '\n                    'decay.'.format(decay_steps))\n  else:\n    decay_steps = int(num_samples_per_epoch / (\n      cfg.TRAIN.BATCH_SIZE * num_clones * cfg.TRAIN.ITER_SIZE) * cfg.TRAIN.NUM_EPOCHS_PER_DECAY)\n\n  if cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'exponential':\n    return tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,\n                                      global_step,\n                                      decay_steps,\n                                      cfg.TRAIN.LEARNING_RATE_DECAY_RATE,\n                                      staircase=True,\n                                      name='exponential_decay_learning_rate')\n  elif cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'fixed':\n    return tf.constant(cfg.TRAIN.LEARNING_RATE, name='fixed_learning_rate')\n  elif cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'polynomial':\n    return tf.train.polynomial_decay(cfg.TRAIN.LEARNING_RATE,\n                                     global_step,\n                                     decay_steps,\n                                     cfg.TRAIN.END_LEARNING_RATE,\n                                     power=1.0,\n                                     cycle=False,\n                                     name='polynomial_decay_learning_rate')\n  else:\n    raise ValueError('learning_rate_decay_type [%s] was not recognized',\n                     cfg.TRAIN.LEARNING_RATE_DECAY_RATE)\n\n\ndef _configure_optimizer(learning_rate):\n  \"\"\"Configures the optimizer used for training.\n\n  Args:\n    learning_rate: A scalar or `Tensor` learning rate.\n\n  Returns:\n    An instance of an optimizer.\n\n  Raises:\n    ValueError: if cfg.optimizer is not recognized.\n  \"\"\"\n  if cfg.TRAIN.OPTIMIZER == 'adam':\n    optimizer = tf.train.AdamOptimizer(\n        learning_rate,\n        beta1=cfg.TRAIN.ADAM_BETA1,\n        beta2=cfg.TRAIN.ADAM_BETA2,\n        epsilon=cfg.TRAIN.OPT_EPSILON)\n  elif cfg.TRAIN.OPTIMIZER == 'momentum':\n    optimizer = 
tf.train.MomentumOptimizer(\n        learning_rate,\n        momentum=cfg.TRAIN.MOMENTUM,\n        name='Momentum')\n  elif cfg.TRAIN.OPTIMIZER == 'rmsprop':\n    optimizer = tf.train.RMSPropOptimizer(\n        learning_rate,\n        decay=cfg.TRAIN.RMSPROP_DECAY,\n        momentum=cfg.TRAIN.MOMENTUM,\n        epsilon=cfg.TRAIN.OPT_EPSILON)\n  elif cfg.TRAIN.OPTIMIZER == 'sgd':\n    optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n  else:\n    raise ValueError('Optimizer [%s] was not recognized', cfg.TRAIN.OPTIMIZER)\n  return optimizer\n\n\ndef _add_variables_summaries(learning_rate):\n  summaries = []\n  for variable in slim.get_model_variables():\n    summaries.append(tf.histogram_summary(variable.op.name, variable))\n  summaries.append(tf.summary.scalar(tensor=learning_rate,\n                                     name='training/Learning Rate'))\n  return summaries\n\n\ndef _get_init_fn(train_dir):\n  \"\"\"Returns a function run by the chief worker to warm-start the training.\n\n  Note that the init_fn is only run when initializing the model during the very\n  first global step.\n\n  Returns:\n    An init function run by the supervisor.\n  \"\"\"\n  if cfg.TRAIN.CHECKPOINT_PATH is None:\n    return None\n\n  # Warn the user if a checkpoint exists in the train_dir. Then we'll be\n  # ignoring the checkpoint anyway.\n  if tf.train.latest_checkpoint(train_dir):\n    tf.logging.info(\n        'Ignoring --checkpoint_path because a checkpoint already exists in %s'\n        % train_dir)\n    return None\n\n  exclusions = []\n  if cfg.TRAIN.CHECKPOINT_EXCLUDE_SCOPES:\n    exclusions = [scope.strip()\n                  for scope in cfg.TRAIN.CHECKPOINT_EXCLUDE_SCOPES.split(',')]\n\n  # variables_to_restore = slim.get_variables_to_restore(exclude=exclusions)\n  # NOTE: The above was wrong!! It would restore all global_step, momentum etc\n  # variables too, which we don't want when starting from a pretrained model\n  # (like imagenet). 
The above is (and should be) used when restoring from a\n  # half-trained model of the same script (which doesn't happen here anyway,\n  # see above, there's a return None if a checkpoint exists)\n  variables_to_restore = slim.filter_variables(\n    slim.get_model_variables(),\n    exclude_patterns=exclusions)\n\n  if tf.gfile.IsDirectory(cfg.TRAIN.CHECKPOINT_PATH):\n    checkpoint_path = tf.train.latest_checkpoint(cfg.TRAIN.CHECKPOINT_PATH)\n  else:\n    checkpoint_path = cfg.TRAIN.CHECKPOINT_PATH\n\n  tf.logging.info('Fine-tuning from %s' % checkpoint_path)\n\n  return model_restorer.restore_model(\n      checkpoint_path,\n      variables_to_restore,\n      ignore_missing_vars=cfg.TRAIN.IGNORE_MISSING_VARS,\n      var_name_mapper_type=cfg.TRAIN.VAR_NAME_MAPPER)\n\n\ndef _get_variables_to_train():\n  \"\"\"Returns a list of variables to train.\n\n  Returns:\n    A list of variables to train by the optimizer.\n  \"\"\"\n  if cfg.TRAIN.TRAINABLE_SCOPES == '':\n    return tf.trainable_variables()\n  else:\n    scopes = [scope.strip() for scope in cfg.TRAIN.TRAINABLE_SCOPES.split(',')]\n\n  variables_to_train = []\n  for scope in scopes:\n    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)\n    variables_to_train.extend(variables)\n  return variables_to_train\n\n\ndef _gen_overlayed_img(hmap, img):\n  with tf.name_scope('VisualizeOverlayedHeatmap'):\n    hmap = tf.expand_dims(hmap, -1)\n    hmap = tf.image.resize_images(\n      hmap, img.get_shape().as_list()[-3:-1])\n    img = tf.tile(\n      tf.image.rgb_to_grayscale(img), [1, 1, 1, 3])\n    hmap = tf.image.grayscale_to_rgb(hmap)\n    hmap = tf.concat([\n      tf.expand_dims(hmap[..., 0] * 255.0, -1),\n      hmap[..., 1:] * 0.0], axis=-1)\n    return (0.5 * img + 0.5 * hmap)\n\n\ndef _summarize_heatmaps(name, tensor, img_tensor):\n  # return tf.summary.image(name, tf.reduce_sum(tensor, axis=-1, keep_dims=True))\n  if tensor.get_shape()[-1] == 0:\n    tf.logging.info('Pose heatmaps have 0 
channels. Not summarizing')\n    return set()\n  return set([\n    tf.summary.image(\n      name + '/head', _gen_overlayed_img(tensor[..., 9], img_tensor)),\n    tf.summary.image(\n      name + '/lwrist', _gen_overlayed_img(tensor[..., 15], img_tensor)),\n    tf.summary.image(\n      name + '/rankle', _gen_overlayed_img(tensor[..., 0], img_tensor)),\n    tf.summary.image(\n      name + '/pelvis', _gen_overlayed_img(tensor[..., 6], img_tensor))])\n\n\nend_points_debug = {}\ndef _train_step(sess, train_op, global_step, train_step_kwargs):\n  \"\"\"Function that takes a gradient step and specifies whether to stop.\n\n  Args:\n    sess: The current session.\n    train_op: An `Operation` that evaluates the gradients and returns the\n      total loss.\n    global_step: A `Tensor` representing the global training step.\n    train_step_kwargs: A dictionary of keyword arguments.\n\n  Returns:\n    The total loss and a boolean indicating whether or not to stop training.\n\n  Raises:\n    ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not.\n  \"\"\"\n  start_time = time.time()\n\n  trace_run_options = None\n  run_metadata = None\n  if 'should_trace' in train_step_kwargs:\n    if 'logdir' not in train_step_kwargs:\n      raise ValueError('logdir must be present in train_step_kwargs when '\n                       'should_trace is present')\n    if sess.run(train_step_kwargs['should_trace']):\n      trace_run_options = config_pb2.RunOptions(\n          trace_level=config_pb2.RunOptions.FULL_TRACE)\n      run_metadata = config_pb2.RunMetadata()\n\n  if cfg.TRAIN.ITER_SIZE == 1:\n    # To Debug, uncomment here and observe the end_points_debug\n    total_loss, np_global_step = sess.run([train_op, global_step],\n                                          options=trace_run_options,\n                                          run_metadata=run_metadata)\n  else:\n    for j in range(cfg.TRAIN.ITER_SIZE-1):\n      sess.run([train_op[j]])\n    total_loss, 
np_global_step = sess.run([\n      train_op[cfg.TRAIN.ITER_SIZE-1], global_step],\n      options=trace_run_options,\n      run_metadata=run_metadata)\n  time_elapsed = time.time() - start_time\n\n  if run_metadata is not None:\n    tl = timeline.Timeline(run_metadata.step_stats)\n    trace = tl.generate_chrome_trace_format()\n    trace_filename = os.path.join(train_step_kwargs['logdir'],\n                                  'tf_trace-%d.json' % np_global_step)\n    tf.logging.info('Writing trace to %s', trace_filename)\n    file_io.write_string_to_file(trace_filename, trace)\n    if 'summary_writer' in train_step_kwargs:\n      train_step_kwargs['summary_writer'].add_run_metadata(run_metadata,\n                                                           'run_metadata-%d' %\n                                                           np_global_step)\n\n  if 'should_log' in train_step_kwargs:\n    if sess.run(train_step_kwargs['should_log']):\n      tf.logging.info('%s: global step %d: loss = %.4f (%.2f sec/step)',\n                   datetime.now(), np_global_step, total_loss, time_elapsed)\n\n  if 'should_stop' in train_step_kwargs:\n    should_stop = sess.run(train_step_kwargs['should_stop'])\n  else:\n    should_stop = False\n\n  return total_loss, should_stop\n\n\ndef parse_args():\n  \"\"\"\n  Parse input arguments\n  \"\"\"\n  parser = argparse.ArgumentParser(description='Train a keypoint regressor.')\n  parser.add_argument('--cfg', dest='cfg_file',\n                      help='optional config file',\n                      default=None, type=str)\n\n  args = parser.parse_args()\n  return args\n\n\ndef main():\n  args = parse_args()\n  if args.cfg_file is not None:\n    cfg_from_file(args.cfg_file)\n\n  tf.logging.info('Using Config:')\n  pprint.pprint(cfg)\n\n  train_dir = get_output_dir('default' if args.cfg_file is None\n                             else args.cfg_file)\n  os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPUS\n  num_clones = len(cfg.GPUS.split(','))\n\n  
tf.logging.set_verbosity(tf.logging.INFO)\n  with tf.Graph().as_default():\n    ######################\n    # Config model_deploy#\n    ######################\n    tf.set_random_seed(cfg.RNG_SEED)\n    deploy_config = model_deploy.DeploymentConfig(\n        num_clones=num_clones,\n        clone_on_cpu=False,\n        replica_id=0,\n        num_replicas=1,\n        num_ps_tasks=0)\n\n    # Create global_step\n    with tf.device(deploy_config.variables_device()):\n      global_step = slim.create_global_step()\n\n    ######################\n    # Select the dataset #\n    ######################\n    kwargs = {}\n    if cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO > 1:\n      kwargs['num_samples'] = cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO\n      kwargs['randomFromSegmentStyle'] = cfg.TRAIN.READ_SEGMENT_STYLE\n      kwargs['modality'] = cfg.INPUT.VIDEO.MODALITY\n      kwargs['split_id'] = cfg.INPUT.SPLIT_ID\n    if cfg.DATASET_LIST_DIR != '':\n      kwargs['dataset_list_dir'] = cfg.DATASET_LIST_DIR\n    if cfg.INPUT_FILE_STYLE_LABEL != '':\n      kwargs['input_file_style_label'] = cfg.INPUT_FILE_STYLE_LABEL\n    dataset, num_pose_keypoints = dataset_factory.get_dataset(\n      cfg.DATASET_NAME, cfg.TRAIN.DATASET_SPLIT_NAME, cfg.DATASET_DIR,\n      **kwargs)\n\n    ####################\n    # Select the network #\n    ####################\n    network_fn = nets_factory.get_network_fn(\n        cfg.MODEL_NAME,\n        num_classes=(dataset.num_classes),\n        num_pose_keypoints=num_pose_keypoints,\n        weight_decay=cfg.TRAIN.WEIGHT_DECAY,\n        is_training=True,\n        cfg=cfg)  # advanced network creation controlled with cfg.NET\n\n    #####################################\n    # Select the preprocessing function #\n    #####################################\n    preprocessing_name = cfg.MODEL_NAME\n    image_preprocessing_fn = preprocessing_factory.get_preprocessing(\n        preprocessing_name,\n        is_training=True)\n\n    
##############################################################\n    # Create a dataset provider that loads data from the dataset #\n    ##############################################################\n    with tf.device(deploy_config.inputs_device()):\n      provider = slim.dataset_data_provider.DatasetDataProvider(\n          dataset,\n          num_readers=cfg.NUM_READERS,\n          common_queue_capacity=20 * cfg.TRAIN.BATCH_SIZE,\n          common_queue_min=10 * cfg.TRAIN.BATCH_SIZE)\n\n      [image, pose_label_hmap,\n       pose_label_valid, action_label] = train_preprocess_pipeline(\n         provider, cfg, network_fn, num_pose_keypoints,\n         image_preprocessing_fn)\n      # input_data = [preprocess_pipeline(\n      #   provider, cfg, network_fn, num_pose_keypoints, image_preprocessing_fn)\n      #   for _ in range(cfg.NUM_PREPROCESSING_THREADS)]\n\n      images, pose_labels_hmap, pose_labels_valid, action_labels = tf.train.batch(\n          [image, pose_label_hmap, pose_label_valid, action_label],\n          # input_data,\n          batch_size=cfg.TRAIN.BATCH_SIZE,\n          num_threads=cfg.NUM_PREPROCESSING_THREADS,\n          capacity=5 * cfg.TRAIN.BATCH_SIZE)\n      batch_queue = slim.prefetch_queue.prefetch_queue(\n        [images, pose_labels_hmap, pose_labels_valid, action_labels],\n        capacity=5 * deploy_config.num_clones * cfg.TRAIN.ITER_SIZE)\n\n    ####################\n    # Define the model #\n    ####################\n    def clone_fn(batch_queue):\n      \"\"\"Allows data parallelism by creating multiple clones of network_fn.\"\"\"\n      images, labels_pose, labels_pose_valid, labels_action = batch_queue.dequeue()\n      # due to the multi-frame/video thing, need to squeeze first 2 dimensions\n      labels_pose = tf.concat(tf.unstack(labels_pose), axis=0)\n      labels_pose_valid = tf.concat(tf.unstack(labels_pose_valid), axis=0)\n      logits, end_points = network_fn(images)\n      pose_logits = end_points['PoseLogits']\n\n      
#############################\n      # Specify the loss function #\n      #############################\n      # if 'AuxLogits' in end_points:\n      #   slim.losses.softmax_cross_entropy(\n      #       end_points['AuxLogits'], labels,\n      #       label_smoothing=cfg.TRAIN.LABEL_SMOOTHING, weight=0.4, scope='aux_loss')\n      # slim.losses.softmax_cross_entropy(\n      #     logits, labels, label_smoothing=cfg.TRAIN.LABEL_SMOOTHING, weight=1.0)\n      end_points['Images'] = images\n      end_points['PoseLabels'] = labels_pose\n      end_points['ActionLabels'] = labels_action\n      end_points['ActionLogits'] = logits\n      tf.logging.info('PoseLogits shape is {}.'.format(pose_logits.get_shape().as_list()))\n\n      gen_losses(labels_action, logits, cfg.TRAIN.LOSS_FN_ACTION,\n                 dataset.num_classes, cfg.TRAIN.LOSS_FN_ACTION_WT,\n                 labels_pose, pose_logits, cfg.TRAIN.LOSS_FN_POSE,\n                 labels_pose_valid, cfg.TRAIN.LOSS_FN_POSE_WT, end_points, cfg)\n\n      return end_points\n\n    # Gather initial summaries.\n    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))\n\n    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])\n    first_clone_scope = deploy_config.clone_scope(0)\n    # Gather update_ops from the first clone. 
These contain, for example,\n    # the updates for the batch_norm variables created by network_fn.\n    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)\n\n    # Add summaries for end_points.\n    end_points = clones[0].outputs\n\n    # store the end points in a global variable for debugging in train_step\n    global end_points_debug\n    end_points_debug = end_points\n\n    for end_point in end_points:\n      x = end_points[end_point]\n      summaries.add(tf.summary.histogram('activations/' + end_point, x))\n      # summaries.add(tf.summary.scalar(tf.nn.zero_fraction(x),\n      #                                 name='sparsity/' + end_point))\n    sum_img = tf.concat(tf.unstack(end_points['Images']), axis=0)\n    if sum_img.get_shape().as_list()[-1] not in [1, 3, 4]:\n      sum_img = tf.reduce_sum(sum_img, axis=-1, keep_dims=True)\n      sum_img = sum_img - tf.reduce_min(sum_img)\n      sum_img = sum_img / (tf.reduce_max(sum_img) + cfg.EPS)\n    summaries.add(tf.summary.image('images', sum_img))\n    for epname in cfg.TRAIN.OTHER_IMG_SUMMARIES_TO_ADD:\n      if epname in end_points:\n        summaries.add(tf.summary.image('image_vis/' + epname, end_points[epname]))\n    summaries = summaries.union(_summarize_heatmaps(\n      'labels', end_points['PoseLabels'], sum_img))\n    summaries = summaries.union(_summarize_heatmaps(\n      'pose', end_points['PoseLogits'], sum_img))\n    if 'PoseLossMask' in end_points:\n      summaries = summaries.union(_summarize_heatmaps(\n        'loss_mask/pose', end_points['PoseLossMask'], sum_img))\n\n    # Add summaries for losses.\n    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):\n      summaries.add(tf.summary.scalar(tensor=loss, name='losses/%s' % loss.op.name))\n\n    # Add summaries for variables.\n    for variable in slim.get_model_variables():\n      summaries.add(tf.summary.histogram(variable.op.name, variable))\n\n    #################################\n    # Configure the 
moving averages #\n    #################################\n    if cfg.TRAIN.MOVING_AVERAGE_VARIABLES:\n      moving_average_variables = slim.get_model_variables()\n      variable_averages = tf.train.ExponentialMovingAverage(\n          cfg.TRAIN.MOVING_AVERAGE_VARIABLES, global_step)\n    else:\n      moving_average_variables, variable_averages = None, None\n\n    #########################################\n    # Configure the optimization procedure. #\n    #########################################\n    with tf.device(deploy_config.optimizer_device()):\n      learning_rate = _configure_learning_rate(dataset.num_samples, num_clones, global_step)\n      optimizer = _configure_optimizer(learning_rate)\n      summaries.add(tf.summary.scalar(tensor=learning_rate,\n                                      name='learning_rate'))\n\n    # if cfg.sync_replicas:\n    #   # If sync_replicas is enabled, the averaging will be done in the chief\n    #   # queue runner.\n    #   optimizer = tf.train.SyncReplicasOptimizer(\n    #       opt=optimizer,\n    #       replicas_to_aggregate=,\n    #       variable_averages=variable_averages,\n    #       variables_to_average=moving_average_variables,\n    #       replica_id=tf.constant(cfg.task, tf.int32, shape=()),\n    #       total_num_replicas=cfg.worker_replicas)\n    # elif cfg.moving_average_decay:\n    #   # Update ops executed locally by trainer.\n    #   update_ops.append(variable_averages.apply(moving_average_variables))\n\n    # Variables to train.\n    variables_to_train = _get_variables_to_train()\n    tf.logging.info('Training the following variables: {}'.format(\n                    ', '.join([var.op.name for var in variables_to_train])))\n\n    #  and returns a train_tensor and summary_op\n    total_loss, clones_gradients = model_deploy.optimize_clones(\n        clones,\n        optimizer,\n        var_list=variables_to_train,\n        clip_gradients=cfg.TRAIN.CLIP_GRADIENTS)\n    # Add total_loss to summary.\n    
summaries.add(tf.summary.scalar(tensor=total_loss,\n                                    name='total_loss'))\n\n    # Create gradient updates.\n    train_ops = {}\n    if cfg.TRAIN.ITER_SIZE == 1:\n      grad_updates = optimizer.apply_gradients(clones_gradients,\n                                               global_step=global_step)\n      update_ops.append(grad_updates)\n\n      update_op = tf.group(*update_ops)\n      train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,\n                                                        name='train_op')\n      train_ops = train_tensor\n    else:\n      with tf.name_scope('AccumulateGradients'):\n        # copied as is from my previous code\n        gvs = [(grad, var) for grad, var in clones_gradients]\n        varnames = [var.name for grad, var in gvs]\n        varname_to_var = {var.name: var for grad, var in gvs}\n        varname_to_grad = {var.name: grad for grad, var in gvs}\n        varname_to_ref_grad = {}\n        for vn in varnames:\n          grad = varname_to_grad[vn]\n          print(\"accumulating ... 
\", (vn, grad.get_shape()))\n          with tf.variable_scope(\"ref_grad\"):\n            with tf.device(deploy_config.variables_device()):\n              ref_var = slim.local_variable(\n                  np.zeros(grad.get_shape(),dtype=np.float32),\n                  name=vn[:-2])\n              varname_to_ref_grad[vn] = ref_var\n\n        all_assign_ref_op = [ref.assign(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]\n        all_assign_add_ref_op = [ref.assign_add(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]\n        assign_gradients_ref_op = tf.group(*all_assign_ref_op)\n        accmulate_gradients_op = tf.group(*all_assign_add_ref_op)\n        with tf.control_dependencies([accmulate_gradients_op]):\n          final_gvs = [(varname_to_ref_grad[var.name] / float(cfg.TRAIN.ITER_SIZE), var) for grad, var in gvs]\n          apply_gradients_op = optimizer.apply_gradients(final_gvs, global_step=global_step)\n          update_ops.append(apply_gradients_op)\n          update_op = tf.group(*update_ops)\n          train_tensor = control_flow_ops.with_dependencies([update_op],\n              total_loss, name='train_op')\n        for i in range(cfg.TRAIN.ITER_SIZE):\n          if i == 0:\n            train_ops[i] = assign_gradients_ref_op\n          elif i < cfg.TRAIN.ITER_SIZE - 1:  # because apply_gradients also computes\n                                             # (see control_dependency), so\n                                             # no need of running an extra iteration\n            train_ops[i] = accmulate_gradients_op\n          else:\n            train_ops[i] = train_tensor\n\n    # Add the summaries from the first clone. 
These contain the summaries\n    # created by model_fn and either optimize_clones() or _gather_clone_loss().\n    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,\n                                       first_clone_scope))\n\n    # Merge all summaries together.\n    summary_op = tf.summary.merge(list(summaries), name='summary_op')\n\n    config = tf.ConfigProto()\n    config.gpu_options.allow_growth = True\n    config.allow_soft_placement = True\n    config.intra_op_parallelism_threads = 4  # to avoid too many threads\n    # The following seems optimal... though not sure\n    config.inter_op_parallelism_threads = max(\n      cfg.NUM_PREPROCESSING_THREADS, 12)\n    ###########################\n    # Kicks off the training. #\n    ###########################\n    slim.learning.train(\n        train_ops,\n        train_step_fn=_train_step,\n        logdir=train_dir,\n        master='',\n        is_chief=True,\n        init_fn=_get_init_fn(train_dir),\n        summary_op=summary_op,\n        number_of_steps=cfg.TRAIN.MAX_NUMBER_OF_STEPS,\n        log_every_n_steps=cfg.TRAIN.LOG_EVERY_N_STEPS,\n        save_summaries_secs=cfg.TRAIN.SAVE_SUMMARIES_SECS,\n        save_interval_secs=cfg.TRAIN.SAVE_INTERVAL_SECS,\n        sync_optimizer=None,\n        session_config=config)\n\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "utils/convert_mpii_result_for_eval.m",
    "content": "function convert(h5_file)\n\noutfpath = [h5_file '.mat'];\n\nTOTAL_ACT_IDS = 983;  % = max([RELEASE.act.act_id])\nDATA_DIR='../src/data/mpii/mpii_tfrecords';\n\nsample_ids = dlmread(fullfile(DATA_DIR, 'test_ids.txt'));\n% scores = zeros(numel(sample_ids), numel(class_ids));\nclass_ids = {};\n\ncid = 0;\nfid = fopen(fullfile(DATA_DIR, 'classes.txt'), 'r');\nwhile ~feof(fid)\n  line = fgetl(fid);\n  cid = cid + 1;\n  parts = strsplit(line, ';');\n  class_ids{cid} = parts{1};\n  % nums = cellfun(@str2num, strsplit(parts{2}, ','));\n  % cls_to_ids{cid} = nums;\nend\n\nscores = h5read(h5_file, '/logits')';\n% for cid = 1 : numel(cls_to_ids)\n%   targets = cls_to_ids{cid};\n%   for i = 1 : numel(targets)\n%     scores(:, targets(i)) = logits(:, cid);\n%   end\n% end\n\nsave(outfpath, 'sample_ids', 'class_ids', 'scores');\n"
  },
  {
    "path": "utils/convert_mpii_result_for_eval.sh",
    "content": "if [ $# -lt 1 ]; then\n  echo \"Usage $0 <H5 path>\"\nfi\n\nnice -n 19 matlab -nodisplay -r \"cd ../utils/; convert_mpii_result_for_eval('$1'); exit;\"\n"
  },
  {
    "path": "utils/dataset_utils/gen_tfrecord_mpii.py",
    "content": "from __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport math\nimport os\nimport random\nimport sys\nimport scipy.io\nimport operator\nimport numpy as np\n\nimport tensorflow as tf\n\nos.environ['CUDA_VISIBLE_DEVICES'] = \"\"\n\n# Set the following paths\n_MPII_MAT_FILE = '/path/to/mpii_human_pose_v1_u12_1.mat'\n_IMG_DIR = '/path/to/MPII/images/'\n\n\ndataset_dir = '../../src/data/mpii/mpii_tfrecords/'\n_SPLITS_PATH = '../../src/data/mpii/lists/images_mpii_{0}.txt'\n\n# Seed for repeatability.\n_RANDOM_SEED = 42\n\n# The number of shards per dataset split.\n_NUM_SHARDS = 20\n\n_NUM_JOINTS = 16  # for pose\n\nclass ImageReader(object):\n  \"\"\"Helper class that provides TensorFlow image coding utilities.\"\"\"\n\n  def __init__(self):\n    # Initializes function that decodes RGB JPEG data.\n    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)\n    self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)\n\n  def read_image_dims(self, sess, image_data):\n    image = self.decode_jpeg(sess, image_data)\n    return image.shape[0], image.shape[1]\n\n  def decode_jpeg(self, sess, image_data):\n    image = sess.run(self._decode_jpeg,\n                     feed_dict={self._decode_jpeg_data: image_data})\n    assert len(image.shape) == 3\n    assert image.shape[2] == 3\n    return image\n\n\ndef int64_feature(values):\n  \"\"\"Returns a TF-Feature of int64s.\n\n  Args:\n    values: A scalar or list of values.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  if not isinstance(values, (tuple, list)):\n    values = [values]\n  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))\n\n\ndef float_feature(values):\n  return tf.train.Feature(float_list=tf.train.FloatList(value=values))\n\n\ndef bytes_feature(values):\n  \"\"\"Returns a TF-Feature of bytes.\n\n  Args:\n    values: A string.\n\n  Returns:\n    a TF-Feature.\n  \"\"\"\n  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))\n\n\ndef image_to_tfexample(image_data, image_format, height, width,\n                       pose, # [x,y,is_vis,...]\n                       action_label):\n  assert(len(pose) % (_NUM_JOINTS * 3) == 0)\n  return tf.train.Example(features=tf.train.Features(feature={\n      'image/encoded': bytes_feature(image_data),\n      'image/format': bytes_feature(image_format),\n      'image/class/pose': int64_feature([int(el) for el in pose]),\n      'image/class/action_label': int64_feature(action_label),\n      'image/height': int64_feature(height),\n      'image/width': int64_feature(width),\n  }))\n\n\ndef _get_dataset_filename(dataset_dir, split_name, shard_id):\n  output_filename = 'mpii_%s_%05d-of-%05d.tfrecord' % (\n      split_name, shard_id, _NUM_SHARDS)\n  return os.path.join(dataset_dir, output_filename)\n\n\ndef _convert_dataset(split_name, list_to_write, dataset_dir):\n  num_per_shard = int(math.ceil(len(list_to_write) / float(_NUM_SHARDS)))\n\n  with tf.Graph().as_default():\n    image_reader = ImageReader()\n\n    with tf.Session('') as sess:\n\n      for shard_id in range(_NUM_SHARDS):\n        output_filename = _get_dataset_filename(\n            dataset_dir, split_name, shard_id)\n\n        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:\n          start_ndx = shard_id * num_per_shard\n          end_ndx = min((shard_id+1) * num_per_shard, len(list_to_write))\n          for i in range(start_ndx, end_ndx):\n            sys.stdout.write('\\r>> Converting image %d/%d shard %d' % (\n                i+1, len(list_to_write), shard_id))\n            sys.stdout.flush()\n\n            # Read the filename:\n            fname = os.path.join(_IMG_DIR, list_to_write[i][0])\n            action_label = list_to_write[i][1]\n            poses = list_to_write[i][2]\n            all_joints = []\n            for pose in poses:\n              joints = dict((el[0], [el[1], el[2], el[3]]) for el in pose)\n              final_pose = []\n              for i in range(_NUM_JOINTS):\n                if i in joints:\n                  final_pose.append(joints[i])\n                else:\n                  final_pose.append([-1, -1, 0])\n              final_pose = [item for sublist in final_pose for item in sublist]\n              all_joints += final_pose\n            assert(len(all_joints) % 16 == 0)\n            image_data = tf.gfile.FastGFile(fname, 'r').read()\n            height, width = image_reader.read_image_dims(sess, image_data)\n\n            example = image_to_tfexample(\n                image_data, 'jpg', height, width, all_joints, action_label)\n            tfrecord_writer.write(example.SerializeToString())\n\n  sys.stdout.write('\\n')\n  sys.stdout.flush()\n\n\ndef _get_action_class(cname, D, act_id):\n  try:\n    if cname not in D:\n      D[cname] = (len(D.keys()), set([act_id]))  # act_id is the actual MPII action id\n    else:\n      D[cname][1].add(act_id)\n      # It's pretty crazy that same action will have multiple action IDs\n    return D[cname][0]\n  except Exception, e:\n    print('Invalid class name {}. setting -1. {}'.format(cname, e))\n    return -1\n\n\ndef main():\n  T = scipy.io.loadmat(_MPII_MAT_FILE, squeeze_me=True,\n                       struct_as_record=False)\n  annots = T['RELEASE'].annolist\n  is_train = T['RELEASE'].img_train\n  action_label = T['RELEASE'].act\n  splits = ['train', 'val', 'test']\n  lists_to_write = {}\n  img_id_in_split = {}\n  all_imnames = []\n  for spl in splits:\n    lists_to_write[spl] = []\n    img_id_in_split[spl] = []\n  splits_filenames = {}\n  filename_to_split = {}\n  actclassname_to_id = {}\n  for spl in splits:\n    with open(_SPLITS_PATH.format(spl), 'r') as fin:\n      splits_filenames[spl] = fin.read().splitlines()\n      filename_to_split.update(dict(zip(\n        splits_filenames[spl], [spl] * len(splits_filenames[spl]))))\n  for aid,annot in enumerate(annots):\n    imname = annot.image.name\n    all_imnames.append(imname)\n    try:\n      this_split = filename_to_split[imname[:-4]]\n    except:\n      continue  # ignore this image\n    points_fmted = []  # put all points one after the other\n    if 'annorect' in dir(annot):\n      all_rects = annot.annorect\n      if isinstance(all_rects, scipy.io.matlab.mio5_params.mat_struct):\n        all_rects = np.array([all_rects])\n      for rect in all_rects:\n        points_rect = []\n        try:\n          points = rect.annopoints.point\n        except:\n          continue\n        if isinstance(points, scipy.io.matlab.mio5_params.mat_struct):\n          points = np.array([points])\n        for point in points:\n          try:\n            is_visible = point.is_visible if point.is_visible in [1,0] else 0\n          except:\n            is_visible = 0\n          points_rect.append((point.id, point.x, point.y, is_visible))\n        points_fmted.append(points_rect)\n    [el.sort() for el in points_fmted]\n\n    # the following assert is not true, so putting -1 when writing it out\n    # assert(all([len(el) == 16 for el in points_fmted]))\n    image_obj = (annot.image.name,\n                 _get_action_class(action_label[aid].act_name,\n                                   actclassname_to_id,\n                                   action_label[aid].act_id),\n                 points_fmted)\n    if os.path.exists(os.path.join(_IMG_DIR, imname)):\n      lists_to_write[this_split].append(image_obj)\n      img_id_in_split[this_split].append(aid+1)  # 1-indexed\n  cls_ids = sorted(actclassname_to_id.items(), key=operator.itemgetter(1))\n  print('Total classes found: {}'.format(len(cls_ids)))\n  #write out the dictionary of classnames\n  with open(os.path.join(dataset_dir, 'classes.txt'), 'w') as fout:\n    fout.write('\\n'.join([el[0] + ';' + ','.join([\n      str(e) for e in list(el[1][1])]) for el in cls_ids]))\n\n  if not tf.gfile.Exists(dataset_dir):\n    tf.gfile.MakeDirs(dataset_dir)\n\n  # Only randomize the train set\n  random.seed(_RANDOM_SEED)\n  train_ids = range(len(lists_to_write['train']))\n  random.shuffle(train_ids)\n  lists_to_write['train'] = [lists_to_write['train'][i] for i in train_ids]\n  img_id_in_split['train'] = [img_id_in_split['train'][i] for i in train_ids]\n\n  with open(os.path.join(dataset_dir, 'imnames.txt'), 'w') as fout:\n    fout.write('\\n'.join(all_imnames))\n  for spl in splits:\n    with open(os.path.join(\n      dataset_dir, '{}_ids.txt'.format(spl)), 'w') as fout:\n      fout.write('\\n'.join([str(el) for el in img_id_in_split[spl]]))\n    spl_name = spl\n    if spl in ['train', 'val']:\n      spl_name = 'trainval_' + spl  # would be useful when training on tr+val\n    print('Writing {} images for split {}.'.format(\n      len(lists_to_write[spl]), spl))\n    _convert_dataset(spl_name, lists_to_write[spl],\n                     dataset_dir)\n\n  print('\\nFinished converting the MPII dataset!')\n\nif __name__ == '__main__':\n  main()\n"
  }
]