Repository: rohitgirdhar/AttentionalPoolingAction
Branch: master
Commit: 9ab0acd9360f
Files: 98
Total size: 622.1 KB

Directory structure:
gitextract_4chdi2im/

├── .gitignore
├── LICENSE
├── README.md
├── experiments/
│   ├── 001_MPII_ResNet.yaml
│   ├── 001_MPII_ResNet_pretrained.yaml
│   ├── 002_MPII_ResNet_withAttention.yaml
│   ├── 002_MPII_ResNet_withAttention_pretrained.yaml
│   ├── 003_MPII_ResNet_withPoseAttention.yaml
│   ├── 003_MPII_ResNet_withPoseAttention_pretrained.yaml
│   └── 004_MPII_ResNet_withAttention_train+val.yaml
├── models/
│   ├── .github/
│   │   └── ISSUE_TEMPLATE.md
│   ├── .gitignore
│   ├── .gitmodules
│   ├── LICENSE
│   └── slim/
│       ├── __init__.py
│       ├── datasets/
│       │   ├── __init__.py
│       │   ├── cifar10.py
│       │   ├── dataset_factory.py
│       │   ├── dataset_utils.py
│       │   ├── download_and_convert_cifar10.py
│       │   ├── download_and_convert_flowers.py
│       │   ├── download_and_convert_mnist.py
│       │   ├── flowers.py
│       │   ├── imagenet.py
│       │   └── mnist.py
│       ├── deployment/
│       │   ├── __init__.py
│       │   ├── model_deploy.py
│       │   └── model_deploy_test.py
│       ├── nets/
│       │   ├── __init__.py
│       │   ├── alexnet.py
│       │   ├── alexnet_test.py
│       │   ├── cifarnet.py
│       │   ├── inception.py
│       │   ├── inception_resnet_v2.py
│       │   ├── inception_resnet_v2_test.py
│       │   ├── inception_utils.py
│       │   ├── inception_v1.py
│       │   ├── inception_v1_test.py
│       │   ├── inception_v2.py
│       │   ├── inception_v2_test.py
│       │   ├── inception_v2_tsn.py
│       │   ├── inception_v3.py
│       │   ├── inception_v3_test.py
│       │   ├── inception_v4.py
│       │   ├── inception_v4_test.py
│       │   ├── lenet.py
│       │   ├── nets_factory.py
│       │   ├── nets_factory_test.py
│       │   ├── overfeat.py
│       │   ├── overfeat_test.py
│       │   ├── resnet_utils.py
│       │   ├── resnet_v1.py
│       │   ├── resnet_v1_test.py
│       │   ├── resnet_v2.py
│       │   ├── resnet_v2_test.py
│       │   ├── vgg.py
│       │   └── vgg_test.py
│       └── preprocessing/
│           ├── __init__.py
│           ├── cifarnet_preprocessing.py
│           ├── inception_preprocessing.py
│           ├── lenet_preprocessing.py
│           ├── preprocessing_factory.py
│           └── vgg_preprocessing.py
├── src/
│   ├── config.py
│   ├── custom_ops/
│   │   ├── Makefile
│   │   ├── __init__.py
│   │   ├── custom_ops_factory.py
│   │   ├── pose_to_heatmap.cc
│   │   ├── pose_utils.hpp
│   │   ├── render_objects.cc
│   │   ├── render_pose.cc
│   │   ├── test/
│   │   │   ├── pose_to_heatmap_op_test.py
│   │   │   ├── render_objects_op_test.py
│   │   │   └── zero_out_channels_op_test.py
│   │   └── zero_out_channels.cc
│   ├── datasets/
│   │   ├── __init__.py
│   │   ├── charades.py
│   │   ├── dataset_factory.py
│   │   ├── dataset_utils.py
│   │   ├── hico.py
│   │   ├── hmdb51.py
│   │   ├── image_read_utils.py
│   │   ├── jhmdb21.py
│   │   ├── mpii.py
│   │   └── video_data_utils.py
│   ├── eval/
│   │   ├── __init__.py
│   │   ├── cap_eval_utils.py
│   │   └── utils.py
│   ├── eval.py
│   ├── loss.py
│   ├── preprocess_pipeline.py
│   ├── restore/
│   │   ├── __init__.py
│   │   ├── model_restorer.py
│   │   └── var_name_mapper.py
│   └── train.py
└── utils/
    ├── convert_mpii_result_for_eval.m
    ├── convert_mpii_result_for_eval.sh
    └── dataset_utils/
        └── gen_tfrecord_mpii.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
*.swp
*.so
*.jpg
*.html
\#*\#
.\#*
*~
*.h5
src/expt_outputs/*
src/data/*

# Custom stuff
webpages/002_VisAtt2/hmdb_frames
webpages/002_VisAtt2/linAtt
webpages/002_VisAtt2/poseAtt


================================================
FILE: LICENSE
================================================
Copyright (c) 2017 Rohit Girdhar and Deva Ramanan.
All rights reserved.

This code is copyrighted by the authors and Carnegie Mellon University,
and is for non-commercial research purposes only. Please contact the authors and
Carnegie Mellon University if you are interested in licensing for
commercial purposes.

                                  Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# Attentional Pooling for Action Recognition

[[project page](https://rohitgirdhar.github.io/AttentionalPoolingAction/)] [[paper](https://arxiv.org/abs/1711.01467)]

If this code helps with your work/research, please consider citing

Rohit Girdhar and Deva Ramanan. **Attentional Pooling for Action Recognition**. Advances in Neural Information Processing Systems (NIPS), 2017.

```txt
@inproceedings{Girdhar_17b_AttentionalPoolingAction,
    title = {Attentional Pooling for Action Recognition},
    author = {Girdhar, Rohit and Ramanan, Deva},
    booktitle = {NIPS},
    year = 2017
}
```

## Pre-requisites

This code was trained and tested with

1. CentOS 6.5
2. Python 2.7
3. TensorFlow 1.1.0-rc2 ([6a1825e2](https://github.com/tensorflow/tensorflow/tree/6a1825e2369d2537e15dc585705c53c4b763f3f6))

## Getting started

Clone the code and create some directories for outputs

```bash
$ git clone --recursive https://github.com/rohitgirdhar/AttentionalPoolingAction.git
$ export ROOT=`pwd`/AttentionalPoolingAction
$ cd $ROOT/src/
$ mkdir -p expt_outputs data
$ # compile some custom ops
$ cd custom_ops; make; cd ..
```

## Data setup

You can download the `tfrecord` files for MPII I used from
[here](https://cmu.box.com/shared/static/xb7esevyl6uzmra2eehnkbt2ud7awld9.tar)
and uncompress on to a fast local disk.
If you want to create your own tfrecords, you can use the following steps, which is
what I used to create the linked tfrecord files

Convert the MPII data into tfrecords. The system also can read from individual JPEG files,
but that needs a slightly different intial setup.

First download the MPII [images](http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1.tar.gz)
and [annotations](http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1_u12_2.zip),
and un-compress the files.

```bash
$ cd $ROOT/utils/dataset_utils
$ # Set the paths for MPII images and annotations file in gen_tfrecord_mpii.py
$ python gen_tfrecord_mpii.py  # Will generate the tfrecord files
```

### Keypoint labels for other datasets

While MPII dataset comes with pose labels, I also experiment with HMDB-51 and HICO, pose for which was computed using an initial version of [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). I provide the extracted keypoints here: [HMDB51](https://cmu.box.com/shared/static/gt8lhpafu7zwexf1wdwwmsufoktg94rg.tar) and [HICO](https://cmu.box.com/shared/static/42xizpt0w3almdgwczjxawvc1pvpesoa.tar).

## Testing pre-trained models

First download and unzip the
[pretrained models](https://cmu.box.com/shared/static/s72scgtjj3lm60hsufi25rfjs2dk3a7i.zip)
to a `$ROOT/src/pretrained_models/`.
The models can be run by

```bash
# Baseline model (no attention)
$ python eval.py --cfg ../experiments/001_MPII_ResNet_pretrained.yaml
# With attention
$ python eval.py --cfg ../experiments/002_MPII_ResNet_withAttention_pretrained.yaml
# With pose regularized attention
$ python eval.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention_pretrained.yaml
```

### Expected performance on MPII Validation set

| Method  | mAP | Accuracy |
|--------|-----|------|
| Baseline (no attention) | 26.2 | 33.5 |
| With attention | 30.3 | 37.2 |
| With pose regularized attention | 30.6 | 37.8 |

## Training

Train an attentional pooled model on MPII dataset, using `python train.py --cfg <path to YAML file>`.

```bash
$ cd $ROOT/src
$ python train.py --cfg ../experiments/002_MPII_ResNet_withAttention.yaml
# To train the model with pose regularized attention, use the following config
$ python train.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention.yaml
# To train the baseline without attention, use the following config
$ python train.py --cfg ../experiments/001_MPII_ResNet.yaml
```

## Testing and evaluation

Test the model trained above on the validation set, using `python eval.py --cfg <path to YAML file>`.

```bash
$ python eval.py --cfg ../experiments/002_MPII_ResNet_withAttention.yaml
# To evaluate the model with pose regularized attention
$ python eval.py --cfg ../experiments/003_MPII_ResNet_withPoseAttention.yaml
# To evaluate the model without attention
$ python train.py --cfg ../experiments/001_MPII_ResNet.yaml
```

The performance of these models should be similar to the above
released pre-trained models.

## Train + test on the final test set

This is for getting the final number on MPII test set.

```bash
# Train on the train + val set
$ python train.py --cfg ../experiments/004_MPII_ResNet_withAttention_train+val.yaml
# Test on the test set
$ python eval.py --cfg ../experiments/004_MPII_ResNet_withAttention_train+val.yaml --save
# Convert the output into the MAT files as expected by MPII authors (requires matlab/octave)
$ cd ../utils;
$ bash convert_mpii_result_for_eval.sh ../src/expt_outputs/004_MPII_ResNet_withAttention_train+val.yaml/<filename.h5>
# Now the generated mat file can be emailed to MPII authors for test evaluation
```


================================================
FILE: experiments/001_MPII_ResNet.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: ''
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1


================================================
FILE: experiments/001_MPII_ResNet_pretrained.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: ''
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1
  CHECKPOINT_PATH: pretrained_models/mpii_baseline/model.ckpt-12000


================================================
FILE: experiments/002_MPII_ResNet_withAttention.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
NET:
  USE_POSE_PRELOGITS_BASED_ATTENTION: True
  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: ''
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1


================================================
FILE: experiments/002_MPII_ResNet_withAttention_pretrained.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
NET:
  USE_POSE_PRELOGITS_BASED_ATTENTION: True
  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: ''
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1
  CHECKPOINT_PATH: pretrained_models/mpii_attention/model.ckpt-12000


================================================
FILE: experiments/003_MPII_ResNet_withPoseAttention.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
HEATMAP_MARKER_WD_RATIO: 0.05
NET:
  USE_POSE_PRELOGITS_BASED_ATTENTION: True
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: 'l2'
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1


================================================
FILE: experiments/003_MPII_ResNet_withPoseAttention_pretrained.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
HEATMAP_MARKER_WD_RATIO: 0.05
NET:
  USE_POSE_PRELOGITS_BASED_ATTENTION: True
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: 'l2'
TEST:
  EVAL_METRIC: mAP
  BATCH_SIZE: 1
  CHECKPOINT_PATH: pretrained_models/mpii_poseAttention/model.ckpt-12000


================================================
FILE: experiments/004_MPII_ResNet_withAttention_train+val.yaml
================================================
GPUS: '0,1,2,3'
NUM_READERS: 4
NUM_PREPROCESSING_THREADS: 12
MODEL_NAME: 'resnet_v1_101'
NET:
  USE_POSE_PRELOGITS_BASED_ATTENTION: True
  USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT: True
TRAIN:
  ITER_SIZE: 2
  LEARNING_RATE: 0.001
  BATCH_SIZE: 16
  FINAL_POSE_HMAP_SIDE: 15
  LEARNING_RATE_DECAY_RATE: 0.33
  NUM_STEPS_PER_DECAY: 5000
  MAX_NUMBER_OF_STEPS: 12000
  LOSS_FN_ACTION: softmax-xentropy
  CHECKPOINT_PATH: data/pretrained_models/resnet_v1_101.ckpt
  CHECKPOINT_EXCLUDE_SCOPES: resnet_v1_101/logits
  LOSS_FN_ACTION: 'softmax-xentropy'
  LOSS_FN_POSE: ''
  DATASET_SPLIT_NAME: 'trainval'
TEST:
  DATASET_SPLIT_NAME: 'test'
  EVAL_METRIC: mAP
  BATCH_SIZE: 1


================================================
FILE: models/.github/ISSUE_TEMPLATE.md
================================================
## Please let us know which model this issue is about (specify the top-level directory)


================================================
FILE: models/.gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

# editor
*.swp


================================================
FILE: models/.gitmodules
================================================
[submodule "tensorflow"]
	path = syntaxnet/tensorflow
	url = https://github.com/tensorflow/tensorflow.git


================================================
FILE: models/LICENSE
================================================
Copyright 2016 The TensorFlow Authors.  All rights reserved.

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2016, The Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: models/slim/__init__.py
================================================


================================================
FILE: models/slim/datasets/__init__.py
================================================


================================================
FILE: models/slim/datasets/cifar10.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Cifar10 dataset.

The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/data/create_cifar10_dataset.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'cifar10_%s.tfrecord'

SPLITS_TO_SIZES = {'train': 50000, 'test': 10000}

_NUM_CLASSES = 10

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [32 x 32 x 3] color image.',
    'label': 'A single integer between 0 and 9',
}


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading cifar10.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if not reader:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label': tf.FixedLenFeature(
          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)


================================================
FILE: models/slim/datasets/dataset_factory.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A factory-pattern class which returns classification image/label pairs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets import cifar10
from datasets import flowers
from datasets import imagenet
from datasets import mnist

datasets_map = {
    'cifar10': cifar10,
    'flowers': flowers,
    'imagenet': imagenet,
    'mnist': mnist,
}


def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None):
  """Given a dataset name and a split_name returns a Dataset.

  Args:
    name: String, the name of the dataset.
    split_name: A train/test split name.
    dataset_dir: The directory where the dataset files are stored.
    file_pattern: The file pattern to use for matching the dataset source files.
    reader: The subclass of tf.ReaderBase. If left as `None`, then the default
      reader defined by each dataset is used.

  Returns:
    A `Dataset` class.

  Raises:
    ValueError: If the dataset `name` is unknown.
  """
  if name not in datasets_map:
    raise ValueError('Name of dataset unknown %s' % name)
  return datasets_map[name].get_split(
      split_name,
      dataset_dir,
      file_pattern,
      reader)


================================================
FILE: models/slim/datasets/dataset_utils.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utilities for downloading and converting datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

LABELS_FILENAME = 'labels.txt'


def int64_feature(values):
  """Returns a TF-Feature of int64s.

  Args:
    values: A scalar or list of values.

  Returns:
    a TF-Feature.
  """
  if not isinstance(values, (tuple, list)):
    values = [values]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def bytes_feature(values):
  """Returns a TF-Feature of bytes.

  Args:
    values: A string.

  Returns:
    a TF-Feature.
  """
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))


def image_to_tfexample(image_data, image_format, height, width, class_id):
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': bytes_feature(image_data),
      'image/format': bytes_feature(image_format),
      'image/class/label': int64_feature(class_id),
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
  }))


def download_and_uncompress_tarball(tarball_url, dataset_dir):
  """Downloads the `tarball_url` and uncompresses it locally.

  Args:
    tarball_url: The URL of a tarball file.
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = tarball_url.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()
  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)


def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
  """Writes a file with the list of class names.

  Args:
    labels_to_class_names: A map of (integer) labels to class names.
    dataset_dir: The directory in which the labels file should be written.
    filename: The filename where the class names are written.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'w') as f:
    for label in labels_to_class_names:
      class_name = labels_to_class_names[label]
      f.write('%d:%s\n' % (label, class_name))


def has_labels(dataset_dir, filename=LABELS_FILENAME):
  """Specifies whether or not the dataset directory contains a label map file.

  Args:
    dataset_dir: The directory in which the labels file is found.
    filename: The filename where the class names are written.

  Returns:
    `True` if the labels file exists and `False` otherwise.
  """
  return tf.gfile.Exists(os.path.join(dataset_dir, filename))


def read_label_file(dataset_dir, filename=LABELS_FILENAME):
  """Reads the labels file and returns a mapping from ID to class name.

  Args:
    dataset_dir: The directory in which the labels file is found.
    filename: The filename where the class names are written.

  Returns:
    A map from a label (integer) to class name.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'r') as f:
    lines = f.read().decode()
  lines = lines.split('\n')
  lines = filter(None, lines)

  labels_to_class_names = {}
  for line in lines:
    index = line.index(':')
    labels_to_class_names[int(line[:index])] = line[index+1:]
  return labels_to_class_names


================================================
FILE: models/slim/datasets/download_and_convert_cifar10.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts cifar10 data to TFRecords of TF-Example protos.

This module downloads the cifar10 data, uncompresses it, reads the files
that make up the cifar10 data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.

The script should take several minutes to run.

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cPickle
import os
import sys
import tarfile

import numpy as np
from six.moves import urllib
import tensorflow as tf

from datasets import dataset_utils

# The URL where the CIFAR data can be downloaded.
_DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'

# The number of training files.
_NUM_TRAIN_FILES = 5

# The height and width of each image.
_IMAGE_SIZE = 32

# The names of the classes.
_CLASS_NAMES = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]


def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
  """Loads data from the cifar10 pickle files and writes files to a TFRecord.

  Args:
    filename: The filename of the cifar10 pickle file.
    tfrecord_writer: The TFRecord writer to use for writing.
    offset: An offset into the absolute number of images previously written.

  Returns:
    The new offset.
  """
  with tf.gfile.Open(filename, 'r') as f:
    data = cPickle.load(f)

  images = data['data']
  num_images = images.shape[0]

  images = images.reshape((num_images, 3, 32, 32))
  labels = data['labels']

  with tf.Graph().as_default():
    image_placeholder = tf.placeholder(dtype=tf.uint8)
    encoded_image = tf.image.encode_png(image_placeholder)

    with tf.Session('') as sess:

      for j in range(num_images):
        sys.stdout.write('\r>> Reading file [%s] image %d/%d' % (
            filename, offset + j + 1, offset + num_images))
        sys.stdout.flush()

        image = np.squeeze(images[j]).transpose((1, 2, 0))
        label = labels[j]

        png_string = sess.run(encoded_image,
                              feed_dict={image_placeholder: image})

        example = dataset_utils.image_to_tfexample(
            png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, label)
        tfrecord_writer.write(example.SerializeToString())

  return offset + num_images


def _get_output_filename(dataset_dir, split_name):
  """Creates the output filename.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
    split_name: The name of the train/test split.

  Returns:
    An absolute file path.
  """
  return '%s/cifar10_%s.tfrecord' % (dataset_dir, split_name)


def _download_and_uncompress_dataset(dataset_dir):
  """Downloads cifar10 and uncompresses it locally.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = _DATA_URL.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
          filename, float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(dataset_dir)


def _clean_up_temporary_files(dataset_dir):
  """Removes temporary files used to create the dataset.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = _DATA_URL.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)
  tf.gfile.Remove(filepath)

  tmp_dir = os.path.join(dataset_dir, 'cifar-10-batches-py')
  tf.gfile.DeleteRecursively(tmp_dir)


def run(dataset_dir):
  """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  training_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)

  # First, process the training data:
  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
    offset = 0
    for i in range(_NUM_TRAIN_FILES):
      filename = os.path.join(dataset_dir,
                              'cifar-10-batches-py',
                              'data_batch_%d' % (i + 1))  # 1-indexed.
      offset = _add_to_tfrecord(filename, tfrecord_writer, offset)

  # Next, process the testing data:
  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
    filename = os.path.join(dataset_dir,
                            'cifar-10-batches-py',
                            'test_batch')
    _add_to_tfrecord(filename, tfrecord_writer)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the Cifar10 dataset!')


================================================
FILE: models/slim/datasets/download_and_convert_flowers.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts Flowers data to TFRecords of TF-Example protos.

This module downloads the Flowers data, uncompresses it, reads the files
that make up the Flowers data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.

The script should take about a minute to run.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os
import random
import sys

import tensorflow as tf

from datasets import dataset_utils

# The URL where the Flowers data can be downloaded.
_DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'

# The number of images in the validation set.
_NUM_VALIDATION = 350

# Seed for repeatability.
_RANDOM_SEED = 0

# The number of shards per dataset split.
_NUM_SHARDS = 5


class ImageReader(object):
  """Helper class that provides TensorFlow image coding utilities."""

  def __init__(self):
    # Initializes function that decodes RGB JPEG data.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)

  def read_image_dims(self, sess, image_data):
    image = self.decode_jpeg(sess, image_data)
    return image.shape[0], image.shape[1]

  def decode_jpeg(self, sess, image_data):
    image = sess.run(self._decode_jpeg,
                     feed_dict={self._decode_jpeg_data: image_data})
    assert len(image.shape) == 3
    assert image.shape[2] == 3
    return image


def _get_filenames_and_classes(dataset_dir):
  """Returns a list of filenames and inferred class names.

  Args:
    dataset_dir: A directory containing a set of subdirectories representing
      class names. Each subdirectory should contain PNG or JPG encoded images.

  Returns:
    A list of image file paths, relative to `dataset_dir` and the list of
    subdirectories, representing class names.
  """
  flower_root = os.path.join(dataset_dir, 'flower_photos')
  directories = []
  class_names = []
  for filename in os.listdir(flower_root):
    path = os.path.join(flower_root, filename)
    if os.path.isdir(path):
      directories.append(path)
      class_names.append(filename)

  photo_filenames = []
  for directory in directories:
    for filename in os.listdir(directory):
      path = os.path.join(directory, filename)
      photo_filenames.append(path)

  return photo_filenames, sorted(class_names)


def _get_dataset_filename(dataset_dir, split_name, shard_id):
  output_filename = 'flowers_%s_%05d-of-%05d.tfrecord' % (
      split_name, shard_id, _NUM_SHARDS)
  return os.path.join(dataset_dir, output_filename)


def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir):
  """Converts the given filenames to a TFRecord dataset.

  Args:
    split_name: The name of the dataset, either 'train' or 'validation'.
    filenames: A list of absolute paths to png or jpg images.
    class_names_to_ids: A dictionary from class names (strings) to ids
      (integers).
    dataset_dir: The directory where the converted datasets are stored.
  """
  assert split_name in ['train', 'validation']

  num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))

  with tf.Graph().as_default():
    image_reader = ImageReader()

    with tf.Session('') as sess:

      for shard_id in range(_NUM_SHARDS):
        output_filename = _get_dataset_filename(
            dataset_dir, split_name, shard_id)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
          start_ndx = shard_id * num_per_shard
          end_ndx = min((shard_id+1) * num_per_shard, len(filenames))
          for i in range(start_ndx, end_ndx):
            sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                i+1, len(filenames), shard_id))
            sys.stdout.flush()

            # Read the filename:
            image_data = tf.gfile.FastGFile(filenames[i], 'r').read()
            height, width = image_reader.read_image_dims(sess, image_data)

            class_name = os.path.basename(os.path.dirname(filenames[i]))
            class_id = class_names_to_ids[class_name]

            example = dataset_utils.image_to_tfexample(
                image_data, 'jpg', height, width, class_id)
            tfrecord_writer.write(example.SerializeToString())

  sys.stdout.write('\n')
  sys.stdout.flush()


def _clean_up_temporary_files(dataset_dir):
  """Removes temporary files used to create the dataset.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = _DATA_URL.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)
  tf.gfile.Remove(filepath)

  tmp_dir = os.path.join(dataset_dir, 'flower_photos')
  tf.gfile.DeleteRecursively(tmp_dir)


def _dataset_exists(dataset_dir):
  for split_name in ['train', 'validation']:
    for shard_id in range(_NUM_SHARDS):
      output_filename = _get_dataset_filename(
          dataset_dir, split_name, shard_id)
      if not tf.gfile.Exists(output_filename):
        return False
  return True


def run(dataset_dir):
  """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  if _dataset_exists(dataset_dir):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir)
  photo_filenames, class_names = _get_filenames_and_classes(dataset_dir)
  class_names_to_ids = dict(zip(class_names, range(len(class_names))))

  # Divide into train and test:
  random.seed(_RANDOM_SEED)
  random.shuffle(photo_filenames)
  training_filenames = photo_filenames[_NUM_VALIDATION:]
  validation_filenames = photo_filenames[:_NUM_VALIDATION]

  # First, convert the training and validation sets.
  _convert_dataset('train', training_filenames, class_names_to_ids,
                   dataset_dir)
  _convert_dataset('validation', validation_filenames, class_names_to_ids,
                   dataset_dir)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(class_names)), class_names))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the Flowers dataset!')


================================================
FILE: models/slim/datasets/download_and_convert_mnist.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Downloads and converts MNIST data to TFRecords of TF-Example protos.

This module downloads the MNIST data, uncompresses it, reads the files
that make up the MNIST data and creates two TFRecord datasets: one for train
and one for test. Each TFRecord dataset is comprised of a set of TF-Example
protocol buffers, each of which contain a single image and label.

The script should take about a minute to run.

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import gzip
import os
import sys

import numpy as np
from six.moves import urllib
import tensorflow as tf

from datasets import dataset_utils

# The URLs where the MNIST data can be downloaded.
_DATA_URL = 'http://yann.lecun.com/exdb/mnist/'
_TRAIN_DATA_FILENAME = 'train-images-idx3-ubyte.gz'
_TRAIN_LABELS_FILENAME = 'train-labels-idx1-ubyte.gz'
_TEST_DATA_FILENAME = 't10k-images-idx3-ubyte.gz'
_TEST_LABELS_FILENAME = 't10k-labels-idx1-ubyte.gz'

_IMAGE_SIZE = 28
_NUM_CHANNELS = 1

# The names of the classes.
_CLASS_NAMES = [
    'zero',
    'one',
    'two',
    'three',
    'four',
    'five',
    'size',
    'seven',
    'eight',
    'nine',
]


def _extract_images(filename, num_images):
  """Extract the images into a numpy array.

  Args:
    filename: The path to an MNIST images file.
    num_images: The number of images in the file.

  Returns:
    A numpy array of shape [number_of_images, height, width, channels].
  """
  print('Extracting images from: ', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(16)
    buf = bytestream.read(
        _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
    data = np.frombuffer(buf, dtype=np.uint8)
    data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
  return data


def _extract_labels(filename, num_labels):
  """Extract the labels into a vector of int64 label IDs.

  Args:
    filename: The path to an MNIST labels file.
    num_labels: The number of labels in the file.

  Returns:
    A numpy array of shape [number_of_labels]
  """
  print('Extracting labels from: ', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(8)
    buf = bytestream.read(1 * num_labels)
    labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
  return labels


def _add_to_tfrecord(data_filename, labels_filename, num_images,
                     tfrecord_writer):
  """Loads data from the binary MNIST files and writes files to a TFRecord.

  Args:
    data_filename: The filename of the MNIST images.
    labels_filename: The filename of the MNIST labels.
    num_images: The number of images in the dataset.
    tfrecord_writer: The TFRecord writer to use for writing.
  """
  images = _extract_images(data_filename, num_images)
  labels = _extract_labels(labels_filename, num_images)

  shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
  with tf.Graph().as_default():
    image = tf.placeholder(dtype=tf.uint8, shape=shape)
    encoded_png = tf.image.encode_png(image)

    with tf.Session('') as sess:
      for j in range(num_images):
        sys.stdout.write('\r>> Converting image %d/%d' % (j + 1, num_images))
        sys.stdout.flush()

        png_string = sess.run(encoded_png, feed_dict={image: images[j]})

        example = dataset_utils.image_to_tfexample(
            png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, labels[j])
        tfrecord_writer.write(example.SerializeToString())


def _get_output_filename(dataset_dir, split_name):
  """Creates the output filename.

  Args:
    dataset_dir: The directory where the temporary files are stored.
    split_name: The name of the train/test split.

  Returns:
    An absolute file path.
  """
  return '%s/mnist_%s.tfrecord' % (dataset_dir, split_name)


def _download_dataset(dataset_dir):
  """Downloads MNIST locally.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  for filename in [_TRAIN_DATA_FILENAME,
                   _TRAIN_LABELS_FILENAME,
                   _TEST_DATA_FILENAME,
                   _TEST_LABELS_FILENAME]:
    filepath = os.path.join(dataset_dir, filename)

    if not os.path.exists(filepath):
      print('Downloading file %s...' % filename)
      def _progress(count, block_size, total_size):
        sys.stdout.write('\r>> Downloading %.1f%%' % (
            float(count * block_size) / float(total_size) * 100.0))
        sys.stdout.flush()
      filepath, _ = urllib.request.urlretrieve(_DATA_URL + filename,
                                               filepath,
                                               _progress)
      print()
      with tf.gfile.GFile(filepath) as f:
        size = f.Size()
      print('Successfully downloaded', filename, size, 'bytes.')


def _clean_up_temporary_files(dataset_dir):
  """Removes temporary files used to create the dataset.

  Args:
    dataset_dir: The directory where the temporary files are stored.
  """
  for filename in [_TRAIN_DATA_FILENAME,
                   _TRAIN_LABELS_FILENAME,
                   _TEST_DATA_FILENAME,
                   _TEST_LABELS_FILENAME]:
    filepath = os.path.join(dataset_dir, filename)
    tf.gfile.Remove(filepath)


def run(dataset_dir):
  """Runs the download and conversion operation.

  Args:
    dataset_dir: The dataset directory where the dataset is stored.
  """
  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  training_filename = _get_output_filename(dataset_dir, 'train')
  testing_filename = _get_output_filename(dataset_dir, 'test')

  if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename):
    print('Dataset files already exist. Exiting without re-creating them.')
    return

  _download_dataset(dataset_dir)

  # First, process the training data:
  with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer:
    data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME)
    labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME)
    _add_to_tfrecord(data_filename, labels_filename, 60000, tfrecord_writer)

  # Next, process the testing data:
  with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer:
    data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME)
    labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME)
    _add_to_tfrecord(data_filename, labels_filename, 10000, tfrecord_writer)

  # Finally, write the labels file:
  labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES))
  dataset_utils.write_label_file(labels_to_class_names, dataset_dir)

  _clean_up_temporary_files(dataset_dir)
  print('\nFinished converting the MNIST dataset!')


================================================
FILE: models/slim/datasets/flowers.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the flowers dataset.

The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/datasets/download_and_convert_flowers.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'flowers_%s_*.tfrecord'

SPLITS_TO_SIZES = {'train': 3320, 'validation': 350}

_NUM_CLASSES = 5

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying size.',
    'label': 'A single integer between 0 and 4',
}


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading flowers.

  Args:
    split_name: A train/validation split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/label': tf.FixedLenFeature(
          [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)


================================================
FILE: models/slim/datasets/imagenet.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes.

Some images have one or more bounding boxes associated with the label of the
image. See details here: http://image-net.org/download-bboxes

ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use
"WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech )
and SYNSET OFFSET of WordNet. For more information, please refer to the
WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/].

"There are bounding boxes for over 3000 popular synsets available.
For each synset, there are on average 150 images with bounding boxes."

WARNING: Don't use for object detection, in this case all the bounding boxes
of the image belong to just one class.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from six.moves import urllib
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

# TODO(nsilberman): Add tfrecord file type once the script is updated.
_FILE_PATTERN = '%s-*'

_SPLITS_TO_SIZES = {
    'train': 1281167,
    'validation': 50000,
}

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'label': 'The label id of the image, integer between 0 and 999',
    'label_text': 'The text of the label.',
    'object/bbox': 'A list of bounding boxes.',
    'object/label': 'A list of labels, one per each object.',
}

_NUM_CLASSES = 1001


def create_readable_names_for_imagenet_labels():
  """Create a dict mapping label id to human readable string.

  Returns:
      labels_to_names: dictionary where keys are integers from to 1000
      and values are human-readable names.

  We retrieve a synset file, which contains a list of valid synset labels used
  by ILSVRC competition. There is one synset one per line, eg.
          #   n01440764
          #   n01443537
  We also retrieve a synset_to_human_file, which contains a mapping from synsets
  to human-readable names for every synset in Imagenet. These are stored in a
  tsv format, as follows:
          #   n02119247    black fox
          #   n02119359    silver fox
  We assign each synset (in alphabetical order) an integer, starting from 1
  (since 0 is reserved for the background class).

  Code is based on
  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
  """

  # pylint: disable=g-line-too-long
  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/'
  synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
  synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)

  filename, _ = urllib.request.urlretrieve(synset_url)
  synset_list = [s.strip() for s in open(filename).readlines()]
  num_synsets_in_ilsvrc = len(synset_list)
  assert num_synsets_in_ilsvrc == 1000

  filename, _ = urllib.request.urlretrieve(synset_to_human_url)
  synset_to_human_list = open(filename).readlines()
  num_synsets_in_all_imagenet = len(synset_to_human_list)
  assert num_synsets_in_all_imagenet == 21842

  synset_to_human = {}
  for s in synset_to_human_list:
    parts = s.strip().split('\t')
    assert len(parts) == 2
    synset = parts[0]
    human = parts[1]
    synset_to_human[synset] = human

  label_index = 1
  labels_to_names = {0: 'background'}
  for synset in synset_list:
    name = synset_to_human[synset]
    labels_to_names[label_index] = name
    label_index += 1

  return labels_to_names


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading ImageNet.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature(
          (), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature(
          (), tf.string, default_value='jpeg'),
      'image/class/label': tf.FixedLenFeature(
          [], dtype=tf.int64, default_value=-1),
      'image/class/text': tf.FixedLenFeature(
          [], dtype=tf.string, default_value=''),
      'image/object/bbox/xmin': tf.VarLenFeature(
          dtype=tf.float32),
      'image/object/bbox/ymin': tf.VarLenFeature(
          dtype=tf.float32),
      'image/object/bbox/xmax': tf.VarLenFeature(
          dtype=tf.float32),
      'image/object/bbox/ymax': tf.VarLenFeature(
          dtype=tf.float32),
      'image/object/class/label': tf.VarLenFeature(
          dtype=tf.int64),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
      'label': slim.tfexample_decoder.Tensor('image/class/label'),
      'label_text': slim.tfexample_decoder.Tensor('image/class/text'),
      'object/bbox': slim.tfexample_decoder.BoundingBox(
          ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
      'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)
  else:
    labels_to_names = create_readable_names_for_imagenet_labels()
    dataset_utils.write_label_file(labels_to_names, dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names)


================================================
FILE: models/slim/datasets/mnist.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the MNIST dataset.

The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/data/create_mnist_dataset.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from datasets import dataset_utils

slim = tf.contrib.slim

_FILE_PATTERN = 'mnist_%s.tfrecord'

_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000}

_NUM_CLASSES = 10

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [28 x 28 x 1] grayscale image.',
    'label': 'A single integer between 0 and 9',
}


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading MNIST.

  Args:
    split_name: A train/test split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/test split.
  """
  if split_name not in _SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'),
      'image/class/label': tf.FixedLenFeature(
          [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1),
      'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None
  if dataset_utils.has_labels(dataset_dir):
    labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=_SPLITS_TO_SIZES[split_name],
      num_classes=_NUM_CLASSES,
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      labels_to_names=labels_to_names)


================================================
FILE: models/slim/deployment/__init__.py
================================================


================================================
FILE: models/slim/deployment/model_deploy.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Deploy Slim models across multiple clones and replicas.

# TODO(sguada) docstring paragraph by (a) motivating the need for the file and
# (b) defining clones.

# TODO(sguada) describe the high-level components of model deployment.
# E.g. "each model deployment is composed of several parts: a DeploymentConfig,
# which captures A, B and C, an input_fn which loads data.. etc

To easily train a model on multiple GPUs or across multiple machines this
module provides a set of helper functions: `create_clones`,
`optimize_clones` and `deploy`.

Usage:

  g = tf.Graph()

  # Set up DeploymentConfig
  config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)

  # Create the global step on the device storing the variables.
  with tf.device(config.variables_device()):
    global_step = slim.create_global_step()

  # Define the inputs
  with tf.device(config.inputs_device()):
    images, labels = LoadData(...)
    inputs_queue = slim.data.prefetch_queue((images, labels))

  # Define the optimizer.
  with tf.device(config.optimizer_device()):
    optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)

  # Define the model including the loss.
  def model_fn(inputs_queue):
    images, labels = inputs_queue.dequeue()
    predictions = CreateNetwork(images)
    slim.losses.log_loss(predictions, labels)

  model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],
                                 optimizer=optimizer)

  # Run training.
  slim.learning.train(model_dp.train_op, my_log_dir,
                      summary_op=model_dp.summary_op)

The Clone namedtuple holds together the values associated with each call to
model_fn:
  * outputs: The return values of the calls to `model_fn()`.
  * scope: The scope used to create the clone.
  * device: The device used to create the clone.

DeployedModel namedtuple, holds together the values needed to train multiple
clones:
  * train_op: An operation that run the optimizer training op and include
    all the update ops created by `model_fn`. Present only if an optimizer
    was specified.
  * summary_op: An operation that run the summaries created by `model_fn`
    and process_gradients.
  * total_loss: A `Tensor` that contains the sum of all losses created by
    `model_fn` plus the regularization losses.
  * clones: List of `Clone` tuples returned by `create_clones()`.

DeploymentConfig parameters:
  * num_clones: Number of model clones to deploy in each replica.
  * clone_on_cpu: True if clones should be placed on CPU.
  * replica_id: Integer.  Index of the replica for which the model is
      deployed.  Usually 0 for the chief replica.
  * num_replicas: Number of replicas to use.
  * num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
  * worker_job_name: A name for the worker job.
  * ps_job_name: A name for the parameter server job.

TODO(sguada):
  - describe side effect to the graph.
  - what happens to summaries and update_ops.
  - which graph collections are altered.
  - write a tutorial on how to use this.
  - analyze the possibility of calling deploy more than once.


"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops

slim = tf.contrib.slim


__all__ = ['create_clones',
           'deploy',
           'optimize_clones',
           'DeployedModel',
           'DeploymentConfig',
           'Clone',
          ]


# Namedtuple used to represent a clone during deployment.
Clone = collections.namedtuple('Clone',
                               ['outputs',  # Whatever model_fn() returned.
                                'scope',  # The scope used to create it.
                                'device',  # The device used to create.
                               ])

# Namedtuple used to represent a DeployedModel, returned by deploy().
DeployedModel = collections.namedtuple('DeployedModel',
                                       ['train_op',  # The `train_op`
                                        'summary_op',  # The `summary_op`
                                        'total_loss',  # The loss `Tensor`
                                        'clones',  # A list of `Clones` tuples.
                                       ])

# Default parameters for DeploymentConfig
_deployment_params = {'num_clones': 1,
                      'clone_on_cpu': False,
                      'replica_id': 0,
                      'num_replicas': 1,
                      'num_ps_tasks': 0,
                      'worker_job_name': 'worker',
                      'ps_job_name': 'ps'}


def create_clones(config, model_fn, args=None, kwargs=None):
  """Creates multiple clones according to config using a `model_fn`.

  The returned values of `model_fn(*args, **kwargs)` are collected along with
  the scope and device used to created it in a namedtuple
  `Clone(outputs, scope, device)`

  Note: it is assumed that any loss created by `model_fn` is collected at
  the tf.GraphKeys.LOSSES collection.

  To recover the losses, summaries or update_ops created by the clone use:
  ```python
    losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
  ```

  The deployment options are specified by the config object and support
  deploying one or several clones on different GPUs and one or several replicas
  of such clones.

  The argument `model_fn` is called `config.num_clones` times to create the
  model clones as `model_fn(*args, **kwargs)`.

  If `config` specifies deployment on multiple replicas then the default
  tensorflow device is set appropriatly for each call to `model_fn` and for the
  slim variable creation functions: model and global variables will be created
  on the `ps` device, the clone operations will be on the `worker` device.

  Args:
    config: A DeploymentConfig object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
    args: Optional list of arguments to pass to `model_fn`.
    kwargs: Optional list of keyword arguments to pass to `model_fn`.

  Returns:
    A list of namedtuples `Clone`.
  """
  clones = []
  args = args or []
  kwargs = kwargs or {}
  with slim.arg_scope([slim.model_variable, slim.variable],
                      device=config.variables_device()):
    # Create clones.
    for i in range(0, config.num_clones):
      with tf.name_scope(config.clone_scope(i)) as clone_scope:
        clone_device = config.clone_device(i)
        with tf.device(clone_device):
          with tf.variable_scope(tf.get_variable_scope(),
                                 reuse=True if i > 0 else None):
            outputs = model_fn(*args, **kwargs)
          clones.append(Clone(outputs, clone_scope, clone_device))
  return clones


def _gather_clone_loss(clone, num_clones, regularization_losses):
  """Gather the loss for a single clone.

  Args:
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.

  Returns:
    A tensor for the total loss for the clone.  Can be None.
  """
  # The return value.
  sum_loss = None
  # Individual components of the loss that will need summaries.
  clone_loss = None
  regularization_loss = None
  # Compute and aggregate losses on the clone device.
  with tf.device(clone.device):
    all_losses = []
    clone_losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
    if clone_losses:
      clone_loss = tf.add_n(clone_losses, name='clone_loss')
      if num_clones > 1:
        clone_loss = tf.div(clone_loss, 1.0 * num_clones,
                            name='scaled_clone_loss')
      all_losses.append(clone_loss)
    if regularization_losses:
      regularization_loss = tf.add_n(regularization_losses,
                                     name='regularization_loss')
      all_losses.append(regularization_loss)
    if all_losses:
      sum_loss = tf.add_n(all_losses)
  # Add the summaries out of the clone device block.
  if clone_loss is not None:
    tf.summary.scalar(clone.scope + '/clone_loss', clone_loss)
  if regularization_loss is not None:
    tf.summary.scalar('regularization_loss', regularization_loss)
  return sum_loss


def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
                    **kwargs):
  """Compute losses and gradients for a single clone.

  Args:
    optimizer: A tf.Optimizer  object.
    clone: A Clone namedtuple.
    num_clones: The number of clones being deployed.
    regularization_losses: Possibly empty list of regularization_losses
      to add to the clone losses.
    **kwargs: Dict of kwarg to pass to compute_gradients().

  Returns:
    A tuple (clone_loss, clone_grads_and_vars).
      - clone_loss: A tensor for the total loss for the clone.  Can be None.
      - clone_grads_and_vars: List of (gradient, variable) for the clone.
        Can be empty.
  """
  sum_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
  clone_grad = None
  if sum_loss is not None:
    with tf.device(clone.device):
      clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)
  return sum_loss, clone_grad


def optimize_clones(clones, optimizer,
                    regularization_losses=None,
                    clip_gradients=-1.0,
                    **kwargs):
  """Compute clone losses and gradients for the given list of `Clones`.

  Note: The regularization_losses are added to the first clone losses.

  Args:
   clones: List of `Clones` created by `create_clones()`.
   optimizer: An `Optimizer` object.
   regularization_losses: Optional list of regularization losses. If None it
     will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to
     exclude them.
   **kwargs: Optional list of keyword arguments to pass to `compute_gradients`.

  Returns:
   A tuple (total_loss, grads_and_vars).
     - total_loss: A Tensor containing the average of the clone losses including
       the regularization loss.
     - grads_and_vars: A List of tuples (gradient, variable) containing the sum
       of the gradients for each variable.

  """
  grads_and_vars = []
  clones_losses = []
  num_clones = len(clones)
  if regularization_losses is None:
    regularization_losses = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
  for clone in clones:
    with tf.name_scope(clone.scope):
      clone_loss, clone_grad = _optimize_clone(
          optimizer, clone, num_clones, regularization_losses, **kwargs)
      if clip_gradients > 0:
        tf.logging.info('Clipping gradient by norm {}'.format(clip_gradients))
        clone_grad = slim.learning.clip_gradient_norms(
          clone_grad, clip_gradients)
      if clone_loss is not None:
        clones_losses.append(clone_loss)
        grads_and_vars.append(clone_grad)
      # Only use regularization_losses for the first clone
      regularization_losses = None
  # Compute the total_loss summing all the clones_losses.
  total_loss = tf.add_n(clones_losses, name='total_loss')
  # Sum the gradients accross clones.
  grads_and_vars = _sum_clones_gradients(grads_and_vars)
  return total_loss, grads_and_vars


def deploy(config,
           model_fn,
           args=None,
           kwargs=None,
           optimizer=None,
           summarize_gradients=False):
  """Deploys a Slim-constructed model across multiple clones.

  The deployment options are specified by the config object and support
  deploying one or several clones on different GPUs and one or several replicas
  of such clones.

  The argument `model_fn` is called `config.num_clones` times to create the
  model clones as `model_fn(*args, **kwargs)`.

  The optional argument `optimizer` is an `Optimizer` object.  If not `None`,
  the deployed model is configured for training with that optimizer.

  If `config` specifies deployment on multiple replicas then the default
  tensorflow device is set appropriatly for each call to `model_fn` and for the
  slim variable creation functions: model and global variables will be created
  on the `ps` device, the clone operations will be on the `worker` device.

  Args:
    config: A `DeploymentConfig` object.
    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
    args: Optional list of arguments to pass to `model_fn`.
    kwargs: Optional list of keyword arguments to pass to `model_fn`.
    optimizer: Optional `Optimizer` object.  If passed the model is deployed
      for training with that optimizer.
    summarize_gradients: Whether or not add summaries to the gradients.

  Returns:
    A `DeployedModel` namedtuple.

  """
  # Gather initial summaries.
  summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

  # Create Clones.
  clones = create_clones(config, model_fn, args, kwargs)
  first_clone = clones[0]

  # Gather update_ops from the first clone. These contain, for example,
  # the updates for the batch_norm variables created by model_fn.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)

  train_op = None
  total_loss = None
  with tf.device(config.optimizer_device()):
    if optimizer:
      # Place the global step on the device storing the variables.
      with tf.device(config.variables_device()):
        global_step = slim.get_or_create_global_step()

      # Compute the gradients for the clones.
      total_loss, clones_gradients = optimize_clones(clones, optimizer)

      if clones_gradients:
        if summarize_gradients:
          # Add summaries to the gradients.
          summaries |= set(_add_gradients_summaries(clones_gradients))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_op = control_flow_ops.with_dependencies([update_op], total_loss,
                                                      name='train_op')
    else:
      clones_losses = []
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      for clone in clones:
        with tf.name_scope(clone.scope):
          clone_loss = _gather_clone_loss(clone, len(clones),
                                          regularization_losses)
          if clone_loss is not None:
            clones_losses.append(clone_loss)
          # Only use regularization_losses for the first clone
          regularization_losses = None
      if clones_losses:
        total_loss = tf.add_n(clones_losses, name='total_loss')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone.scope))

    if total_loss is not None:
      # Add total_loss to summary.
      summaries.add(tf.summary.scalar('total_loss', total_loss))

    if summaries:
      # Merge all summaries together.
      summary_op = tf.merge_summary(list(summaries), name='summary_op')
    else:
      summary_op = None

  return DeployedModel(train_op, summary_op, total_loss, clones)


def _sum_clones_gradients(clone_grads):
  """Calculate the sum gradient for each shared variable across all clones.

  This function assumes that the clone_grads has been scaled appropriately by
  1 / num_clones.

  Args:
    clone_grads: A List of List of tuples (gradient, variable), one list per
    `Clone`.

  Returns:
     List of tuples of (gradient, variable) where the gradient has been summed
     across all clones.
  """
  sum_grads = []
  for grad_and_vars in zip(*clone_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
    grads = []
    var = grad_and_vars[0][1]
    for g, v in grad_and_vars:
      assert v == var
      if g is not None:
        grads.append(g)
    if grads:
      if len(grads) > 1:
        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
      else:
        sum_grad = grads[0]
      sum_grads.append((sum_grad, var))
  return sum_grads


def _add_gradients_summaries(grads_and_vars):
  """Add histogram summaries to gradients.

  Note: The summaries are also added to the SUMMARIES collection.

  Args:
    grads_and_vars: A list of gradient to variable pairs (tuples).

  Returns:
    The _list_ of the added summaries for grads_and_vars.
  """
  summaries = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values
      else:
        grad_values = grad
      summaries.append(tf.histogram_summary(var.op.name + ':gradient',
                                            grad_values))
      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
                                            tf.global_norm([grad_values])))
    else:
      tf.logging.info('Var %s has no gradient', var.op.name)
  return summaries


class DeploymentConfig(object):
  """Configuration for deploying a model with `deploy()`.

  You can pass an instance of this class to `deploy()` to specify exactly
  how to deploy the model to build.  If you do not pass one, an instance built
  from the default deployment_hparams will be used.
  """

  def __init__(self,
               num_clones=1,
               clone_on_cpu=False,
               replica_id=0,
               num_replicas=1,
               num_ps_tasks=0,
               worker_job_name='worker',
               ps_job_name='ps'):
    """Create a DeploymentConfig.

    The config describes how to deploy a model across multiple clones and
    replicas.  The model will be replicated `num_clones` times in each replica.
    If `clone_on_cpu` is True, each clone will placed on CPU.

    If `num_replicas` is 1, the model is deployed via a single process.  In that
    case `worker_device`, `num_ps_tasks`, and `ps_device` are ignored.

    If `num_replicas` is greater than 1, then `worker_device` and `ps_device`
    must specify TensorFlow devices for the `worker` and `ps` jobs and
    `num_ps_tasks` must be positive.

    Args:
      num_clones: Number of model clones to deploy in each replica.
      clone_on_cpu: If True clones would be placed on CPU.
      replica_id: Integer.  Index of the replica for which the model is
        deployed.  Usually 0 for the chief replica.
      num_replicas: Number of replicas to use.
      num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
      worker_job_name: A name for the worker job.
      ps_job_name: A name for the parameter server job.

    Raises:
      ValueError: If the arguments are invalid.
    """
    if num_replicas > 1:
      if num_ps_tasks < 1:
        raise ValueError('When using replicas num_ps_tasks must be positive')
    if num_replicas > 1 or num_ps_tasks > 0:
      if not worker_job_name:
        raise ValueError('Must specify worker_job_name when using replicas')
      if not ps_job_name:
        raise ValueError('Must specify ps_job_name when using parameter server')
    if replica_id >= num_replicas:
      raise ValueError('replica_id must be less than num_replicas')
    self._num_clones = num_clones
    self._clone_on_cpu = clone_on_cpu
    self._replica_id = replica_id
    self._num_replicas = num_replicas
    self._num_ps_tasks = num_ps_tasks
    self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else ''
    self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else ''

  @property
  def num_clones(self):
    return self._num_clones

  @property
  def clone_on_cpu(self):
    return self._clone_on_cpu

  @property
  def replica_id(self):
    return self._replica_id

  @property
  def num_replicas(self):
    return self._num_replicas

  @property
  def num_ps_tasks(self):
    return self._num_ps_tasks

  @property
  def ps_device(self):
    return self._ps_device

  @property
  def worker_device(self):
    return self._worker_device

  def caching_device(self):
    """Returns the device to use for caching variables.

    Variables are cached on the worker CPU when using replicas.

    Returns:
      A device string or None if the variables do not need to be cached.
    """
    if self._num_ps_tasks > 0:
      return lambda op: op.device
    else:
      return None

  def clone_device(self, clone_index):
    """Device used to create the clone and all the ops inside the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A value suitable for `tf.device()`.

    Raises:
      ValueError: if `clone_index` is greater or equal to the number of clones".
    """
    if clone_index >= self._num_clones:
      raise ValueError('clone_index must be less than num_clones')
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    if self._clone_on_cpu:
      device += '/cpu:0'
    else:
      if self._num_clones > 1:
        device += '/gpu:%d' % clone_index
    return device

  def clone_scope(self, clone_index):
    """Name scope to create the clone.

    Args:
      clone_index: Int, representing the clone_index.

    Returns:
      A name_scope suitable for `tf.name_scope()`.

    Raises:
      ValueError: if `clone_index` is greater or equal to the number of clones".
    """
    if clone_index >= self._num_clones:
      raise ValueError('clone_index must be less than num_clones')
    scope = ''
    if self._num_clones > 1:
      scope = 'clone_%d' % clone_index
    return scope

  def optimizer_device(self):
    """Device to use with the optimizer.

    Returns:
      A value suitable for `tf.device()`.
    """
    if self._num_ps_tasks > 0 or self._num_clones > 0:
      return self._worker_device + '/cpu:0'
    else:
      return ''

  def inputs_device(self):
    """Device to use to build the inputs.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._worker_device
    device += '/cpu:0'
    return device

  def variables_device(self):
    """Returns the device to use for variables created inside the clone.

    Returns:
      A value suitable for `tf.device()`.
    """
    device = ''
    if self._num_ps_tasks > 0:
      device += self._ps_device
    device += '/cpu:0'

    class _PSDeviceChooser(object):
      """Slim device chooser for variables when using PS."""

      def __init__(self, device, tasks):
        self._device = device
        self._tasks = tasks
        self._task = 0

      def choose(self, op):
        if op.device:
          return op.device
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op == 'Variable':
          t = self._task
          self._task = (self._task + 1) % self._tasks
          d = '%s/task:%d' % (self._device, t)
          return d
        else:
          return op.device

    if not self._num_ps_tasks:
      return device
    else:
      chooser = _PSDeviceChooser(device, self._num_ps_tasks)
      return chooser.choose


================================================
FILE: models/slim/deployment/model_deploy_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for model_deploy."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from deployment import model_deploy

slim = tf.contrib.slim


class DeploymentConfigTest(tf.test.TestCase):

  def testDefaults(self):
    deploy_config = model_deploy.DeploymentConfig()

    self.assertEqual(slim.get_variables(), [])
    self.assertEqual(deploy_config.caching_device(), None)
    self.assertDeviceEqual(deploy_config.clone_device(0), '')
    self.assertEqual(deploy_config.clone_scope(0), '')
    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')

  def testCPUonly(self):
    deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True)

    self.assertEqual(deploy_config.caching_device(), None)
    self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0')
    self.assertEqual(deploy_config.clone_scope(0), '')
    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')

  def testMultiGPU(self):
    deploy_config = model_deploy.DeploymentConfig(num_clones=2)

    self.assertEqual(deploy_config.caching_device(), None)
    self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0')
    self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1')
    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0')

  def testPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)

    self.assertDeviceEqual(deploy_config.clone_device(0),
                           '/job:worker')
    self.assertEqual(deploy_config.clone_scope(0), '')
    self.assertDeviceEqual(deploy_config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(),
                           '/job:worker/device:CPU:0')
    with tf.device(deploy_config.variables_device()):
      a = tf.Variable(0)
      b = tf.Variable(0)
      c = tf.no_op()
      d = slim.variable('a', [],
                        caching_device=deploy_config.caching_device())
    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(a.device, a.value().device)
    self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(b.device, b.value().device)
    self.assertDeviceEqual(c.device, '')
    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(d.value().device, '')

  def testMultiGPUPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1)

    self.assertEqual(deploy_config.caching_device()(tf.no_op()), '')
    self.assertDeviceEqual(deploy_config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertDeviceEqual(deploy_config.clone_device(1),
                           '/job:worker/device:GPU:1')
    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(deploy_config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testReplicasPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
                                                  num_ps_tasks=2)

    self.assertDeviceEqual(deploy_config.clone_device(0),
                           '/job:worker')
    self.assertEqual(deploy_config.clone_scope(0), '')
    self.assertDeviceEqual(deploy_config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testReplicasMultiGPUPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_replicas=2,
                                                  num_clones=2,
                                                  num_ps_tasks=2)
    self.assertDeviceEqual(deploy_config.clone_device(0),
                           '/job:worker/device:GPU:0')
    self.assertDeviceEqual(deploy_config.clone_device(1),
                           '/job:worker/device:GPU:1')
    self.assertEqual(deploy_config.clone_scope(0), 'clone_0')
    self.assertEqual(deploy_config.clone_scope(1), 'clone_1')
    self.assertDeviceEqual(deploy_config.optimizer_device(),
                           '/job:worker/device:CPU:0')
    self.assertDeviceEqual(deploy_config.inputs_device(),
                           '/job:worker/device:CPU:0')

  def testVariablesPS(self):
    deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2)

    with tf.device(deploy_config.variables_device()):
      a = tf.Variable(0)
      b = tf.Variable(0)
      c = tf.no_op()
      d = slim.variable('a', [],
                        caching_device=deploy_config.caching_device())

    self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(a.device, a.value().device)
    self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0')
    self.assertDeviceEqual(b.device, b.value().device)
    self.assertDeviceEqual(c.device, '')
    self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0')
    self.assertDeviceEqual(d.value().device, '')


def LogisticClassifier(inputs, labels, scope=None, reuse=None):
  with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels],
                         reuse=reuse):
    predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid,
                                       scope='fully_connected')
    slim.losses.log_loss(predictions, labels)
    return predictions


def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
  with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels],
                         reuse=reuse):
    inputs = slim.batch_norm(inputs, decay=0.1)
    predictions = slim.fully_connected(inputs, 1,
                                       activation_fn=tf.sigmoid,
                                       scope='fully_connected')
    slim.losses.log_loss(predictions, labels)
    return predictions


class CreatecloneTest(tf.test.TestCase):

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      j = int(2 * self._labels[i] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def testCreateLogisticClassifier(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = LogisticClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 2)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(clone.outputs.op.name,
                       'LogisticClassifier/fully_connected/Sigmoid')
      self.assertEqual(clone.scope, '')
      self.assertDeviceEqual(clone.device, '')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])

  def testCreateSingleclone(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      clone = clones[0]
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(clone.outputs.op.name,
                       'BatchNormClassifier/fully_connected/Sigmoid')
      self.assertEqual(clone.scope, '')
      self.assertDeviceEqual(clone.device, '')
      self.assertEqual(len(slim.losses.get_losses()), 1)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

  def testCreateMulticlone(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, 'CPU:0')
        self.assertDeviceEqual(v.value().device, 'CPU:0')
      self.assertEqual(len(clones), num_clones)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
        self.assertEqual(len(update_ops), 2)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, 'GPU:%d' % i)

  def testCreateOnecloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(clones), 1)
      clone = clones[0]
      self.assertEqual(clone.outputs.op.name,
                       'BatchNormClassifier/fully_connected/Sigmoid')
      self.assertDeviceEqual(clone.device, '/job:worker')
      self.assertEqual(clone.scope, '')
      self.assertEqual(len(slim.get_variables()), 5)
      for v in slim.get_variables():
        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')
        self.assertDeviceEqual(v.device, v.value().device)

  def testCreateMulticloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    num_ps_tasks=2)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      for i, v in enumerate(slim.get_variables()):
        t = i % 2
        self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t)
        self.assertDeviceEqual(v.device, v.value().device)
      self.assertEqual(len(clones), 2)
      for i, clone in enumerate(clones):
        self.assertEqual(
            clone.outputs.op.name,
            'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i)
        self.assertEqual(clone.scope, 'clone_%d/' % i)
        self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i)


class OptimizeclonesTest(tf.test.TestCase):

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      j = int(2 * self._labels[i] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def testCreateLogisticClassifier(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = LogisticClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 2)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(update_ops, [])

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '')
        self.assertDeviceEqual(v.device, 'CPU:0')

  def testCreateSingleclone(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '')
        self.assertDeviceEqual(v.device, 'CPU:0')

  def testCreateMulticlone(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      clone_args = (tf_inputs, tf_labels)
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, clone_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), num_clones * 2)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '')
        self.assertDeviceEqual(v.device, 'CPU:0')

  def testCreateMulticloneCPU(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      num_clones = 4
      deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones,
                                                    clone_on_cpu=True)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), num_clones * 2)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '')
        self.assertDeviceEqual(v.device, 'CPU:0')

  def testCreateOnecloneWithPS(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=1,
                                                    num_ps_tasks=1)

      self.assertEqual(slim.get_variables(), [])
      clones = model_deploy.create_clones(deploy_config, model_fn, model_args)
      self.assertEqual(len(slim.get_variables()), 5)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 2)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)
      total_loss, grads_and_vars = model_deploy.optimize_clones(clones,
                                                                optimizer)
      self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
      self.assertEqual(total_loss.op.name, 'total_loss')
      for g, v in grads_and_vars:
        self.assertDeviceEqual(g.device, '/job:worker')
        self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0')


class DeployTest(tf.test.TestCase):

  def setUp(self):
    # Create an easy training set:
    np.random.seed(0)

    self._inputs = np.zeros((16, 4))
    self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32)
    self._logdir = self.get_temp_dir()

    for i in range(16):
      j = int(2 * self._labels[i] + np.random.randint(0, 2))
      self._inputs[i, j] = 1

  def testLocalTrainOp(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    clone_on_cpu=True)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)

      self.assertEqual(slim.get_variables(), [])
      model = model_deploy.deploy(deploy_config, model_fn, model_args,
                                  optimizer=optimizer)

      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 4)
      self.assertEqual(len(model.clones), 2)
      self.assertEqual(model.total_loss.op.name, 'total_loss')
      self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
      self.assertEqual(model.train_op.op.name, 'train_op')

      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        moving_mean = tf.contrib.framework.get_variables_by_name(
            'moving_mean')[0]
        moving_variance = tf.contrib.framework.get_variables_by_name(
            'moving_variance')[0]
        initial_loss = sess.run(model.total_loss)
        initial_mean, initial_variance = sess.run([moving_mean,
                                                   moving_variance])
        self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
        self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
        for _ in range(10):
          sess.run(model.train_op)
        final_loss = sess.run(model.total_loss)
        self.assertLess(final_loss, initial_loss / 10.0)

        final_mean, final_variance = sess.run([moving_mean,
                                               moving_variance])
        self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25])
        self.assertAllClose(final_variance, [0.109375, 0.1875,
                                             0.234375, 0.1875])

  def testNoSummariesOnGPU(self):
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = tf.contrib.layers.l2_regularizer(0.001)
        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)

      model = model_deploy.deploy(
          deploy_config, ModelFn,
          optimizer=tf.train.GradientDescentOptimizer(1.0))
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in  model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)

  def testNoSummariesOnGPUForEvals(self):
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = tf.contrib.layers.l2_regularizer(0.001)
        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)

      # No optimizer here, it's an eval.
      model = model_deploy.deploy(deploy_config, ModelFn)
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in  model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/__init__.py
================================================


================================================
FILE: models/slim/nets/alexnet.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a model definition for AlexNet.

This work was first described in:
  ImageNet Classification with Deep Convolutional Neural Networks
  Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton

and later refined in:
  One weird trick for parallelizing convolutional neural networks
  Alex Krizhevsky, 2014

Here we provide the implementation proposed in "One weird trick" and not
"ImageNet Classification", as per the paper, the LRN layers have been removed.

Usage:
  with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
    outputs, end_points = alexnet.alexnet_v2(inputs)

@@alexnet_v2
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      biases_initializer=tf.constant_initializer(0.1),
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and change the initializers from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

      # Use conv2d instead of fully_connected layers.
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                          scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer,
                          scope='fc8')

      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
alexnet_v2.default_image_size = 224


================================================
FILE: models/slim/nets/alexnet_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.alexnet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import alexnet

slim = tf.contrib.slim


class AlexnetV2Test(tf.test.TestCase):

  def testBuild(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs, num_classes)
      self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    batch_size = 1
    height, width = 300, 400
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False)
      self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 4, 7, num_classes])

  def testEndPoints(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = alexnet.alexnet_v2(inputs, num_classes)
      expected_names = ['alexnet_v2/conv1',
                        'alexnet_v2/pool1',
                        'alexnet_v2/conv2',
                        'alexnet_v2/pool2',
                        'alexnet_v2/conv3',
                        'alexnet_v2/conv4',
                        'alexnet_v2/conv5',
                        'alexnet_v2/pool5',
                        'alexnet_v2/fc6',
                        'alexnet_v2/fc7',
                        'alexnet_v2/fc8'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testModelVariables(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      alexnet.alexnet_v2(inputs, num_classes)
      expected_names = ['alexnet_v2/conv1/weights',
                        'alexnet_v2/conv1/biases',
                        'alexnet_v2/conv2/weights',
                        'alexnet_v2/conv2/biases',
                        'alexnet_v2/conv3/weights',
                        'alexnet_v2/conv3/biases',
                        'alexnet_v2/conv4/weights',
                        'alexnet_v2/conv4/biases',
                        'alexnet_v2/conv5/weights',
                        'alexnet_v2/conv5/biases',
                        'alexnet_v2/fc6/weights',
                        'alexnet_v2/fc6/biases',
                        'alexnet_v2/fc7/weights',
                        'alexnet_v2/fc7/biases',
                        'alexnet_v2/fc8/weights',
                        'alexnet_v2/fc8/biases',
                       ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(logits, 1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 300, 400
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random_uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = alexnet.alexnet_v2(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      tf.get_variable_scope().reuse_variables()
      eval_inputs = tf.random_uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False,
                                     spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 4, 7, num_classes])
      logits = tf.reduce_mean(logits, [1, 2])
      predictions = tf.argmax(logits, 1)
      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    batch_size = 1
    height, width = 224, 224
    with self.test_session() as sess:
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = alexnet.alexnet_v2(inputs)
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())

if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/cifarnet.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a variant of the CIFAR-10 model definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim

trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev)


def cifarnet(images, num_classes=10, is_training=False,
             dropout_keep_prob=0.5,
             prediction_fn=slim.softmax,
             scope='CifarNet'):
  """Creates a variant of the CifarNet model.

  Note that since the output is a set of 'logits', the values fall in the
  interval of (-infinity, infinity). Consequently, to convert the outputs to a
  probability distribution over the characters, one will need to convert them
  using the softmax function:

        logits = cifarnet.cifarnet(images, is_training=False)
        probabilities = tf.nn.softmax(logits)
        predictions = tf.argmax(logits, 1)

  Args:
    images: A batch of `Tensors` of size [batch_size, height, width, channels].
    num_classes: the number of classes in the dataset.
    is_training: specifies whether or not we're currently training the model.
      This variable will determine the behaviour of the dropout layer.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, `num_classes`]
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  end_points = {}

  with tf.variable_scope(scope, 'CifarNet', [images, num_classes]):
    net = slim.conv2d(images, 64, [5, 5], scope='conv1')
    end_points['conv1'] = net
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
    end_points['pool1'] = net
    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1')
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    end_points['conv2'] = net
    net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
    end_points['pool2'] = net
    net = slim.flatten(net)
    end_points['Flatten'] = net
    net = slim.fully_connected(net, 384, scope='fc3')
    end_points['fc3'] = net
    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                       scope='dropout3')
    net = slim.fully_connected(net, 192, scope='fc4')
    end_points['fc4'] = net
    logits = slim.fully_connected(net, num_classes,
                                  biases_initializer=tf.zeros_initializer,
                                  weights_initializer=trunc_normal(1/192.0),
                                  weights_regularizer=None,
                                  activation_fn=None,
                                  scope='logits')

    end_points['Logits'] = logits
    end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

  return logits, end_points
cifarnet.default_image_size = 32


def cifarnet_arg_scope(weight_decay=0.004):
  """Defines the default cifarnet argument scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.

  Returns:
    An `arg_scope` to use for the inception v3 model.
  """
  with slim.arg_scope(
      [slim.conv2d],
      weights_initializer=tf.truncated_normal_initializer(stddev=5e-2),
      activation_fn=tf.nn.relu):
    with slim.arg_scope(
        [slim.fully_connected],
        biases_initializer=tf.constant_initializer(0.1),
        weights_initializer=trunc_normal(0.04),
        weights_regularizer=slim.l2_regularizer(weight_decay),
        activation_fn=tf.nn.relu) as sc:
      return sc


================================================
FILE: models/slim/nets/inception.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Brings all inception models under one namespace."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import
from nets.inception_resnet_v2 import inception_resnet_v2
from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope
from nets.inception_v1 import inception_v1
from nets.inception_v1 import inception_v1_arg_scope
from nets.inception_v1 import inception_v1_base
from nets.inception_v2 import inception_v2
from nets.inception_v2 import inception_v2_arg_scope
from nets.inception_v2 import inception_v2_base
from nets.inception_v2_tsn import inception_v2_tsn
from nets.inception_v2_tsn import inception_v2_tsn_arg_scope
from nets.inception_v2_tsn import inception_v2_tsn_base
from nets.inception_v3 import inception_v3
from nets.inception_v3 import inception_v3_arg_scope
from nets.inception_v3 import inception_v3_base
from nets.inception_v4 import inception_v4
from nets.inception_v4 import inception_v4_arg_scope
from nets.inception_v4 import inception_v4_base
# pylint: enable=unused-import


================================================
FILE: models/slim/nets/inception_resnet_v2.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception Resnet V2 architecture.

As described in http://arxiv.org/abs/1602.07261.

  Inception-v4, Inception-ResNet and the Impact of Residual Connections
    on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf

slim = tf.contrib.slim


def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 35x35 resnet block."""
  with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
    with tf.variable_scope('Branch_2'):
      tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
      tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3')
      tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    net += scale * up
    if activation_fn:
      net = activation_fn(net)
  return net


def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 17x17 resnet block."""
  with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7],
                                  scope='Conv2d_0b_1x7')
      tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1],
                                  scope='Conv2d_0c_7x1')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    net += scale * up
    if activation_fn:
      net = activation_fn(net)
  return net


def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
  """Builds the 8x8 resnet block."""
  with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
    with tf.variable_scope('Branch_0'):
      tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
    with tf.variable_scope('Branch_1'):
      tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
      tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3],
                                  scope='Conv2d_0b_1x3')
      tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1],
                                  scope='Conv2d_0c_3x1')
    mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2])
    up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                     activation_fn=None, scope='Conv2d_1x1')
    net += scale * up
    if activation_fn:
      net = activation_fn(net)
  return net


def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2'):
  """Creates the Inception Resnet V2 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the logits outputs of the model.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}

  with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):

        # 149 x 149 x 32
        net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                          scope='Conv2d_1a_3x3')
        end_points['Conv2d_1a_3x3'] = net
        # 147 x 147 x 32
        net = slim.conv2d(net, 32, 3, padding='VALID',
                          scope='Conv2d_2a_3x3')
        end_points['Conv2d_2a_3x3'] = net
        # 147 x 147 x 64
        net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
        end_points['Conv2d_2b_3x3'] = net
        # 73 x 73 x 64
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_3a_3x3')
        end_points['MaxPool_3a_3x3'] = net
        # 73 x 73 x 80
        net = slim.conv2d(net, 80, 1, padding='VALID',
                          scope='Conv2d_3b_1x1')
        end_points['Conv2d_3b_1x1'] = net
        # 71 x 71 x 192
        net = slim.conv2d(net, 192, 3, padding='VALID',
                          scope='Conv2d_4a_3x3')
        end_points['Conv2d_4a_3x3'] = net
        # 35 x 35 x 192
        net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                              scope='MaxPool_5a_3x3')
        end_points['MaxPool_5a_3x3'] = net

        # 35 x 35 x 320
        with tf.variable_scope('Mixed_5b'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
          with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                        scope='Conv2d_0b_5x5')
          with tf.variable_scope('Branch_2'):
            tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                        scope='Conv2d_0c_3x3')
          with tf.variable_scope('Branch_3'):
            tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                         scope='AvgPool_0a_3x3')
            tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                       scope='Conv2d_0b_1x1')
          net = tf.concat(axis=3, values=[tower_conv, tower_conv1_1,
                              tower_conv2_2, tower_pool_1])

        end_points['Mixed_5b'] = net
        net = slim.repeat(net, 10, block35, scale=0.17)

        # 17 x 17 x 1024
        with tf.variable_scope('Mixed_6a'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                     scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                        stride=2, padding='VALID',
                                        scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_2'):
            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                         scope='MaxPool_1a_3x3')
          net = tf.concat(axis=3, values=[tower_conv, tower_conv1_2, tower_pool])

        end_points['Mixed_6a'] = net
        net = slim.repeat(net, 20, block17, scale=0.10)

        # Auxillary tower
        with tf.variable_scope('AuxLogits'):
          aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID',
                                scope='Conv2d_1a_3x3')
          aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
          aux = slim.conv2d(aux, 768, aux.get_shape()[1:3],
                            padding='VALID', scope='Conv2d_2a_5x5')
          aux = slim.flatten(aux)
          aux = slim.fully_connected(aux, num_classes, activation_fn=None,
                                     scope='Logits')
          end_points['AuxLogits'] = aux

        with tf.variable_scope('Mixed_7a'):
          with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                       padding='VALID', scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_1'):
            tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                        padding='VALID', scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_2'):
            tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                        scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                        padding='VALID', scope='Conv2d_1a_3x3')
          with tf.variable_scope('Branch_3'):
            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                         scope='MaxPool_1a_3x3')
          net = tf.concat(axis=3, values=[tower_conv_1, tower_conv1_1,
                              tower_conv2_2, tower_pool])

        end_points['Mixed_7a'] = net

        net = slim.repeat(net, 9, block8, scale=0.20)
        net = block8(net, activation_fn=None)

        net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
        end_points['Conv2d_7b_1x1'] = net

        with tf.variable_scope('Logits'):
          end_points['PrePool'] = net
          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                scope='AvgPool_1a_8x8')
          net = slim.flatten(net)

          net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                             scope='Dropout')

          end_points['PreLogitsFlatten'] = net
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')

    return logits, end_points
inception_resnet_v2.default_image_size = 299


def inception_resnet_v2_arg_scope(weight_decay=0.00004,
                                  batch_norm_decay=0.9997,
                                  batch_norm_epsilon=0.001):
  """Yields the scope with the default parameters for inception_resnet_v2.

  Args:
    weight_decay: the weight decay for weights variables.
    batch_norm_decay: decay for the moving average of batch_norm momentums.
    batch_norm_epsilon: small float added to variance to avoid dividing by zero.

  Returns:
    a arg_scope with the parameters needed for inception_resnet_v2.
  """
  # Set weight_decay for weights in conv2d and fully_connected layers.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_regularizer=slim.l2_regularizer(weight_decay)):

    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
    }
    # Set activation_fn and parameters for batch_norm.
    with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params) as scope:
      return scope


================================================
FILE: models/slim/nets/inception_resnet_v2_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.inception_resnet_v2."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception


class InceptionTest(tf.test.TestCase):

  def testBuildLogits(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = inception.inception_resnet_v2(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testBuildEndPoints(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = inception.inception_resnet_v2(inputs, num_classes)
      self.assertTrue('Logits' in end_points)
      logits = end_points['Logits']
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      self.assertTrue('AuxLogits' in end_points)
      aux_logits = end_points['AuxLogits']
      self.assertListEqual(aux_logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['PrePool']
      self.assertListEqual(pre_pool.get_shape().as_list(),
                           [batch_size, 8, 8, 1536])

  def testVariablesSetDevice(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      # Force all Variables to reside on the device.
      with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
        inception.inception_resnet_v2(inputs, num_classes)
      with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
        inception.inception_resnet_v2(inputs, num_classes)
      for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):
        self.assertDeviceEqual(v.device, '/cpu:0')
      for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):
        self.assertDeviceEqual(v.device, '/gpu:0')

  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 150, 150
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, end_points = inception.inception_resnet_v2(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['PrePool']
      self.assertListEqual(pre_pool.get_shape().as_list(),
                           [batch_size, 3, 3, 1536])

  def testUnknownBatchSize(self):
    batch_size = 1
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, (None, height, width, 3))
      logits, _ = inception.inception_resnet_v2(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [None, num_classes])
      images = tf.random_uniform((batch_size, height, width, 3))
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    batch_size = 2
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = inception.inception_resnet_v2(eval_inputs,
                                                num_classes,
                                                is_training=False)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000
    with self.test_session() as sess:
      train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
      inception.inception_resnet_v2(train_inputs, num_classes)
      eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
      logits, _ = inception.inception_resnet_v2(eval_inputs,
                                                num_classes,
                                                is_training=False,
                                                reuse=True)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (eval_batch_size,))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/inception_utils.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common code shared by all inception models.

Usage of arg scope:
  with slim.arg_scope(inception_arg_scope()):
    logits, end_points = inception.inception_v3(images, num_classes,
                                                is_training=is_training)

"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def inception_arg_scope(weight_decay=0.00004,
                        use_batch_norm=True,
                        batch_norm_decay=0.9997,
                        batch_norm_epsilon=0.001):
  """Defines the default arg scope for inception models.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    use_batch_norm: "If `True`, batch_norm is applied after each convolution.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by zero
      in batch norm.

  Returns:
    An `arg_scope` to use for the inception models.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': batch_norm_decay,
      # epsilon to prevent 0s in variance.
      'epsilon': batch_norm_epsilon,
      # collection containing update_ops.
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
  }
  if use_batch_norm:
    normalizer_fn = slim.batch_norm
    normalizer_params = batch_norm_params
  else:
    normalizer_fn = None
    normalizer_params = {}
  # Set weight_decay for weights in Conv and FC layers.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=tf.nn.relu,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params) as sc:
      return sc


================================================
FILE: models/slim/nets/inception_v1.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v1 classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def inception_v1_base(inputs,
                      final_endpoint='Mixed_5c',
                      scope='InceptionV1'):
  """Defines the Inception V1 base architecture.

  This architecture is defined in:
    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
      'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
      'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
    scope: Optional variable_scope.

  Returns:
    A dictionary from components of the network to the corresponding activation.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  end_points = {}
  with tf.variable_scope(scope, 'InceptionV1', [inputs]):
    with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_initializer=trunc_normal(0.01)):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                          stride=1, padding='SAME'):
        end_point = 'Conv2d_1a_7x7'
        net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
        end_point = 'MaxPool_2a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
        end_point = 'Conv2d_2b_1x1'
        net = slim.conv2d(net, 64, [1, 1], scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
        end_point = 'Conv2d_2c_3x3'
        net = slim.conv2d(net, 192, [3, 3], scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
        end_point = 'MaxPool_3a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_3b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_3c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_4a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4d'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4e'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4f'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_5a_2x2'
        net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_5b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_5c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
  """Defines the Inception V1 architecture.

  This architecture is defined in:

    Going deeper with convolutions
    Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
    Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
    http://arxiv.org/pdf/1409.4842v1.pdf.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  # Final pooling and prediction
  with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v1_base(inputs, scope=scope)
      with tf.variable_scope('Logits'):
        net = slim.avg_pool2d(net, [7, 7], stride=1, scope='MaxPool_0a_7x7')
        net = slim.dropout(net,
                           dropout_keep_prob, scope='Dropout_0b')
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v1.default_image_size = 224

inception_v1_arg_scope = inception_utils.inception_arg_scope


================================================
FILE: models/slim/nets/inception_v1_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v1."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from nets import inception

slim = tf.contrib.slim


class InceptionV1Test(tf.test.TestCase):

  def testBuildClassificationNetwork(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v1(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue('Predictions' in end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random_uniform((batch_size, height, width, 3))
    mixed_6c, end_points = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_6c.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                          'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b',
                          'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c',
                          'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2',
                          'Mixed_5b', 'Mixed_5c']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    batch_size = 5
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d',
                 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b',
                 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
      with tf.Graph().as_default():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v1_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV1/' + endpoint))
        self.assertItemsEqual(endpoints[:index+1], end_points)

  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v1_base(inputs,
                                                final_endpoint='Mixed_5c')
    endpoints_shapes = {'Conv2d_1a_7x7': [5, 112, 112, 64],
                        'MaxPool_2a_3x3': [5, 56, 56, 64],
                        'Conv2d_2b_1x1': [5, 56, 56, 64],
                        'Conv2d_2c_3x3': [5, 56, 56, 192],
                        'MaxPool_3a_3x3': [5, 28, 28, 192],
                        'Mixed_3b': [5, 28, 28, 256],
                        'Mixed_3c': [5, 28, 28, 480],
                        'MaxPool_4a_3x3': [5, 14, 14, 480],
                        'Mixed_4b': [5, 14, 14, 512],
                        'Mixed_4c': [5, 14, 14, 512],
                        'Mixed_4d': [5, 14, 14, 512],
                        'Mixed_4e': [5, 14, 14, 528],
                        'Mixed_4f': [5, 14, 14, 832],
                        'MaxPool_5a_2x2': [5, 7, 7, 832],
                        'Mixed_5b': [5, 7, 7, 832],
                        'Mixed_5c': [5, 7, 7, 1024]}

    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v1_arg_scope()):
      inception.inception_v1_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(5607184, total_params)

  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 112, 112

    inputs = tf.random_uniform((batch_size, height, width, 3))
    mixed_5c, _ = inception.inception_v1_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 4, 4, 1024])

  def testUnknownImageShape(self):
    tf.reset_default_graph()
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v1(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])

  def testUnknowBatchSize(self):
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_v1(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV1/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random_uniform((batch_size, height, width, 3))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    eval_inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_v1(eval_inputs, num_classes,
                                       is_training=False)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
    inception.inception_v1(train_inputs, num_classes)
    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (eval_batch_size,))

  def testLogitsNotSqueezed(self):
    num_classes = 25
    images = tf.random_uniform([1, 224, 224, 3])
    logits, _ = inception.inception_v1(images,
                                       num_classes=num_classes,
                                       spatial_squeeze=False)

    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      logits_out = sess.run(logits)
      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/inception_v2.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v2 classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def inception_v2_base(inputs,
                      final_endpoint='Mixed_5c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception v2 (6a2).

  Constructs an Inception v2 network from inputs to the given final endpoint.
  This method can construct the network up to the layer inception(5b) as
  described in http://arxiv.org/abs/1502.03167.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
      'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a',
      'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b',
      'Mixed_5c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """

  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  # Used to find thinned depths for each layer.
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV2', [inputs]):
    with slim.arg_scope(
        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d],
        stride=1, padding='SAME'):

      # Note that sizes in the comments below assume an input spatial size of
      # 224x224, however, the inputs can be of any size greater 32x32.

      # 224 x 224 x 3
      end_point = 'Conv2d_1a_7x7'
      # depthwise_multiplier here is different from depth_multiplier.
      # depthwise_multiplier determines the output channels of the initial
      # depthwise conv (see docs for tf.nn.separable_conv2d), while
      # depth_multiplier controls the # channels of the subsequent 1x1
      # convolution. Must have
      #   in_channels * depthwise_multipler <= out_channels
      # so that the separable convolution is not overparameterized.
      depthwise_multiplier = min(int(depth(64) / 3), 8)
      net = slim.separable_conv2d(
          inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier,
          stride=2, weights_initializer=trunc_normal(1.0),
          scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 112 x 112 x 64
      end_point = 'MaxPool_2a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2b_1x1'
      net = slim.conv2d(net, depth(64), [1, 1], scope=end_point,
                        weights_initializer=trunc_normal(0.1))
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'Conv2d_2c_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 192
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 192
      # Inception module.
      end_point = 'Mixed_3b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(32), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 256
      end_point = 'Mixed_3c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 28 x 28 x 320
      end_point = 'Mixed_4a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(
              net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(64), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(
              branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(128), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_4d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(160), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points

      # 14 x 14 x 576
      end_point = 'Mixed_4e'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(192), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(96), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 14 x 14 x 576
      end_point = 'Mixed_5a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(
              net, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1, branch_2])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
      # 7 x 7 x 1024
      end_point = 'Mixed_5b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(160), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points

      # 7 x 7 x 1024
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                 scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(
              net, depth(192), [1, 1],
              weights_initializer=trunc_normal(0.09),
              scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(128), [1, 1],
              weights_initializer=trunc_normal(0.1),
              scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v2(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV2'):
  """Inception v2 model for classification.

  Constructs an Inception v2 network for classification as described in
  http://arxiv.org/abs/1502.03167.

  The default image size used to train this network is 224x224.

  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  # Final pooling and prediction
  with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v2_base(
          inputs, scope=scope, min_depth=min_depth,
          depth_multiplier=depth_multiplier)
      with tf.variable_scope('Logits'):
        kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7])
        net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                              scope='AvgPool_1a_{}x{}'.format(*kernel_size))
        # 1 x 1 x 1024
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v2.default_image_size = 224


def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Define kernel size which is automatically reduced for small input.

  If the shape of the input images is unknown at graph construction time this
  function assumes that the input images are is large enough.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    a tensor with the kernel size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
                        tf.minimum(shape[2], kernel_size[1])])

  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out


inception_v2_arg_scope = inception_utils.inception_arg_scope


================================================
FILE: models/slim/nets/inception_v2_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from nets import inception

slim = tf.contrib.slim


class InceptionV2Test(tf.test.TestCase):

  def testBuildClassificationNetwork(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue('Predictions' in end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random_uniform((batch_size, height, width, 3))
    mixed_5c, end_points = inception.inception_v2_base(inputs)
    self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c'))
    self.assertListEqual(mixed_5c.get_shape().as_list(),
                         [batch_size, 7, 7, 1024])
    expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b',
                          'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a',
                          'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7',
                          'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3',
                          'MaxPool_3a_3x3']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    batch_size = 5
    height, width = 224, 224
    endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
                 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
                 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
                 'Mixed_5a', 'Mixed_5b', 'Mixed_5c']
    for index, endpoint in enumerate(endpoints):
      with tf.Graph().as_default():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v2_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV2/' + endpoint))
        self.assertItemsEqual(endpoints[:index+1], end_points)

  def testBuildAndCheckAllEndPointsUptoMixed5c(self):
    batch_size = 5
    height, width = 224, 224

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2_base(inputs,
                                                final_endpoint='Mixed_5c')
    endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256],
                        'Mixed_3c': [batch_size, 28, 28, 320],
                        'Mixed_4a': [batch_size, 14, 14, 576],
                        'Mixed_4b': [batch_size, 14, 14, 576],
                        'Mixed_4c': [batch_size, 14, 14, 576],
                        'Mixed_4d': [batch_size, 14, 14, 576],
                        'Mixed_4e': [batch_size, 14, 14, 576],
                        'Mixed_5a': [batch_size, 7, 7, 1024],
                        'Mixed_5b': [batch_size, 7, 7, 1024],
                        'Mixed_5c': [batch_size, 7, 7, 1024],
                        'Conv2d_1a_7x7': [batch_size, 112, 112, 64],
                        'MaxPool_2a_3x3': [batch_size, 56, 56, 64],
                        'Conv2d_2b_1x1': [batch_size, 56, 56, 64],
                        'Conv2d_2c_3x3': [batch_size, 56, 56, 192],
                        'MaxPool_3a_3x3': [batch_size, 28, 28, 192]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v2_arg_scope()):
      inception.inception_v2_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(10173112, total_params)

  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2(inputs, num_classes)

    endpoint_keys = [key for key in end_points.keys()
                     if key.startswith('Mixed') or key.startswith('Conv')]

    _, end_points_with_multiplier = inception.inception_v2(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=0.5)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(0.5 * original_depth, new_depth)

  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v2(inputs, num_classes)

    endpoint_keys = [key for key in end_points.keys()
                     if key.startswith('Mixed') or key.startswith('Conv')]

    _, end_points_with_multiplier = inception.inception_v2(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=2.0)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(2.0 * original_depth, new_depth)

  def testRaiseValueErrorWithInvalidDepthMultiplier(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    with self.assertRaises(ValueError):
      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1)
    with self.assertRaises(ValueError):
      _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0)

  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 112, 112
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_5c']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 4, 4, 1024])

  def testUnknownImageShape(self):
    tf.reset_default_graph()
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v2(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_5c']
      feed_dict = {inputs: input_np}
      tf.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024])

  def testUnknowBatchSize(self):
    batch_size = 1
    height, width = 224, 224
    num_classes = 1000

    inputs = tf.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_v2(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV2/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random_uniform((batch_size, height, width, 3))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000

    eval_inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_v2(eval_inputs, num_classes,
                                       is_training=False)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000

    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
    inception.inception_v2(train_inputs, num_classes)
    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (eval_batch_size,))

  def testLogitsNotSqueezed(self):
    num_classes = 25
    images = tf.random_uniform([1, 224, 224, 3])
    logits, _ = inception.inception_v2(images,
                                       num_classes=num_classes,
                                       spatial_squeeze=False)

    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      logits_out = sess.run(logits)
      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/inception_v2_tsn.py
================================================
"""Contains the definition for inception v2 (TSN) classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.ops import init_ops
from tensorflow.python.platform import tf_logging as logging

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
random_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)

def conv_set(net, num_outputs, filter_size, stride=1, weight_std=0.001,
             padding=0):
  if padding > 0:
    net = tf.pad(net, [[0, 0], [padding, padding], [padding, padding], [0, 0]])
  net = slim.conv2d(
    net, num_outputs, filter_size,
    stride=stride,
    padding='VALID')
  net = slim.batch_norm(net,
                        updates_collections=tf.GraphKeys.UPDATE_OPS,
                        epsilon=1e-5,
                        decay=0.9,
                        scale=True)
  net = tf.nn.relu(net)
  return net


def pool(net, pool_type='avg', kernel=3, stride=1, padding=0):
  if pool_type == 'avg':
    fn = slim.avg_pool2d
  elif pool_type == 'max':
    fn = slim.max_pool2d
  else:
    raise ValueError('Unknown pool type')
  with tf.name_scope('%s_pool' % pool_type):
    net = fn(net, [kernel, kernel], stride=stride,
             padding='VALID' if padding==0 else 'SAME')
  return net


def inception_module(net, small_module=False,
                     num_outputs=[64,64,64,32,64,96,96],
                     force_max_pool=False):
  all_nets = []
  if not small_module:
    with tf.variable_scope('1x1'):
      net_1 = conv_set(net, num_outputs[0], [1, 1])
    all_nets.append(net_1)

  with tf.variable_scope('3x3_reduce'):
    net_2 = conv_set(net, num_outputs[1], [1, 1])
  with tf.variable_scope('3x3'):
    net_2 = conv_set(net_2, num_outputs[2], [3, 3],
                     padding=1,
                     stride=2 if small_module else 1)
  all_nets.append(net_2)

  with tf.variable_scope('double_3x3_reduce'):
    net_3 = conv_set(net, num_outputs[4], [1, 1])
  with tf.variable_scope('double_3x3_1'):
    net_3 = conv_set(net_3, num_outputs[5], [3, 3], padding=1)
  with tf.variable_scope('double_3x3_2'):
    net_3 = conv_set(net_3, num_outputs[6], [3, 3], padding=1,
                     stride=2 if small_module else 1)
  all_nets.append(net_3)

  with tf.variable_scope('pool'):
    if small_module:
      net_4 = pool(net, 'max', 3, 2, 1)
    elif force_max_pool:
      net_4 = pool(net, 'max', 3, 1, 1)
    else:
      net_4 = pool(net, 'avg', 3, 1, 1)
  if not small_module:
    with tf.variable_scope('pool_proj'):
      net_4 = conv_set(net_4, num_outputs[3], [1, 1])
  all_nets.append(net_4)

  net = tf.concat(all_nets, 3)
  return net


def inception_v2_tsn_base(inputs,
                          final_endpoint='Mixed_5c',
                          min_depth=16,
                          depth_multiplier=1.0,
                          scope=None,
                          is_training=False,
                          train_top_bn=False):
  """Inception v2 (TSN code).

  """

  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  with tf.variable_scope(scope, 'InceptionV2_TSN', [inputs]):
      # 224 x 224 x 3
      end_point = 'conv1/7x7_s2'
      with tf.variable_scope(end_point):
        with slim.arg_scope(
          [slim.batch_norm],
          is_training=is_training if train_top_bn else False,
          trainable=True if train_top_bn else False):
          net = conv_set(inputs, 64, [7, 7],
                         stride=2,
                         padding=3)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 112 x 112 x 64
      end_point = 'pool1/3x3_s2'
      net = slim.max_pool2d(net, [3, 3], scope=end_point,
                            stride=2, padding='SAME')
      # net = pool(net, 'max', 3, 2, 1)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 56 x 56 x 64
      end_point = 'conv2/3x3_reduce'
      with tf.variable_scope(end_point):
        net = conv_set(net, 64, [1, 1], weight_std=0.1,
                       padding=0)
      # net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2,
      #                       padding='SAME')
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points
      end_point = 'conv2/3x3'
      with tf.variable_scope(end_point):
        net = conv_set(net, 192, [3, 3], weight_std=0.1, padding=1)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points
      end_point = 'pool2/3x3_s2'
      net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2,
                            padding='SAME')
      # net = pool(net, 'max', 3, 2, 1)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      # Inception module.
      end_point = 'inception_3a'
      with tf.variable_scope(end_point):
        net = inception_module(net)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_3b'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[64,64,96,64,64,96,96])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_3c'
      with tf.variable_scope(end_point):
        net = inception_module(net, small_module=True,
                               num_outputs=[-1,128,160,-1,64,96,96])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_4a'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[224,64,96,128,96,128,128])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_4b'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[192,96,128,128,96,128,128])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_4c'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[160,128,160,128,128,160,160])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_4d'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[96,128,192,128,160,192,192])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_4e'
      with tf.variable_scope(end_point):
        net = inception_module(net, small_module=True,
                               num_outputs=[-1,128,192,-1,192,256,256])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_5a'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[352,192,320,128,160,224,224])
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

      end_point = 'inception_5b'
      with tf.variable_scope(end_point):
        net = inception_module(net, num_outputs=[352,192,320,128,192,224,224],
                              force_max_pool=True)
      end_points[tf.get_variable_scope().name + '/' + end_point] = net
      if end_point == final_endpoint: return net, end_points

  return net, end_points


def inception_v2_tsn(inputs,
                     num_classes=1000,
                     is_training=True,
                     dropout_keep_prob=0.2,
                     min_depth=16,
                     depth_multiplier=1.0,
                     prediction_fn=slim.softmax,
                     spatial_squeeze=True,
                     reuse=None,
                     conv_only=None,
                     conv_endpoint='inception_5b',
                     # conv_endpoint='inception_5a',  # testing for now
                     train_top_bn=False,
                     scope='InceptionV2_TSN'):
  """Inception v2 model for video classification.

  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  # Final pooling and prediction
  with tf.variable_scope(scope, 'InceptionV2_TSN', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.dropout],
                        is_training=is_training):
      with slim.arg_scope([slim.batch_norm],
                          is_training=False,
                          trainable=False):
        net, end_points = inception_v2_tsn_base(
            inputs, scope=scope, min_depth=min_depth,
            depth_multiplier=depth_multiplier,
            final_endpoint=conv_endpoint if conv_only else None,
            is_training=is_training,
            train_top_bn=train_top_bn)
        if conv_only:
          return net, end_points
        with tf.variable_scope('Logits'):
          kernel_size = _reduced_kernel_size_for_small_input(net, [100, 100])
          net = slim.avg_pool2d(net, kernel_size, padding='VALID', stride=1,
                                scope='AvgPool_Logits_{}x{}'.format(*kernel_size))
          # The following would give the same output/performance too.
          # net = tf.reduce_mean(net, axis=[1,2], keep_dims=True)
          # 1 x 1 x 1024
          logging.info('Using dropout %f' % (1-dropout_keep_prob))
          net = slim.dropout(net, keep_prob=dropout_keep_prob,
                             scope='Dropout_Logits')
          logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                               normalizer_fn=None,
                               weights_initializer=random_normal(0.001),
                               biases_initializer=init_ops.zeros_initializer())
          if spatial_squeeze:
            logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v2_tsn.default_image_size = 224


def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Define kernel size which is automatically reduced for small input.

  If the shape of the input images is unknown at graph construction time this
  function assumes that the input images are is large enough.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    a tensor with the kernel size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
                        tf.minimum(shape[2], kernel_size[1])])

  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out


def inception_v2_tsn_arg_scope(weight_decay=0.00004):
  """Defines the default InceptionV2 arg scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.

  Returns:
    An `arg_scope` to use for the inception v3 model.
  """
  batch_norm_params = {
      # Decay for the moving averages.
      'decay': 0.9997,
      # epsilon to prevent 0s in variance.
      'epsilon': 0.001,
      # collection containing update_ops.
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
      # Allow a gamma variable
      'scale': True,
  }

  # Set weight_decay for weights in Conv and FC layers.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=tf.contrib.layers.xavier_initializer(),
        activation_fn=None,  # manually added later, as I need to add BN after
                             # the convolution
        biases_initializer=init_ops.constant_initializer(value=0.2),
        normalizer_fn=None) as sc:
      return sc


================================================
FILE: models/slim/nets/inception_v3.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition for inception v3 classification network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
random_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)


def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a set of activations for external use, for example summaries or
                losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
                or depth_multiplier <= 0
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 35 x 35 x 192.

    # Inception blocks
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv_1_0c_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_3: 17 x 17 x 768.
      end_point = 'Mixed_6a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed4: 17 x 17 x 768.
      end_point = 'Mixed_6b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_5: 17 x 17 x 768.
      end_point = 'Mixed_6c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_8: 8 x 8 x 1280.
      end_point = 'Mixed_7a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat([branch_0, branch_1, branch_2], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1],
                          scope='Conv2d_0b_3x1')], 3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1],
                          scope='Conv2d_0d_3x1')], 3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat([
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1],
                          scope='Conv2d_0c_3x1')], 3)
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat([
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1],
                          scope='Conv2d_0d_3x1')], 3)
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
    raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
  """Inception model from http://arxiv.org/abs/1512.00567.

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  With the default arguments this method constructs the exact model defined in
  the paper. However, one can experiment with variations of the inception_v3
  network by changing arguments dropout_keep_prob, min_depth and
  depth_multiplier.

  The default image size used to train this network is 299x299.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: the percentage of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits is of shape is [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v3_base(
          inputs, scope=scope, min_depth=min_depth,
          depth_multiplier=depth_multiplier)

      # Auxiliary Head logits
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        aux_logits = end_points['Mixed_6e']
        with tf.variable_scope('AuxLogits'):
          # rgirdhar: for large images, in pose
          kernel_size = _reduced_kernel_size_for_small_input(net, [30, 30])
          aux_logits = slim.avg_pool2d(
              aux_logits, kernel_size, stride=3, padding='VALID',
              scope='AvgPool_1a_5x5')
          aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1],
                                   scope='Conv2d_1b_1x1')

          # Shape of feature map before the final layer.
          kernel_size = _reduced_kernel_size_for_small_input(
              aux_logits, [5, 5])
          aux_logits = slim.conv2d(
              aux_logits, depth(768), kernel_size,
              weights_initializer=trunc_normal(0.01),
              padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size))
          aux_logits = slim.conv2d(
              aux_logits, num_classes, [1, 1], activation_fn=None,
              normalizer_fn=None, weights_initializer=trunc_normal(0.001),
              scope='Conv2d_2b_1x1')
          if spatial_squeeze:
            aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
          end_points['AuxLogits'] = aux_logits

      # Final pooling and prediction
      with tf.variable_scope('Logits'):
        # kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8])
        # rgirdhar: for large images, in pose
        kernel_size = _reduced_kernel_size_for_small_input(net, [30, 30])
        net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                              scope='AvgPool_1a_{}x{}'.format(*kernel_size))
        # 1 x 1 x 2048
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        end_points['PreLogits'] = net
        # 2048
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        # 1000
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v3.default_image_size = 299


def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Define kernel size which is automatically reduced for small input.

  If the shape of the input images is unknown at graph construction time this
  function assumes that the input images are is large enough.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    a tensor with the kernel size.

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
                        tf.minimum(shape[2], kernel_size[1])])

  """
  shape = input_tensor.get_shape().as_list()
  if shape[1] is None or shape[2] is None:
    kernel_size_out = kernel_size
  else:
    kernel_size_out = [min(shape[1], kernel_size[0]),
                       min(shape[2], kernel_size[1])]
  return kernel_size_out


inception_v3_arg_scope = inception_utils.inception_arg_scope


================================================
FILE: models/slim/nets/inception_v3_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nets.inception_v1."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from nets import inception

slim = tf.contrib.slim


class InceptionV3Test(tf.test.TestCase):

  def testBuildClassificationNetwork(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v3(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue('Predictions' in end_points)
    self.assertListEqual(end_points['Predictions'].get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 299, 299

    inputs = tf.random_uniform((batch_size, height, width, 3))
    final_endpoint, end_points = inception.inception_v3_base(inputs)
    self.assertTrue(final_endpoint.op.name.startswith(
        'InceptionV3/Mixed_7c'))
    self.assertListEqual(final_endpoint.get_shape().as_list(),
                         [batch_size, 8, 8, 2048])
    expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
                          'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
                          'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
                          'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
                          'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)

  def testBuildOnlyUptoFinalEndpoint(self):
    batch_size = 5
    height, width = 299, 299
    endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
                 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3',
                 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
                 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
                 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']

    for index, endpoint in enumerate(endpoints):
      with tf.Graph().as_default():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v3_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV3/' + endpoint))
        self.assertItemsEqual(endpoints[:index+1], end_points)

  def testBuildAndCheckAllEndPointsUptoMixed7c(self):
    batch_size = 5
    height, width = 299, 299

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v3_base(
        inputs, final_endpoint='Mixed_7c')
    endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
                        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
                        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
                        'MaxPool_3a_3x3': [batch_size, 73, 73, 64],
                        'Conv2d_3b_1x1': [batch_size, 73, 73, 80],
                        'Conv2d_4a_3x3': [batch_size, 71, 71, 192],
                        'MaxPool_5a_3x3': [batch_size, 35, 35, 192],
                        'Mixed_5b': [batch_size, 35, 35, 256],
                        'Mixed_5c': [batch_size, 35, 35, 288],
                        'Mixed_5d': [batch_size, 35, 35, 288],
                        'Mixed_6a': [batch_size, 17, 17, 768],
                        'Mixed_6b': [batch_size, 17, 17, 768],
                        'Mixed_6c': [batch_size, 17, 17, 768],
                        'Mixed_6d': [batch_size, 17, 17, 768],
                        'Mixed_6e': [batch_size, 17, 17, 768],
                        'Mixed_7a': [batch_size, 8, 8, 1280],
                        'Mixed_7b': [batch_size, 8, 8, 2048],
                        'Mixed_7c': [batch_size, 8, 8, 2048]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 299, 299
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v3_arg_scope()):
      inception.inception_v3_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(21802784, total_params)

  def testBuildEndPoints(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v3(inputs, num_classes)
    self.assertTrue('Logits' in end_points)
    logits = end_points['Logits']
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue('AuxLogits' in end_points)
    aux_logits = end_points['AuxLogits']
    self.assertListEqual(aux_logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue('Mixed_7c' in end_points)
    pre_pool = end_points['Mixed_7c']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 8, 8, 2048])
    self.assertTrue('PreLogits' in end_points)
    pre_logits = end_points['PreLogits']
    self.assertListEqual(pre_logits.get_shape().as_list(),
                         [batch_size, 1, 1, 2048])

  def testBuildEndPointsWithDepthMultiplierLessThanOne(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v3(inputs, num_classes)

    endpoint_keys = [key for key in end_points.keys()
                     if key.startswith('Mixed') or key.startswith('Conv')]

    _, end_points_with_multiplier = inception.inception_v3(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=0.5)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(0.5 * original_depth, new_depth)

  def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v3(inputs, num_classes)

    endpoint_keys = [key for key in end_points.keys()
                     if key.startswith('Mixed') or key.startswith('Conv')]

    _, end_points_with_multiplier = inception.inception_v3(
        inputs, num_classes, scope='depth_multiplied_net',
        depth_multiplier=2.0)

    for key in endpoint_keys:
      original_depth = end_points[key].get_shape().as_list()[3]
      new_depth = end_points_with_multiplier[key].get_shape().as_list()[3]
      self.assertEqual(2.0 * original_depth, new_depth)

  def testRaiseValueErrorWithInvalidDepthMultiplier(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    with self.assertRaises(ValueError):
      _ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1)
    with self.assertRaises(ValueError):
      _ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0)

  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 150, 150
    num_classes = 1000

    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v3(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_7c']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 3, 3, 2048])

  def testUnknownImageShape(self):
    tf.reset_default_graph()
    batch_size = 2
    height, width = 299, 299
    num_classes = 1000
    input_np = np.random.uniform(0, 1, (batch_size, height, width, 3))
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3))
      logits, end_points = inception.inception_v3(inputs, num_classes)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      pre_pool = end_points['Mixed_7c']
      feed_dict = {inputs: input_np}
      tf.global_variables_initializer().run()
      pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict)
      self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048])

  def testUnknowBatchSize(self):
    batch_size = 1
    height, width = 299, 299
    num_classes = 1000

    inputs = tf.placeholder(tf.float32, (None, height, width, 3))
    logits, _ = inception.inception_v3(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV3/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, num_classes])
    images = tf.random_uniform((batch_size, height, width, 3))

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    batch_size = 2
    height, width = 299, 299
    num_classes = 1000

    eval_inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, _ = inception.inception_v3(eval_inputs, num_classes,
                                       is_training=False)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000

    train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
    inception.inception_v3(train_inputs, num_classes)
    eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
    logits, _ = inception.inception_v3(eval_inputs, num_classes,
                                       is_training=False, reuse=True)
    predictions = tf.argmax(logits, 1)

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (eval_batch_size,))

  def testLogitsNotSqueezed(self):
    num_classes = 25
    images = tf.random_uniform([1, 299, 299, 3])
    logits, _ = inception.inception_v3(images,
                                       num_classes=num_classes,
                                       spatial_squeeze=False)

    with self.test_session() as sess:
      tf.global_variables_initializer().run()
      logits_out = sess.run(logits)
      self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes])


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/inception_v4.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.

As described in http://arxiv.org/abs/1602.07261.

  Inception-v4, Inception-ResNet and the Impact of Residual Connections
    on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim


def block_inception_a(inputs, scope=None, reuse=None):
  """Builds Inception-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])


def block_reduction_a(inputs, scope=None, reuse=None):
  """Builds Reduction-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
                               scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(3, [branch_0, branch_1, branch_2])


def block_inception_b(inputs, scope=None, reuse=None):
  """Builds Inception-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
        branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
        branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
        branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])


def block_reduction_b(inputs, scope=None, reuse=None):
  """Builds Reduction-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
        branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(3, [branch_0, branch_1, branch_2])


def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = tf.concat(3, [
            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
        branch_2 = tf.concat(3, [
            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])


def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
  """Creates the Inception V4 network up to the given final endpoint.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
      'Mixed_7d']
    scope: Optional variable_scope.

  Returns:
    logits: the logits outputs of the model.
    end_points: the set of end_points from the inception model.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
  """
  end_points = {}

  def add_and_check_final(name, net):
    end_points[name] = net
    return name == final_endpoint

  with tf.variable_scope(scope, 'InceptionV4', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 299 x 299 x 3
      net = slim.conv2d(inputs, 32, [3, 3], stride=2,
                        padding='VALID', scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
      # 149 x 149 x 32
      net = slim.conv2d(net, 32, [3, 3], padding='VALID',
                        scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
      # 147 x 147 x 64
      with tf.variable_scope('Mixed_3a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_0a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_0a_3x3')
        net = tf.concat(3, [branch_0, branch_1])
        if add_and_check_final('Mixed_3a', net): return net, end_points

      # 73 x 73 x 160
      with tf.variable_scope('Mixed_4a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1])
        if add_and_check_final('Mixed_4a', net): return net, end_points

      # 71 x 71 x 192
      with tf.variable_scope('Mixed_5a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1])
        if add_and_check_final('Mixed_5a', net): return net, end_points

      # 35 x 35 x 384
      # 4 x Inception-A blocks
      for idx in xrange(4):
        block_scope = 'Mixed_5' + chr(ord('b') + idx)
        net = block_inception_a(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 35 x 35 x 384
      # Reduction-A block
      net = block_reduction_a(net, 'Mixed_6a')
      if add_and_check_final('Mixed_6a', net): return net, end_points

      # 17 x 17 x 1024
      # 7 x Inception-B blocks
      for idx in xrange(7):
        block_scope = 'Mixed_6' + chr(ord('b') + idx)
        net = block_inception_b(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 17 x 17 x 1024
      # Reduction-B block
      net = block_reduction_b(net, 'Mixed_7a')
      if add_and_check_final('Mixed_7a', net): return net, end_points

      # 8 x 8 x 1536
      # 3 x Inception-C blocks
      for idx in xrange(3):
        block_scope = 'Mixed_7' + chr(ord('b') + idx)
        net = block_inception_c(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)


def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
  """Creates the Inception V4 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes.
    is_training: whether is training or not.
    dropout_keep_prob: float, the fraction to keep before final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxilliary logits.

  Returns:
    logits: the logits outputs of the model.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}
  with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4_base(inputs, scope=scope)

      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # Auxiliary Head logits
        if create_aux_logits:
          with tf.variable_scope('AuxLogits'):
            # 17 x 17 x 1024
            aux_logits = end_points['Mixed_6h']
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID',
                                         scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                     scope='Conv2d_1b_1x1')
            aux_logits = slim.conv2d(aux_logits, 768,
                                     aux_logits.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a')
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes,
                                              activation_fn=None,
                                              scope='Aux_logits')
            end_points['AuxLogits'] = aux_logits

        # Final pooling and prediction
        with tf.variable_scope('Logits'):
          # 8 x 8 x 1536
          net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                scope='AvgPool_1a')
          # 1 x 1 x 1536
          net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
          net = slim.flatten(net, scope='PreLogitsFlatten')
          end_points['PreLogitsFlatten'] = net
          # 1536
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
    return logits, end_points
inception_v4.default_image_size = 299


inception_v4_arg_scope = inception_utils.inception_arg_scope


================================================
FILE: models/slim/nets/inception_v4_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.inception_v4."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception


class InceptionTest(tf.test.TestCase):

  def testBuildLogits(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v4(inputs, num_classes)
    auxlogits = end_points['AuxLogits']
    predictions = end_points['Predictions']
    self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits'))
    self.assertListEqual(auxlogits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    self.assertTrue(predictions.op.name.startswith(
        'InceptionV4/Logits/Predictions'))
    self.assertListEqual(predictions.get_shape().as_list(),
                         [batch_size, num_classes])

  def testBuildWithoutAuxLogits(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, endpoints = inception.inception_v4(inputs, num_classes,
                                               create_aux_logits=False)
    self.assertFalse('AuxLogits' in endpoints)
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])

  def testAllEndPointsShapes(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    _, end_points = inception.inception_v4(inputs, num_classes)
    endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32],
                        'Conv2d_2a_3x3': [batch_size, 147, 147, 32],
                        'Conv2d_2b_3x3': [batch_size, 147, 147, 64],
                        'Mixed_3a': [batch_size, 73, 73, 160],
                        'Mixed_4a': [batch_size, 71, 71, 192],
                        'Mixed_5a': [batch_size, 35, 35, 384],
                        # 4 x Inception-A blocks
                        'Mixed_5b': [batch_size, 35, 35, 384],
                        'Mixed_5c': [batch_size, 35, 35, 384],
                        'Mixed_5d': [batch_size, 35, 35, 384],
                        'Mixed_5e': [batch_size, 35, 35, 384],
                        # Reduction-A block
                        'Mixed_6a': [batch_size, 17, 17, 1024],
                        # 7 x Inception-B blocks
                        'Mixed_6b': [batch_size, 17, 17, 1024],
                        'Mixed_6c': [batch_size, 17, 17, 1024],
                        'Mixed_6d': [batch_size, 17, 17, 1024],
                        'Mixed_6e': [batch_size, 17, 17, 1024],
                        'Mixed_6f': [batch_size, 17, 17, 1024],
                        'Mixed_6g': [batch_size, 17, 17, 1024],
                        'Mixed_6h': [batch_size, 17, 17, 1024],
                        # Reduction-A block
                        'Mixed_7a': [batch_size, 8, 8, 1536],
                        # 3 x Inception-C blocks
                        'Mixed_7b': [batch_size, 8, 8, 1536],
                        'Mixed_7c': [batch_size, 8, 8, 1536],
                        'Mixed_7d': [batch_size, 8, 8, 1536],
                        # Logits and predictions
                        'AuxLogits': [batch_size, num_classes],
                        'PreLogitsFlatten': [batch_size, 1536],
                        'Logits': [batch_size, num_classes],
                        'Predictions': [batch_size, num_classes]}
    self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys())
    for endpoint_name in endpoints_shapes:
      expected_shape = endpoints_shapes[endpoint_name]
      self.assertTrue(endpoint_name in end_points)
      self.assertListEqual(end_points[endpoint_name].get_shape().as_list(),
                           expected_shape)

  def testBuildBaseNetwork(self):
    batch_size = 5
    height, width = 299, 299
    inputs = tf.random_uniform((batch_size, height, width, 3))
    net, end_points = inception.inception_v4_base(inputs)
    self.assertTrue(net.op.name.startswith(
        'InceptionV4/Mixed_7d'))
    self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536])
    expected_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
    self.assertItemsEqual(end_points.keys(), expected_endpoints)
    for name, op in end_points.iteritems():
      self.assertTrue(op.name.startswith('InceptionV4/' + name))

  def testBuildOnlyUpToFinalEndpoint(self):
    batch_size = 5
    height, width = 299, 299
    all_endpoints = [
        'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a',
        'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
        'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d',
        'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a',
        'Mixed_7b', 'Mixed_7c', 'Mixed_7d']
    for index, endpoint in enumerate(all_endpoints):
      with tf.Graph().as_default():
        inputs = tf.random_uniform((batch_size, height, width, 3))
        out_tensor, end_points = inception.inception_v4_base(
            inputs, final_endpoint=endpoint)
        self.assertTrue(out_tensor.op.name.startswith(
            'InceptionV4/' + endpoint))
        self.assertItemsEqual(all_endpoints[:index+1], end_points)

  def testVariablesSetDevice(self):
    batch_size = 5
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    # Force all Variables to reside on the device.
    with tf.variable_scope('on_cpu'), tf.device('/cpu:0'):
      inception.inception_v4(inputs, num_classes)
    with tf.variable_scope('on_gpu'), tf.device('/gpu:0'):
      inception.inception_v4(inputs, num_classes)
    for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_cpu'):
      self.assertDeviceEqual(v.device, '/cpu:0')
    for v in tf.get_collection(tf.GraphKeys.VARIABLES, scope='on_gpu'):
      self.assertDeviceEqual(v.device, '/gpu:0')

  def testHalfSizeImages(self):
    batch_size = 5
    height, width = 150, 150
    num_classes = 1000
    inputs = tf.random_uniform((batch_size, height, width, 3))
    logits, end_points = inception.inception_v4(inputs, num_classes)
    self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [batch_size, num_classes])
    pre_pool = end_points['Mixed_7d']
    self.assertListEqual(pre_pool.get_shape().as_list(),
                         [batch_size, 3, 3, 1536])

  def testUnknownBatchSize(self):
    batch_size = 1
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      inputs = tf.placeholder(tf.float32, (None, height, width, 3))
      logits, _ = inception.inception_v4(inputs, num_classes)
      self.assertTrue(logits.op.name.startswith('InceptionV4/Logits'))
      self.assertListEqual(logits.get_shape().as_list(),
                           [None, num_classes])
      images = tf.random_uniform((batch_size, height, width, 3))
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEquals(output.shape, (batch_size, num_classes))

  def testEvaluation(self):
    batch_size = 2
    height, width = 299, 299
    num_classes = 1000
    with self.test_session() as sess:
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = inception.inception_v4(eval_inputs,
                                         num_classes,
                                         is_training=False)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (batch_size,))

  def testTrainEvalWithReuse(self):
    train_batch_size = 5
    eval_batch_size = 2
    height, width = 150, 150
    num_classes = 1000
    with self.test_session() as sess:
      train_inputs = tf.random_uniform((train_batch_size, height, width, 3))
      inception.inception_v4(train_inputs, num_classes)
      eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3))
      logits, _ = inception.inception_v4(eval_inputs,
                                         num_classes,
                                         is_training=False,
                                         reuse=True)
      predictions = tf.argmax(logits, 1)
      sess.run(tf.global_variables_initializer())
      output = sess.run(predictions)
      self.assertEquals(output.shape, (eval_batch_size,))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/lenet.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a variant of the LeNet model definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def lenet(images, num_classes=10, is_training=False,
          dropout_keep_prob=0.5,
          prediction_fn=slim.softmax,
          scope='LeNet'):
  """Creates a variant of the LeNet model.

  Note that since the output is a set of 'logits', the values fall in the
  interval of (-infinity, infinity). Consequently, to convert the outputs to a
  probability distribution over the characters, one will need to convert them
  using the softmax function:

        logits = lenet.lenet(images, is_training=False)
        probabilities = tf.nn.softmax(logits)
        predictions = tf.argmax(logits, 1)

  Args:
    images: A batch of `Tensors` of size [batch_size, height, width, channels].
    num_classes: the number of classes in the dataset.
    is_training: specifies whether or not we're currently training the model.
      This variable will determine the behaviour of the dropout layer.
    dropout_keep_prob: the percentage of activation values that are retained.
    prediction_fn: a function to get predictions out of logits.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, `num_classes`]
    end_points: a dictionary from components of the network to the corresponding
      activation.
  """
  end_points = {}

  with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
    net = slim.conv2d(images, 32, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
    net = slim.flatten(net)
    end_points['Flatten'] = net

    net = slim.fully_connected(net, 1024, scope='fc3')
    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                       scope='dropout3')
    logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                  scope='fc4')

  end_points['Logits'] = logits
  end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

  return logits, end_points
lenet.default_image_size = 28


def lenet_arg_scope(weight_decay=0.0):
  """Defines the default lenet argument scope.

  Args:
    weight_decay: The weight decay to use for regularizing the model.

  Returns:
    An `arg_scope` to use for the inception v3 model.
  """
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
      activation_fn=tf.nn.relu) as sc:
    return sc


================================================
FILE: models/slim/nets/nets_factory.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools

import tensorflow as tf
import numpy as np
import sys

from nets import alexnet
from nets import cifarnet
from nets import inception
from nets import lenet
from nets import overfeat
from nets import resnet_v1
from nets import resnet_v2
from nets import vgg

sys.path.append('libs/tensorflow_compact_bilinear_pooling/')
from compact_bilinear_pooling import compact_bilinear_pooling_layer

slim = tf.contrib.slim

networks_map = {'alexnet_v2': alexnet.alexnet_v2,
                'cifarnet': cifarnet.cifarnet,
                'overfeat': overfeat.overfeat,
                'vgg_a': vgg.vgg_a,
                'vgg_16': vgg.vgg_16,
                'vgg_19': vgg.vgg_19,
                'inception_v1': inception.inception_v1,
                'inception_v2': inception.inception_v2,
                'inception_v2_tsn': inception.inception_v2_tsn,
                'inception_v3': inception.inception_v3,
                'inception_v4': inception.inception_v4,
                'inception_resnet_v2': inception.inception_resnet_v2,
                'lenet': lenet.lenet,
                'resnet_v1_50': resnet_v1.resnet_v1_50,
                'resnet_v1_101': resnet_v1.resnet_v1_101,
                'resnet_v1_152': resnet_v1.resnet_v1_152,
                'resnet_v1_200': resnet_v1.resnet_v1_200,
                'resnet_v2_50': resnet_v2.resnet_v2_50,
                'resnet_v2_101': resnet_v2.resnet_v2_101,
                'resnet_v2_152': resnet_v2.resnet_v2_152,
                'resnet_v2_200': resnet_v2.resnet_v2_200,
               }

last_conv_map = {'inception_v3': 'Mixed_7c',
                 'inception_v2_tsn': 'InceptionV2_TSN/inception_5b',
                 'resnet_v1_101': 'resnet_v1_101/block4',
                 'vgg_16': 'vgg_16/conv5',
                }

arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope,
                  'cifarnet': cifarnet.cifarnet_arg_scope,
                  'overfeat': overfeat.overfeat_arg_scope,
                  'vgg_a': vgg.vgg_arg_scope,
                  'vgg_16': vgg.vgg_arg_scope,
                  'vgg_19': vgg.vgg_arg_scope,
                  'inception_v1': inception.inception_v3_arg_scope,
                  'inception_v2': inception.inception_v3_arg_scope,
                  'inception_v2_tsn': inception.inception_v2_tsn_arg_scope,
                  'inception_v3': inception.inception_v3_arg_scope,
                  'inception_v4': inception.inception_v4_arg_scope,
                  'inception_resnet_v2':
                  inception.inception_resnet_v2_arg_scope,
                  'lenet': lenet.lenet_arg_scope,
                  'resnet_v1_50': resnet_v1.resnet_arg_scope,
                  'resnet_v1_101': resnet_v1.resnet_arg_scope,
                  'resnet_v1_152': resnet_v1.resnet_arg_scope,
                  'resnet_v1_200': resnet_v1.resnet_arg_scope,
                  'resnet_v2_50': resnet_v2.resnet_arg_scope,
                  'resnet_v2_101': resnet_v2.resnet_arg_scope,
                  'resnet_v2_152': resnet_v2.resnet_arg_scope,
                  'resnet_v2_200': resnet_v2.resnet_arg_scope,
                 }


def get_network_fn(name, num_classes, num_pose_keypoints, cfg,
                   weight_decay=0.0, is_training=False):
  """Returns a network_fn such as `logits, end_points = network_fn(images)`.

  Args:
    name: The name of the network.
    num_classes: The number of classes to use for classification.
    num_pose_keypoints: The number of channels to output for pose.
    weight_decay: The l2 coefficient for the model weights.
    is_training: `True` if the model is being used for training and `False`
      otherwise.

  Returns:
    network_fn: A function that applies the model to a batch of images. It has
      the following signature:
        logits, end_points = network_fn(images)
  Raises:
    ValueError: If network `name` is not recognized.
  """
  if name not in networks_map:
    raise ValueError('Name of network unknown %s' % name)
  arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
  func = networks_map[name]
  @functools.wraps(func)
  def network_fn(images):
    with slim.arg_scope(arg_scope):
      frames_per_video = 1  # same for single image datasets
      if images.get_shape().ndims == 5:
        im_shape = images.get_shape().as_list()
        frames_per_video = im_shape[1]
        images = tf.reshape(
          images, [-1, im_shape[-3], im_shape[-2], im_shape[-1]])

      # Main Network Function
      kwargs = {}
      if cfg.NET.DROPOUT >= 0:  # if -1, then just ignore it and use nw def.
        kwargs['dropout_keep_prob'] = (1-cfg.NET.DROPOUT)
      logits, end_points = func(images, num_classes, is_training=is_training,
                                train_top_bn=cfg.NET.TRAIN_TOP_BN,
                                **kwargs)

      # rgirdhar: add another end point for heatmap prediction
      try:
        last_conv = end_points[last_conv_map[name]]
      except:
        raise ValueError('End point {} not found. Choose from: {}'.format(
          last_conv_map[name], ' '.join(end_points)))
      random_normal = lambda stddev: tf.random_normal_initializer(0.0, stddev)

      with slim.arg_scope([slim.dropout],
                          is_training=is_training,
                          keep_prob=0.2 if cfg.NET.DROPOUT < 0
                                        else (1.0-cfg.NET.DROPOUT)):
        with tf.variable_scope('PoseLogits'):
          last_conv_pose_name = getattr(
            cfg.NET.LAST_CONV_MAP_FOR_POSE, name)
          last_conv_pose = end_points[last_conv_pose_name]
          pose_pre_logits = slim.conv2d(
            last_conv_pose, 768, [1, 1],
            weights_initializer=random_normal(0.001),
            activation_fn=tf.nn.relu,
            normalizer_fn=None,
            biases_initializer=tf.zeros_initializer(),
            padding='SAME', scope='ExtraConv2d_1x1')
          pose_logits = slim.conv2d(pose_pre_logits, num_pose_keypoints, [1, 1], activation_fn=None,
                                    normalizer_fn=None, scope='Conv2d_1c_1x1')
          end_points['PoseLogits'] = pose_logits

        if cfg.NET.USE_POSE_ATTENTION_LOGITS:
          with tf.variable_scope('PoseAttention'):
            # use the pose prediction as an attention map to get the features
            # step1: split pose logits over channels
            pose_logits_parts = tf.split(
              pose_logits, pose_logits.get_shape().as_list()[-1],
              axis=pose_logits.get_shape().ndims-1)
            part_logits = []
            # allows to choose which dimension of pose to use for heatmaps
            parts_to_use = pose_logits_parts
            if cfg.NET.USE_POSE_ATTENTION_LOGITS_DIMS != [-1]:
              parts_to_use = (np.array(pose_logits_parts)[
                cfg.NET.USE_POSE_ATTENTION_LOGITS_DIMS]).tolist()
            tf.logging.info('Using {} parts for pose attention logits'.format(
              len(parts_to_use)))
            for part in parts_to_use:
              part_logits.append(tf.reduce_mean(part * last_conv, axis=[1,2],
                                                keep_dims=True))
            if cfg.NET.USE_POSE_ATTENTION_LOGITS_AVGED_HMAP:
              part_logits.append(tf.reduce_mean(
                last_conv * tf.reduce_mean(pose_logits, axis=-1, keep_dims=True),
                axis=[1,2], keep_dims=True))
            part_logits.append(tf.reduce_mean(last_conv, axis=[1,2],
                                              keep_dims=True))
            net = tf.concat(part_logits, axis=-1)
            net = slim.dropout(net)
            logits = slim.conv2d(net, num_classes, [1, 1],
                                 weights_initializer=random_normal(0.001),
                                 biases_initializer=tf.zeros_initializer(),
                                 activation_fn=None,
                                 normalizer_fn=None)
        elif cfg.NET.USE_POSE_LOGITS_DIRECTLY:
          with tf.variable_scope('ActionFromPose'):
            net = tf.reduce_mean(
              pose_pre_logits, axis=[1, 2], keep_dims=True)
            net = slim.conv2d(net, 768, [1, 1],
                              normalizer_fn=None,
                              weights_initializer=random_normal(0.001),
                              biases_initializer=tf.zeros_initializer())
            if cfg.NET.USE_POSE_LOGITS_DIRECTLY_PLUS_LOGITS:
              net = tf.concat([
                net, tf.reduce_mean(last_conv, axis=[1, 2], keep_dims=True)],
                axis=-1)
            net = slim.dropout(net)
            logits = slim.conv2d(net, num_classes, [1, 1],
                                 weights_initializer=random_normal(0.001),
                                 biases_initializer=tf.zeros_initializer(),
                                 activation_fn=None,
                                 normalizer_fn=None)
        elif cfg.NET.USE_POSE_LOGITS_DIRECTLY_v2:
          with tf.variable_scope('ActionFromPose_v2'):
            net = tf.concat([
              pose_pre_logits,
              last_conv],
              axis=-1)
            if cfg.NET.USE_POSE_LOGITS_DIRECTLY_v2_EXTRA_LAYER:
              net = tf.nn.relu(net)
              net = slim.conv2d(net, net.get_shape().as_list()[-1], [1, 1],
                                weights_initializer=random_normal(0.001),
                                biases_initializer=tf.zeros_initializer())
            net = tf.reduce_mean(net, axis=[1, 2], keep_dims=True)
            net = slim.dropout(net)
            logits = slim.conv2d(net, num_classes, [1, 1],
                                 weights_initializer=random_normal(0.001),
                                 biases_initializer=tf.zeros_initializer(),
                                 activation_fn=None,
                                 normalizer_fn=None)
        elif cfg.NET.USE_COMPACT_BILINEAR_POOLING:
          last_conv_shape = last_conv.get_shape().as_list()
          net = compact_bilinear_pooling_layer(
            last_conv, last_conv, last_conv_shape[-1])
          net.set_shape([last_conv_shape[0], last_conv_shape[-1]])
          net = tf.expand_dims(tf.expand_dims(
            net, 1), 1)
          net = slim.dropout(net)
          logits = slim.conv2d(net, num_classes, [1, 1],
                               weights_initializer=random_normal(0.001),
                               biases_initializer=tf.zeros_initializer(),
                               activation_fn=None,
                               normalizer_fn=None)
        elif cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION:
          with tf.variable_scope('PosePrelogitsBasedAttention'):
            # If the following is set, just train on top of image features,
            # don't add the prelogits at all. This was useful as pose seemed to
            # not help with it at all.
            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT:
              net = last_conv
            else:
              net = pose_pre_logits
            # nMaps = num_classes if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS else 1
            # For simplicity, since multiple maps doesn't seem to help, I'm
            # not allowing that to keep the following code simple.
            # nMaps = 1
            # For NIPS2017 rebuttal, they wanted to see nums with per-class
            # attention, so doing that too
            nMaps = num_classes if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS else 1
            all_att_logits = []
            for rank_id in range(cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK):
              scope_name = 'Conv2d_PrePose_Attn'
              if rank_id >= 1:
                scope_name += str(rank_id)
              net = slim.conv2d(net, nMaps,
                                [1, 1],
                                weights_initializer=random_normal(0.001),
                                biases_initializer=tf.zeros_initializer(),
                                activation_fn=None,
                                normalizer_fn=None,
                                scope=scope_name)
              all_att_logits.append(net)
            if len(all_att_logits) > 1:
              attention_logits = tf.stack(all_att_logits, axis=-1)
            else:
              attention_logits = all_att_logits[0]

            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SOFTMAX_ATT:
              # bring the number of channels earlier to make softmax easier
              attention_logits = tf.transpose(attention_logits, [0, 3, 1, 2])
              att_shape = attention_logits.get_shape().as_list()
              attention_logits = tf.reshape(
                attention_logits, [att_shape[0], att_shape[1], -1])
              attention_logits = tf.nn.softmax(attention_logits)
              attention_logits = tf.reshape(attention_logits, att_shape)
              attention_logits = tf.transpose(attention_logits, [0, 2, 3, 1])
            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RELU_ATT:
              attention_logits = tf.nn.relu(attention_logits)
            end_points['PosePrelogitsBasedAttention'] = attention_logits

            if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT:
              if cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT_2LAYER:
                pose_logits = slim.conv2d(
                  pose_logits, pose_logits.get_shape()[-1],
                  [1, 1], weights_initializer=random_normal(0.001),
                  biases_initializer=tf.zeros_initializer())
              last_conv = tf.concat([last_conv, pose_logits], axis=-1)
            last_conv = slim.dropout(last_conv)
            # Top-down attention
            all_logits = []
            for _ in range(cfg.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK):
              logits = slim.conv2d(last_conv, num_classes, [1, 1],
                                   weights_initializer=random_normal(0.001),
                                   biases_initializer=tf.zeros_initializer(),
                                   activation_fn=None, normalizer_fn=None)
              all_logits.append(logits)
            if len(all_logits) > 1:
              logits = tf.stack(all_logits, axis=-1)
            else:
              logits = all_logits[0]
            end_points['TopDownAttention'] = logits

            # attended_feats = []
            # for attention_logit in tf.unstack(attention_logits, axis=-1):
            #   attended_feats.append(tf.reduce_mean(
            #     tf.expand_dims(attention_logit, axis=-1) * logits,
            #     axis=[1,2],
            #     keep_dims=True))
            # attended_feat = tf.stack(attended_feats, axis=-1)
            # # Since only 1 attention map (asserted above)
            # logits = attended_feat[..., 0]

            # better way to do the above:
            logits = tf.reduce_mean(
              attention_logits * logits,
              axis=[1, 2],
              keep_dims=True)
            if logits.get_shape().ndims == 5:
              # i.e. rank was > 1
              logits = tf.reduce_sum(logits, axis=-1)

            # if nMaps == 1:
            #   # remove the extra dimension that is added for multi-class
            #   # attention case
            #   attended_feat = attended_feat[..., 0]
            #   logits = slim.conv2d(attended_feat, num_classes, [1, 1],
            #                        weights_initializer=random_normal(0.001),
            #                        biases_initializer=tf.zeros_initializer(),
            #                        activation_fn=None,
            #                        normalizer_fn=None)
            # else:
            #   logits = tf.concat([
            #     slim.conv2d(el, 1, [1, 1],
            #                 weights_initializer=random_normal(0.001),
            #                 biases_initializer=tf.zeros_initializer(),
            #                 activation_fn=None,
            #                 normalizer_fn=None) for el in
            #     tf.unstack(attended_feat, axis=-1)], axis=-1)
        # This is just to protect against the case where I don't do any of the
        # above and get the original logits from the network, which has already
        # been squeezed, or in case of vgg 16, passed through fc layers
        if logits.get_shape().ndims > 2:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        end_points['Logits'] = logits

      if frames_per_video > 1:
        with tf.name_scope('FramePooling'):
          # for now stick with avg pool
          end_points['logits_beforePool'] = logits
          old_logits = logits
          logits = tf.stack([el for el in tf.split(
            old_logits, int(old_logits.get_shape().as_list()[0] /
                            frames_per_video))])
          if cfg.NET.USE_TEMPORAL_ATT:
            with tf.variable_scope('TemporalAttention'):
              logits = tf.expand_dims(logits, axis=-2)  #[bs, 3, 1, nc]
              logits_att = slim.conv2d(
                logits, 1, [1, 1],
                weights_initializer=random_normal(0.001),
                biases_initializer=tf.constant_initializer(
                  1.0 / logits.get_shape().as_list()[1]),
                activation_fn=None, normalizer_fn=None)
              logits = logits * logits_att
              logits = tf.squeeze(logits, axis=-2)
              end_points['TemporalAttention'] = logits_att
          logits = tf.reduce_mean(logits, axis=1)
      return logits, end_points

  if hasattr(func, 'default_image_size'):
    network_fn.default_image_size = func.default_image_size

  return network_fn


================================================
FILE: models/slim/nets/nets_factory_test.py
================================================
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for slim.inception."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import tensorflow as tf

from nets import nets_factory


class NetworksTest(tf.test.TestCase):

  def testGetNetworkFn(self):
    batch_size = 5
    num_classes = 1000
    for net in nets_factory.networks_map:
      with self.test_session():
        net_fn = nets_factory.get_network_fn(net, num_classes)
        # Most networks use 224 as their default_image_size
        image_size = getattr(net_fn, 'default_image_size', 224)
        inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
        logits, end_points = net_fn(inputs)
        self.assertTrue(isinstance(logits, tf.Tensor))
        self.assertTrue(isinstance(end_points, dict))
        self.assertEqual(logits.get_shape().as_list()[0], batch_size)
        self.assertEqual(logits.get_shape().as_list()[-1], num_classes)

if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/overfeat.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the model definition for the OverFeat network.

The definition for the network was obtained from:
  OverFeat: Integrated Recognition, Localization and Detection using
  Convolutional Networks
  Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
  Yann LeCun, 2014
  http://arxiv.org/abs/1312.6229

Usage:
  with slim.arg_scope(overfeat.overfeat_arg_scope()):
    outputs, end_points = overfeat.overfeat(inputs)

@@overfeat
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def overfeat_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_initializer=tf.zeros_initializer):
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def overfeat(inputs,
             num_classes=1000,
             is_training=True,
             dropout_keep_prob=0.5,
             spatial_squeeze=True,
             scope='overfeat'):
  """Contains the model definition for the OverFeat network.

  The definition for the network was obtained from:
    OverFeat: Integrated Recognition, Localization and Detection using
    Convolutional Networks
    Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and
    Yann LeCun, 2014
    http://arxiv.org/abs/1312.6229

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 231x231. To use in fully
        convolutional mode, set spatial_squeeze to false.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.

  """
  with tf.variable_scope(scope, 'overfeat', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.conv2d(net, 512, [3, 3], scope='conv3')
      net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
      net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer,
                          scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
overfeat.default_image_size = 231


================================================
FILE: models/slim/nets/overfeat_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.overfeat."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import overfeat

slim = tf.contrib.slim


class OverFeatTest(tf.test.TestCase):

  def testBuild(self):
    batch_size = 5
    height, width = 231, 231
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = overfeat.overfeat(inputs, num_classes)
      self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    batch_size = 1
    height, width = 281, 281
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False)
      self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 2, 2, num_classes])

  def testEndPoints(self):
    batch_size = 5
    height, width = 231, 231
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = overfeat.overfeat(inputs, num_classes)
      expected_names = ['overfeat/conv1',
                        'overfeat/pool1',
                        'overfeat/conv2',
                        'overfeat/pool2',
                        'overfeat/conv3',
                        'overfeat/conv4',
                        'overfeat/conv5',
                        'overfeat/pool5',
                        'overfeat/fc6',
                        'overfeat/fc7',
                        'overfeat/fc8'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testModelVariables(self):
    batch_size = 5
    height, width = 231, 231
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      overfeat.overfeat(inputs, num_classes)
      expected_names = ['overfeat/conv1/weights',
                        'overfeat/conv1/biases',
                        'overfeat/conv2/weights',
                        'overfeat/conv2/biases',
                        'overfeat/conv3/weights',
                        'overfeat/conv3/biases',
                        'overfeat/conv4/weights',
                        'overfeat/conv4/biases',
                        'overfeat/conv5/weights',
                        'overfeat/conv5/biases',
                        'overfeat/fc6/weights',
                        'overfeat/fc6/biases',
                        'overfeat/fc7/weights',
                        'overfeat/fc7/biases',
                        'overfeat/fc8/weights',
                        'overfeat/fc8/biases',
                       ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    batch_size = 2
    height, width = 231, 231
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = overfeat.overfeat(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(logits, 1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 231, 231
    eval_height, eval_width = 281, 281
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random_uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = overfeat.overfeat(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      tf.get_variable_scope().reuse_variables()
      eval_inputs = tf.random_uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = overfeat.overfeat(eval_inputs, is_training=False,
                                    spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 2, 2, num_classes])
      logits = tf.reduce_mean(logits, [1, 2])
      predictions = tf.argmax(logits, 1)
      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    batch_size = 1
    height, width = 231, 231
    with self.test_session() as sess:
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = overfeat.overfeat(inputs)
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())

if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/resnet_utils.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains building blocks for various versions of Residual Networks.

Residual networks (ResNets) were proposed in:
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015

More variants were introduced in:
  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016

We can obtain different ResNet variants by changing the network depth, width,
and form of residual unit. This module implements the infrastructure for
building them. Concrete ResNet units and full ResNet networks are implemented in
the accompanying resnet_v1.py and resnet_v2.py modules.

Compared to https://github.com/KaimingHe/deep-residual-networks, in the current
implementation we subsample the output activations in the last residual unit of
each block, instead of subsampling the input activations in the first residual
unit of each block. The two implementations give identical results but our
implementation is more memory efficient.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import tensorflow as tf

slim = tf.contrib.slim


class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
  """A named tuple describing a ResNet block.

  Its parts are:
    scope: The scope of the `Block`.
    unit_fn: The ResNet unit function which takes as input a `Tensor` and
      returns another `Tensor` with the output of the ResNet unit.
    args: A list of length equal to the number of units in the `Block`. The list
      contains one (depth, depth_bottleneck, stride) tuple for each unit in the
      block to serve as argument to unit_fn.
  """


def subsample(inputs, factor, scope=None):
  """Subsamples the input along the spatial dimensions.

  Args:
    inputs: A `Tensor` of size [batch, height_in, width_in, channels].
    factor: The subsampling factor.
    scope: Optional variable_scope.

  Returns:
    output: A `Tensor` of size [batch, height_out, width_out, channels] with the
      input, either intact (if factor == 1) or subsampled (if factor > 1).
  """
  if factor == 1:
    return inputs
  else:
    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)


def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  When stride > 1, then we do explicit zero-padding, followed by conv2d with
  'VALID' padding.

  Note that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')

  is different when the input's height or width is even, which is why we add the
  current function. For more details, see ResnetUtilsTest.testConv2DSameEven().

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An int with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
                       padding='SAME', scope=scope)
  else:
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    inputs = tf.pad(inputs,
                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
    return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
                       rate=rate, padding='VALID', scope=scope)


@slim.add_arg_scope
def stack_blocks_dense(net, blocks, output_stride=None,
                       outputs_collections=None):
  """Stacks ResNet `Blocks` and controls output feature density.

  First, this function creates scopes for the ResNet in the form of
  'block_name/unit_1', 'block_name/unit_2', etc.

  Second, this function allows the user to explicitly control the ResNet
  output_stride, which is the ratio of the input to output spatial resolution.
  This is useful for dense prediction tasks such as semantic segmentation or
  object detection.

  Most ResNets consist of 4 ResNet blocks and subsample the activations by a
  factor of 2 when transitioning between consecutive ResNet blocks. This results
  to a nominal ResNet output_stride equal to 8. If we set the output_stride to
  half the nominal network stride (e.g., output_stride=4), then we compute
  responses twice.

  Control of the output feature density is implemented by atrous convolution.

  Args:
    net: A `Tensor` of size [batch, height, width, channels].
    blocks: A list of length equal to the number of ResNet `Blocks`. Each
      element is a ResNet `Block` object describing the units in the `Block`.
    output_stride: If `None`, then the output will be computed at the nominal
      network stride. If output_stride is not `None`, it specifies the requested
      ratio of input to output spatial resolution, which needs to be equal to
      the product of unit strides from the start up to some level of the ResNet.
      For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1,
      then valid values for the output_stride are 1, 2, 6, 24 or None (which
      is equivalent to output_stride=24).
    outputs_collections: Collection to add the ResNet block outputs.

  Returns:
    net: Output tensor with stride equal to the specified output_stride.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  # The current_stride variable keeps track of the effective stride of the
  # activations. This allows us to invoke atrous convolution whenever applying
  # the next residual unit would result in the activations having stride larger
  # than the target output_stride.
  current_stride = 1

  # The atrous convolution rate parameter.
  rate = 1

  for block in blocks:
    with tf.variable_scope(block.scope, 'block', [net]) as sc:
      for i, unit in enumerate(block.args):
        if output_stride is not None and current_stride > output_stride:
          raise ValueError('The target output_stride cannot be reached.')

        with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
          unit_depth, unit_depth_bottleneck, unit_stride = unit

          # If we have reached the target output_stride, then we need to employ
          # atrous convolution with stride=1 and multiply the atrous rate by the
          # current unit's stride for use in subsequent layers.
          if output_stride is not None and current_stride == output_stride:
            net = block.unit_fn(net,
                                depth=unit_depth,
                                depth_bottleneck=unit_depth_bottleneck,
                                stride=1,
                                rate=rate)
            rate *= unit_stride

          else:
            net = block.unit_fn(net,
                                depth=unit_depth,
                                depth_bottleneck=unit_depth_bottleneck,
                                stride=unit_stride,
                                rate=1)
            current_stride *= unit_stride
      net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net)

  if output_stride is not None and current_stride != output_stride:
    raise ValueError('The target output_stride cannot be reached.')

  return net


def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
  """Defines the default ResNet arg scope.

  TODO(gpapan): The batch-normalization related default values above are
    appropriate for use in conjunction with the reference ResNet models
    released at https://github.com/KaimingHe/deep-residual-networks. When
    training ResNets from scratch, they might need to be tuned.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer activation
      statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
      activations in the batch normalization layer.

  Returns:
    An `arg_scope` to use for the resnet models.
  """
  batch_norm_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'scale': batch_norm_scale,
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
  }

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=tf.nn.relu,
      normalizer_fn=slim.batch_norm,
      normalizer_params=batch_norm_params):
    with slim.arg_scope([slim.batch_norm], **batch_norm_params):
      # The following implies padding='SAME' for pool1, which makes feature
      # alignment easier for dense prediction tasks. This is also used in
      # https://github.com/facebook/fb.resnet.torch. However the accompanying
      # code of 'Deep Residual Learning for Image Recognition' uses
      # padding='VALID' for pool1. You can switch to that choice by setting
      # slim.arg_scope([slim.max_pool2d], padding='VALID').
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
        return arg_sc


================================================
FILE: models/slim/nets/resnet_v1.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for the original form of Residual Networks.

The 'v1' residual networks (ResNets) implemented in this module were proposed
by:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

Other variants were introduced in:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027

The networks defined in this module utilize the bottleneck building block of
[1] with projection shortcuts only for increasing depths. They employ batch
normalization *after* every weight layer. This is the architecture used by
MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
architecture and the alternative 'v2' architecture of [2] which uses batch
normalization *before* every weight layer in the so-called full pre-activation
units.

Typical use:

   from tensorflow.contrib.slim.nets import resnet_v1

ResNet-101 for image classification into 1000 classes:

   # inputs has shape [batch, 224, 224, 3]
   with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)

ResNet-101 for semantic segmentation into 21 classes:

   # inputs has shape [batch, 513, 513, 3]
   with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(inputs,
                                                21,
                                                is_training=False,
                                                global_pool=False,
                                                output_stride=16)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import resnet_utils


resnet_arg_scope = resnet_utils.resnet_arg_scope
slim = tf.contrib.slim


@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,
                             activation_fn=None, scope='shortcut')

    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    output = tf.nn.relu(shortcut + residual)

    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.original_name_scope,
                                            output)


def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              train_top_bn=False,
              dropout_keep_prob=1.0,
              reuse=None,
              scope=None):
  """Generator for v1 ResNet models.

  This function generates a family of ResNet v1 models. See the resnet_v1_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether is training or not.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it.
    train_top_bn: If True, then train batch norm for the root block, but make
      it testing mode for the rest of the network. If False (default), keep all
      the batch norms training.
    dropout_keep_prob: (0, 1]. If <1, will apply dropout on the final layer
      after avg pooling.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope(
        [slim.batch_norm],
        is_training=is_training if not train_top_bn else False,
        trainable=True if not train_top_bn else False):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride /= 4
          with slim.arg_scope([slim.batch_norm],
                              is_training=is_training,
                              trainable=True):
            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if dropout_keep_prob < 1.0:
          tf.logging.info('ResNet v1: Using dropout {}.'.format(
            1-dropout_keep_prob))
          net = slim.dropout(net, keep_prob=dropout_keep_prob,
                             is_training=is_training)
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
resnet_v1.default_image_size = 224


def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 reuse=None,
                 scope='resnet_v1_50'):
  """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  train_top_bn=None,
                  dropout_keep_prob=1.0,
                  scope='resnet_v1_101'):
  """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, train_top_bn=train_top_bn,
                   dropout_keep_prob=dropout_keep_prob,
                   reuse=reuse, scope=scope)


def resnet_v1_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_152'):
  """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


def resnet_v1_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v1_200'):
  """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


================================================
FILE: models/slim/nets/resnet_v1_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.resnet_v1."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from nets import resnet_utils
from nets import resnet_v1

slim = tf.contrib.slim


def create_test_input(batch_size, height, width, channels):
  """Create test input tensor.

  Args:
    batch_size: The number of images per batch or `None` if unknown.
    height: The height of each image or `None` if unknown.
    width: The width of each image or `None` if unknown.
    channels: The number of channels per image or `None` if unknown.

  Returns:
    Either a placeholder `Tensor` of dimension
      [batch_size, height, width, channels] if any of the inputs are `None` or a
    constant `Tensor` with the mesh grid values along the spatial dimensions.
  """
  if None in [batch_size, height, width, channels]:
    return tf.placeholder(tf.float32, (batch_size, height, width, channels))
  else:
    return tf.to_float(
        np.tile(
            np.reshape(
                np.reshape(np.arange(height), [height, 1]) +
                np.reshape(np.arange(width), [1, width]),
                [1, height, width, 1]),
            [batch_size, 1, 1, channels]))


class ResnetUtilsTest(tf.test.TestCase):

  def testSubsampleThreeByThree(self):
    x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
    x = resnet_utils.subsample(x, 2)
    expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
    with self.test_session():
      self.assertAllClose(x.eval(), expected.eval())

  def testSubsampleFourByFour(self):
    x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
    x = resnet_utils.subsample(x, 2)
    expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
    with self.test_session():
      self.assertAllClose(x.eval(), expected.eval())

  def testConv2DSameEven(self):
    n, n2 = 4, 2

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 26],
                               [28, 48, 66, 37],
                               [43, 66, 84, 46],
                               [26, 37, 46, 22]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43],
                               [43, 84]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = tf.to_float([[48, 37],
                               [37, 22]])
    y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())

  def testConv2DSameOdd(self):
    n, n2 = 5, 3

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 58, 34],
                               [28, 48, 66, 84, 46],
                               [43, 66, 84, 102, 55],
                               [58, 84, 102, 120, 64],
                               [34, 46, 55, 64, 30]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43, 34],
                               [43, 84, 55],
                               [34, 55, 30]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = y2_expected

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())

  def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
    """A plain ResNet without extra layers before or after the ResNet blocks."""
    with tf.variable_scope(scope, values=[inputs]):
      with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
        net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
        end_points = dict(tf.get_collection('end_points'))
        return net, end_points

  def testEndPointsV1(self):
    """Test the end points of a tiny v1 bottleneck network."""
    bottleneck = resnet_v1.bottleneck
    blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
              resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
    inputs = create_test_input(2, 32, 16, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
    expected = [
        'tiny/block1/unit_1/bottleneck_v1/shortcut',
        'tiny/block1/unit_1/bottleneck_v1/conv1',
        'tiny/block1/unit_1/bottleneck_v1/conv2',
        'tiny/block1/unit_1/bottleneck_v1/conv3',
        'tiny/block1/unit_2/bottleneck_v1/conv1',
        'tiny/block1/unit_2/bottleneck_v1/conv2',
        'tiny/block1/unit_2/bottleneck_v1/conv3',
        'tiny/block2/unit_1/bottleneck_v1/shortcut',
        'tiny/block2/unit_1/bottleneck_v1/conv1',
        'tiny/block2/unit_1/bottleneck_v1/conv2',
        'tiny/block2/unit_1/bottleneck_v1/conv3',
        'tiny/block2/unit_2/bottleneck_v1/conv1',
        'tiny/block2/unit_2/bottleneck_v1/conv2',
        'tiny/block2/unit_2/bottleneck_v1/conv3']
    self.assertItemsEqual(expected, end_points)

  def _stack_blocks_nondense(self, net, blocks):
    """A simplified ResNet Block stacker without output stride control."""
    for block in blocks:
      with tf.variable_scope(block.scope, 'block', [net]):
        for i, unit in enumerate(block.args):
          depth, depth_bottleneck, stride = unit
          with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
            net = block.unit_fn(net,
                                depth=depth,
                                depth_bottleneck=depth_bottleneck,
                                stride=stride,
                                rate=1)
    return net

  def _atrousValues(self, bottleneck):
    """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed by
    subsampling gives identical results to feature extraction at the nominal
    network output stride using the simple self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
    ]
    nominal_stride = 8

    # Test both odd and even input dimensions.
    height = 30
    width = 31
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      with slim.arg_scope([slim.batch_norm], is_training=False):
        for output_stride in [1, 2, 4, 8, None]:
          with tf.Graph().as_default():
            with self.test_session() as sess:
              tf.set_random_seed(0)
              inputs = create_test_input(1, height, width, 3)
              # Dense feature extraction followed by subsampling.
              output = resnet_utils.stack_blocks_dense(inputs,
                                                       blocks,
                                                       output_stride)
              if output_stride is None:
                factor = 1
              else:
                factor = nominal_stride // output_stride

              output = resnet_utils.subsample(output, factor)
              # Make the two networks use the same weights.
              tf.get_variable_scope().reuse_variables()
              # Feature extraction at the nominal network rate.
              expected = self._stack_blocks_nondense(inputs, blocks)
              sess.run(tf.global_variables_initializer())
              output, expected = sess.run([output, expected])
              self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)

  def testAtrousValuesBottleneck(self):
    self._atrousValues(resnet_v1.bottleneck)


class ResnetCompleteNetworkTest(tf.test.TestCase):
  """Tests with complete small ResNet v1 networks."""

  def _resnet_small(self,
                    inputs,
                    num_classes=None,
                    is_training=True,
                    global_pool=True,
                    output_stride=None,
                    include_root_block=True,
                    reuse=None,
                    scope='resnet_v1_small'):
    """A shallow and thin ResNet v1 for faster tests."""
    bottleneck = resnet_v1.bottleneck
    blocks = [
        resnet_utils.Block(
            'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
        resnet_utils.Block(
            'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
        resnet_utils.Block(
            'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
        resnet_utils.Block(
            'block4', bottleneck, [(32, 8, 1)] * 2)]
    return resnet_v1.resnet_v1(inputs, blocks, num_classes,
                               is_training=is_training,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse,
                               scope=scope)

  def testClassificationEndPoints(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, end_points = self._resnet_small(inputs, num_classes,
                                              global_pool=global_pool,
                                              scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
    self.assertTrue('predictions' in end_points)
    self.assertListEqual(end_points['predictions'].get_shape().as_list(),
                         [2, 1, 1, num_classes])

  def testClassificationShapes(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 28, 28, 4],
          'resnet/block2': [2, 14, 14, 8],
          'resnet/block3': [2, 7, 7, 16],
          'resnet/block4': [2, 7, 7, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 21, 21, 8],
          'resnet/block3': [2, 11, 11, 16],
          'resnet/block4': [2, 11, 11, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testRootlessFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    inputs = create_test_input(2, 128, 128, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         include_root_block=False,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 64, 64, 4],
          'resnet/block2': [2, 32, 32, 8],
          'resnet/block3': [2, 16, 16, 16],
          'resnet/block4': [2, 16, 16, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testAtrousFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    output_stride = 8
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs,
                                         num_classes,
                                         global_pool=global_pool,
                                         output_stride=output_stride,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 41, 41, 8],
          'resnet/block3': [2, 41, 41, 16],
          'resnet/block4': [2, 41, 41, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testAtrousFullyConvolutionalValues(self):
    """Verify dense feature extraction with atrous convolution."""
    nominal_stride = 32
    for output_stride in [4, 8, 16, 32, None]:
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with tf.Graph().as_default():
          with self.test_session() as sess:
            tf.set_random_seed(0)
            inputs = create_test_input(2, 81, 81, 3)
            # Dense feature extraction followed by subsampling.
            output, _ = self._resnet_small(inputs, None, is_training=False,
                                           global_pool=False,
                                           output_stride=output_stride)
            if output_stride is None:
              factor = 1
            else:
              factor = nominal_stride // output_stride
            output = resnet_utils.subsample(output, factor)
            # Make the two networks use the same weights.
            tf.get_variable_scope().reuse_variables()
            # Feature extraction at the nominal network rate.
            expected, _ = self._resnet_small(inputs, None, is_training=False,
                                             global_pool=False)
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(output.eval(), expected.eval(),
                                atol=1e-4, rtol=1e-4)

  def testUnknownBatchSize(self):
    batch = 2
    height, width = 65, 65
    global_pool = True
    num_classes = 10
    inputs = create_test_input(None, height, width, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, _ = self._resnet_small(inputs, num_classes,
                                     global_pool=global_pool,
                                     scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, 1, 1, num_classes])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 1, 1, num_classes))

  def testFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs, None, global_pool=global_pool)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 3, 3, 32))

  def testAtrousFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    output_stride = 8
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs,
                                     None,
                                     global_pool=global_pool,
                                     output_stride=output_stride)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 9, 9, 32))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/resnet_v2.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains definitions for the preactivation form of Residual Networks.

Residual networks (ResNets) were originally proposed in:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

The full preactivation 'v2' ResNet variant implemented in this module was
introduced by:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027

The key difference of the full preactivation 'v2' variant compared to the
'v1' variant in [1] is the use of batch normalization before every weight layer.
Another difference is that 'v2' ResNets do not include an activation function in
the main pathway. Also see [2; Fig. 4e].

Typical use:

   from tensorflow.contrib.slim.nets import resnet_v2

ResNet-101 for image classification into 1000 classes:

   # inputs has shape [batch, 224, 224, 3]
   with slim.arg_scope(resnet_v2.resnet_arg_scope()):
      net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)

ResNet-101 for semantic segmentation into 21 classes:

   # inputs has shape [batch, 513, 513, 3]
   with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)):
      net, end_points = resnet_v2.resnet_v2_101(inputs,
                                                21,
                                                is_training=False,
                                                global_pool=False,
                                                output_stride=16)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import resnet_utils

slim = tf.contrib.slim
resnet_arg_scope = resnet_utils.resnet_arg_scope


@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Bottleneck residual unit variant with BN before convolutions.

  This is the full preactivation residual unit variant proposed in [2]. See
  Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
  variant which has an extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
    if depth == depth_in:
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                             normalizer_fn=None, activation_fn=None,
                             scope='shortcut')

    residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           normalizer_fn=None, activation_fn=None,
                           scope='conv3')

    output = shortcut + residual

    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.original_name_scope,
                                            output)


def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              reuse=None,
              scope=None):
  """Generator for v2 (preactivation) ResNet models.

  This function generates a family of ResNet v2 models. See the resnet_v2_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether is training or not.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it. If excluded, `inputs` should be the
      results of an activation-less convolution.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.


  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([slim.batch_norm], is_training=is_training):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            output_stride /= 4
          # We do not include batch normalization or activation functions in
          # conv1 because the first ResNet unit will perform these. Cf.
          # Appendix of [2].
          with slim.arg_scope([slim.conv2d],
                              activation_fn=None, normalizer_fn=None):
            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        # This is needed because the pre-activation variant does not have batch
        # normalization or activation functions in the residual unit output. See
        # Appendix of [2].
        net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if num_classes is not None:
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = slim.softmax(net, scope='predictions')
        return net, end_points
resnet_v2.default_image_size = 224


def resnet_v2_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 reuse=None,
                 scope='resnet_v2_50'):
  """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v2_101'):
  """ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v2_152'):
  """ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


def resnet_v2_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  reuse=None,
                  scope='resnet_v2_200'):
  """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
  blocks = [
      resnet_utils.Block(
          'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
      resnet_utils.Block(
          'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
      resnet_utils.Block(
          'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
      resnet_utils.Block(
          'block4', bottleneck, [(2048, 512, 1)] * 3)]
  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, reuse=reuse, scope=scope)


================================================
FILE: models/slim/nets/resnet_v2_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.resnet_v2."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from nets import resnet_utils
from nets import resnet_v2

slim = tf.contrib.slim


def create_test_input(batch_size, height, width, channels):
  """Create test input tensor.

  Args:
    batch_size: The number of images per batch or `None` if unknown.
    height: The height of each image or `None` if unknown.
    width: The width of each image or `None` if unknown.
    channels: The number of channels per image or `None` if unknown.

  Returns:
    Either a placeholder `Tensor` of dimension
      [batch_size, height, width, channels] if any of the inputs are `None` or a
    constant `Tensor` with the mesh grid values along the spatial dimensions.
  """
  if None in [batch_size, height, width, channels]:
    return tf.placeholder(tf.float32, (batch_size, height, width, channels))
  else:
    return tf.to_float(
        np.tile(
            np.reshape(
                np.reshape(np.arange(height), [height, 1]) +
                np.reshape(np.arange(width), [1, width]),
                [1, height, width, 1]),
            [batch_size, 1, 1, channels]))


class ResnetUtilsTest(tf.test.TestCase):

  def testSubsampleThreeByThree(self):
    x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1])
    x = resnet_utils.subsample(x, 2)
    expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1])
    with self.test_session():
      self.assertAllClose(x.eval(), expected.eval())

  def testSubsampleFourByFour(self):
    x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1])
    x = resnet_utils.subsample(x, 2)
    expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1])
    with self.test_session():
      self.assertAllClose(x.eval(), expected.eval())

  def testConv2DSameEven(self):
    n, n2 = 4, 2

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 26],
                               [28, 48, 66, 37],
                               [43, 66, 84, 46],
                               [26, 37, 46, 22]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43],
                               [43, 84]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = tf.to_float([[48, 37],
                               [37, 22]])
    y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1])

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())

  def testConv2DSameOdd(self):
    n, n2 = 5, 3

    # Input image.
    x = create_test_input(1, n, n, 1)

    # Convolution kernel.
    w = create_test_input(1, 3, 3, 1)
    w = tf.reshape(w, [3, 3, 1, 1])

    tf.get_variable('Conv/weights', initializer=w)
    tf.get_variable('Conv/biases', initializer=tf.zeros([1]))
    tf.get_variable_scope().reuse_variables()

    y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv')
    y1_expected = tf.to_float([[14, 28, 43, 58, 34],
                               [28, 48, 66, 84, 46],
                               [43, 66, 84, 102, 55],
                               [58, 84, 102, 120, 64],
                               [34, 46, 55, 64, 30]])
    y1_expected = tf.reshape(y1_expected, [1, n, n, 1])

    y2 = resnet_utils.subsample(y1, 2)
    y2_expected = tf.to_float([[14, 43, 34],
                               [43, 84, 55],
                               [34, 55, 30]])
    y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1])

    y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv')
    y3_expected = y2_expected

    y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv')
    y4_expected = y2_expected

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      self.assertAllClose(y1.eval(), y1_expected.eval())
      self.assertAllClose(y2.eval(), y2_expected.eval())
      self.assertAllClose(y3.eval(), y3_expected.eval())
      self.assertAllClose(y4.eval(), y4_expected.eval())

  def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None):
    """A plain ResNet without extra layers before or after the ResNet blocks."""
    with tf.variable_scope(scope, values=[inputs]):
      with slim.arg_scope([slim.conv2d], outputs_collections='end_points'):
        net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride)
        end_points = dict(tf.get_collection('end_points'))
        return net, end_points

  def testEndPointsV2(self):
    """Test the end points of a tiny v2 bottleneck network."""
    bottleneck = resnet_v2.bottleneck
    blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
              resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])]
    inputs = create_test_input(2, 32, 16, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_plain(inputs, blocks, scope='tiny')
    expected = [
        'tiny/block1/unit_1/bottleneck_v2/shortcut',
        'tiny/block1/unit_1/bottleneck_v2/conv1',
        'tiny/block1/unit_1/bottleneck_v2/conv2',
        'tiny/block1/unit_1/bottleneck_v2/conv3',
        'tiny/block1/unit_2/bottleneck_v2/conv1',
        'tiny/block1/unit_2/bottleneck_v2/conv2',
        'tiny/block1/unit_2/bottleneck_v2/conv3',
        'tiny/block2/unit_1/bottleneck_v2/shortcut',
        'tiny/block2/unit_1/bottleneck_v2/conv1',
        'tiny/block2/unit_1/bottleneck_v2/conv2',
        'tiny/block2/unit_1/bottleneck_v2/conv3',
        'tiny/block2/unit_2/bottleneck_v2/conv1',
        'tiny/block2/unit_2/bottleneck_v2/conv2',
        'tiny/block2/unit_2/bottleneck_v2/conv3']
    self.assertItemsEqual(expected, end_points)

  def _stack_blocks_nondense(self, net, blocks):
    """A simplified ResNet Block stacker without output stride control."""
    for block in blocks:
      with tf.variable_scope(block.scope, 'block', [net]):
        for i, unit in enumerate(block.args):
          depth, depth_bottleneck, stride = unit
          with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
            net = block.unit_fn(net,
                                depth=depth,
                                depth_bottleneck=depth_bottleneck,
                                stride=stride,
                                rate=1)
    return net

  def _atrousValues(self, bottleneck):
    """Verify the values of dense feature extraction by atrous convolution.

    Make sure that dense feature extraction by stack_blocks_dense() followed by
    subsampling gives identical results to feature extraction at the nominal
    network output stride using the simple self._stack_blocks_nondense() above.

    Args:
      bottleneck: The bottleneck function.
    """
    blocks = [
        resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]),
        resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]),
        resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]),
        resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)])
    ]
    nominal_stride = 8

    # Test both odd and even input dimensions.
    height = 30
    width = 31
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      with slim.arg_scope([slim.batch_norm], is_training=False):
        for output_stride in [1, 2, 4, 8, None]:
          with tf.Graph().as_default():
            with self.test_session() as sess:
              tf.set_random_seed(0)
              inputs = create_test_input(1, height, width, 3)
              # Dense feature extraction followed by subsampling.
              output = resnet_utils.stack_blocks_dense(inputs,
                                                       blocks,
                                                       output_stride)
              if output_stride is None:
                factor = 1
              else:
                factor = nominal_stride // output_stride

              output = resnet_utils.subsample(output, factor)
              # Make the two networks use the same weights.
              tf.get_variable_scope().reuse_variables()
              # Feature extraction at the nominal network rate.
              expected = self._stack_blocks_nondense(inputs, blocks)
              sess.run(tf.global_variables_initializer())
              output, expected = sess.run([output, expected])
              self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4)

  def testAtrousValuesBottleneck(self):
    self._atrousValues(resnet_v2.bottleneck)


class ResnetCompleteNetworkTest(tf.test.TestCase):
  """Tests with complete small ResNet v2 networks."""

  def _resnet_small(self,
                    inputs,
                    num_classes=None,
                    is_training=True,
                    global_pool=True,
                    output_stride=None,
                    include_root_block=True,
                    reuse=None,
                    scope='resnet_v2_small'):
    """A shallow and thin ResNet v2 for faster tests."""
    bottleneck = resnet_v2.bottleneck
    blocks = [
        resnet_utils.Block(
            'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]),
        resnet_utils.Block(
            'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]),
        resnet_utils.Block(
            'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]),
        resnet_utils.Block(
            'block4', bottleneck, [(32, 8, 1)] * 2)]
    return resnet_v2.resnet_v2(inputs, blocks, num_classes,
                               is_training=is_training,
                               global_pool=global_pool,
                               output_stride=output_stride,
                               include_root_block=include_root_block,
                               reuse=reuse,
                               scope=scope)

  def testClassificationEndPoints(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, end_points = self._resnet_small(inputs, num_classes,
                                              global_pool=global_pool,
                                              scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
    self.assertTrue('predictions' in end_points)
    self.assertListEqual(end_points['predictions'].get_shape().as_list(),
                         [2, 1, 1, num_classes])

  def testClassificationShapes(self):
    global_pool = True
    num_classes = 10
    inputs = create_test_input(2, 224, 224, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 28, 28, 4],
          'resnet/block2': [2, 14, 14, 8],
          'resnet/block3': [2, 7, 7, 16],
          'resnet/block4': [2, 7, 7, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 21, 21, 8],
          'resnet/block3': [2, 11, 11, 16],
          'resnet/block4': [2, 11, 11, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testRootlessFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    inputs = create_test_input(2, 128, 128, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs, num_classes,
                                         global_pool=global_pool,
                                         include_root_block=False,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 64, 64, 4],
          'resnet/block2': [2, 32, 32, 8],
          'resnet/block3': [2, 16, 16, 16],
          'resnet/block4': [2, 16, 16, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testAtrousFullyConvolutionalEndpointShapes(self):
    global_pool = False
    num_classes = 10
    output_stride = 8
    inputs = create_test_input(2, 321, 321, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      _, end_points = self._resnet_small(inputs,
                                         num_classes,
                                         global_pool=global_pool,
                                         output_stride=output_stride,
                                         scope='resnet')
      endpoint_to_shape = {
          'resnet/block1': [2, 41, 41, 4],
          'resnet/block2': [2, 41, 41, 8],
          'resnet/block3': [2, 41, 41, 16],
          'resnet/block4': [2, 41, 41, 32]}
      for endpoint in endpoint_to_shape:
        shape = endpoint_to_shape[endpoint]
        self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape)

  def testAtrousFullyConvolutionalValues(self):
    """Verify dense feature extraction with atrous convolution."""
    nominal_stride = 32
    for output_stride in [4, 8, 16, 32, None]:
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        with tf.Graph().as_default():
          with self.test_session() as sess:
            tf.set_random_seed(0)
            inputs = create_test_input(2, 81, 81, 3)
            # Dense feature extraction followed by subsampling.
            output, _ = self._resnet_small(inputs, None,
                                           is_training=False,
                                           global_pool=False,
                                           output_stride=output_stride)
            if output_stride is None:
              factor = 1
            else:
              factor = nominal_stride // output_stride
            output = resnet_utils.subsample(output, factor)
            # Make the two networks use the same weights.
            tf.get_variable_scope().reuse_variables()
            # Feature extraction at the nominal network rate.
            expected, _ = self._resnet_small(inputs, None,
                                             is_training=False,
                                             global_pool=False)
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(output.eval(), expected.eval(),
                                atol=1e-4, rtol=1e-4)

  def testUnknownBatchSize(self):
    batch = 2
    height, width = 65, 65
    global_pool = True
    num_classes = 10
    inputs = create_test_input(None, height, width, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      logits, _ = self._resnet_small(inputs, num_classes,
                                     global_pool=global_pool,
                                     scope='resnet')
    self.assertTrue(logits.op.name.startswith('resnet/logits'))
    self.assertListEqual(logits.get_shape().as_list(),
                         [None, 1, 1, num_classes])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 1, 1, num_classes))

  def testFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs, None,
                                     global_pool=global_pool)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 3, 3, 32))

  def testAtrousFullyConvolutionalUnknownHeightWidth(self):
    batch = 2
    height, width = 65, 65
    global_pool = False
    output_stride = 8
    inputs = create_test_input(batch, None, None, 3)
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
      output, _ = self._resnet_small(inputs,
                                     None,
                                     global_pool=global_pool,
                                     output_stride=output_stride)
    self.assertListEqual(output.get_shape().as_list(),
                         [batch, None, None, 32])
    images = create_test_input(batch, height, width, 3)
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      output = sess.run(output, {inputs: images.eval()})
      self.assertEqual(output.shape, (batch, 9, 9, 32))


if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/nets/vgg.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains model definitions for versions of the Oxford VGG network.

These model definitions were introduced in the following technical report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/

Usage:
  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_a(inputs)

  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_16(inputs)

@@vgg_a
@@vgg_16
@@vgg_19
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def vgg_arg_scope(weight_decay=0.0005):
  """Defines the VGG arg scope.

  Args:
    weight_decay: The l2 regularization coefficient.

  Returns:
    An arg_scope.
  """
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_initializer=tf.zeros_initializer()):
    with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
      return arg_sc


def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=False,
          scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_a.default_image_size = 224


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=False,
           train_top_bn=None,  # ignore, just for consistency
           scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      # net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      # rgirdhar: remove the relu from last layer
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.conv2d(net, 512, [3, 3], activation_fn=None,
                        scope='conv5/conv5_3')
      conv5_output = net
      net = tf.nn.relu(net)
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      end_points['vgg_16/conv5'] = conv5_output
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_16.default_image_size = 224


def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=False,
           scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_19.default_image_size = 224

# Alias
vgg_d = vgg_16
vgg_e = vgg_19


================================================
FILE: models/slim/nets/vgg_test.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for slim.nets.vgg."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import vgg

slim = tf.contrib.slim


class VGGATest(tf.test.TestCase):

  def testBuild(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_a(inputs, num_classes)
      self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    batch_size = 1
    height, width = 256, 256
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False)
      self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 2, 2, num_classes])

  def testEndPoints(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = vgg.vgg_a(inputs, num_classes)
      expected_names = ['vgg_a/conv1/conv1_1',
                        'vgg_a/pool1',
                        'vgg_a/conv2/conv2_1',
                        'vgg_a/pool2',
                        'vgg_a/conv3/conv3_1',
                        'vgg_a/conv3/conv3_2',
                        'vgg_a/pool3',
                        'vgg_a/conv4/conv4_1',
                        'vgg_a/conv4/conv4_2',
                        'vgg_a/pool4',
                        'vgg_a/conv5/conv5_1',
                        'vgg_a/conv5/conv5_2',
                        'vgg_a/pool5',
                        'vgg_a/fc6',
                        'vgg_a/fc7',
                        'vgg_a/fc8'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testModelVariables(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      vgg.vgg_a(inputs, num_classes)
      expected_names = ['vgg_a/conv1/conv1_1/weights',
                        'vgg_a/conv1/conv1_1/biases',
                        'vgg_a/conv2/conv2_1/weights',
                        'vgg_a/conv2/conv2_1/biases',
                        'vgg_a/conv3/conv3_1/weights',
                        'vgg_a/conv3/conv3_1/biases',
                        'vgg_a/conv3/conv3_2/weights',
                        'vgg_a/conv3/conv3_2/biases',
                        'vgg_a/conv4/conv4_1/weights',
                        'vgg_a/conv4/conv4_1/biases',
                        'vgg_a/conv4/conv4_2/weights',
                        'vgg_a/conv4/conv4_2/biases',
                        'vgg_a/conv5/conv5_1/weights',
                        'vgg_a/conv5/conv5_1/biases',
                        'vgg_a/conv5/conv5_2/weights',
                        'vgg_a/conv5/conv5_2/biases',
                        'vgg_a/fc6/weights',
                        'vgg_a/fc6/biases',
                        'vgg_a/fc7/weights',
                        'vgg_a/fc7/biases',
                        'vgg_a/fc8/weights',
                        'vgg_a/fc8/biases',
                       ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_a(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(logits, 1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 256, 256
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random_uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = vgg.vgg_a(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      tf.get_variable_scope().reuse_variables()
      eval_inputs = tf.random_uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = vgg.vgg_a(eval_inputs, is_training=False,
                            spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 2, 2, num_classes])
      logits = tf.reduce_mean(logits, [1, 2])
      predictions = tf.argmax(logits, 1)
      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    batch_size = 1
    height, width = 224, 224
    with self.test_session() as sess:
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_a(inputs)
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())


class VGG16Test(tf.test.TestCase):

  def testBuild(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_16(inputs, num_classes)
      self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    batch_size = 1
    height, width = 256, 256
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
      self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 2, 2, num_classes])

  def testEndPoints(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = vgg.vgg_16(inputs, num_classes)
      expected_names = ['vgg_16/conv1/conv1_1',
                        'vgg_16/conv1/conv1_2',
                        'vgg_16/pool1',
                        'vgg_16/conv2/conv2_1',
                        'vgg_16/conv2/conv2_2',
                        'vgg_16/pool2',
                        'vgg_16/conv3/conv3_1',
                        'vgg_16/conv3/conv3_2',
                        'vgg_16/conv3/conv3_3',
                        'vgg_16/pool3',
                        'vgg_16/conv4/conv4_1',
                        'vgg_16/conv4/conv4_2',
                        'vgg_16/conv4/conv4_3',
                        'vgg_16/pool4',
                        'vgg_16/conv5/conv5_1',
                        'vgg_16/conv5/conv5_2',
                        'vgg_16/conv5/conv5_3',
                        'vgg_16/pool5',
                        'vgg_16/fc6',
                        'vgg_16/fc7',
                        'vgg_16/fc8'
                       ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testModelVariables(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      vgg.vgg_16(inputs, num_classes)
      expected_names = ['vgg_16/conv1/conv1_1/weights',
                        'vgg_16/conv1/conv1_1/biases',
                        'vgg_16/conv1/conv1_2/weights',
                        'vgg_16/conv1/conv1_2/biases',
                        'vgg_16/conv2/conv2_1/weights',
                        'vgg_16/conv2/conv2_1/biases',
                        'vgg_16/conv2/conv2_2/weights',
                        'vgg_16/conv2/conv2_2/biases',
                        'vgg_16/conv3/conv3_1/weights',
                        'vgg_16/conv3/conv3_1/biases',
                        'vgg_16/conv3/conv3_2/weights',
                        'vgg_16/conv3/conv3_2/biases',
                        'vgg_16/conv3/conv3_3/weights',
                        'vgg_16/conv3/conv3_3/biases',
                        'vgg_16/conv4/conv4_1/weights',
                        'vgg_16/conv4/conv4_1/biases',
                        'vgg_16/conv4/conv4_2/weights',
                        'vgg_16/conv4/conv4_2/biases',
                        'vgg_16/conv4/conv4_3/weights',
                        'vgg_16/conv4/conv4_3/biases',
                        'vgg_16/conv5/conv5_1/weights',
                        'vgg_16/conv5/conv5_1/biases',
                        'vgg_16/conv5/conv5_2/weights',
                        'vgg_16/conv5/conv5_2/biases',
                        'vgg_16/conv5/conv5_3/weights',
                        'vgg_16/conv5/conv5_3/biases',
                        'vgg_16/fc6/weights',
                        'vgg_16/fc6/biases',
                        'vgg_16/fc7/weights',
                        'vgg_16/fc7/biases',
                        'vgg_16/fc8/weights',
                        'vgg_16/fc8/biases',
                       ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(logits, 1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 256, 256
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random_uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = vgg.vgg_16(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      tf.get_variable_scope().reuse_variables()
      eval_inputs = tf.random_uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
                             spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 2, 2, num_classes])
      logits = tf.reduce_mean(logits, [1, 2])
      predictions = tf.argmax(logits, 1)
      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    batch_size = 1
    height, width = 224, 224
    with self.test_session() as sess:
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_16(inputs)
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())


class VGG19Test(tf.test.TestCase):

  def testBuild(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_19(inputs, num_classes)
      self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])

  def testFullyConvolutional(self):
    batch_size = 1
    height, width = 256, 256
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False)
      self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd')
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, 2, 2, num_classes])

  def testEndPoints(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      _, end_points = vgg.vgg_19(inputs, num_classes)
      expected_names = [
          'vgg_19/conv1/conv1_1',
          'vgg_19/conv1/conv1_2',
          'vgg_19/pool1',
          'vgg_19/conv2/conv2_1',
          'vgg_19/conv2/conv2_2',
          'vgg_19/pool2',
          'vgg_19/conv3/conv3_1',
          'vgg_19/conv3/conv3_2',
          'vgg_19/conv3/conv3_3',
          'vgg_19/conv3/conv3_4',
          'vgg_19/pool3',
          'vgg_19/conv4/conv4_1',
          'vgg_19/conv4/conv4_2',
          'vgg_19/conv4/conv4_3',
          'vgg_19/conv4/conv4_4',
          'vgg_19/pool4',
          'vgg_19/conv5/conv5_1',
          'vgg_19/conv5/conv5_2',
          'vgg_19/conv5/conv5_3',
          'vgg_19/conv5/conv5_4',
          'vgg_19/pool5',
          'vgg_19/fc6',
          'vgg_19/fc7',
          'vgg_19/fc8'
      ]
      self.assertSetEqual(set(end_points.keys()), set(expected_names))

  def testModelVariables(self):
    batch_size = 5
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      inputs = tf.random_uniform((batch_size, height, width, 3))
      vgg.vgg_19(inputs, num_classes)
      expected_names = [
          'vgg_19/conv1/conv1_1/weights',
          'vgg_19/conv1/conv1_1/biases',
          'vgg_19/conv1/conv1_2/weights',
          'vgg_19/conv1/conv1_2/biases',
          'vgg_19/conv2/conv2_1/weights',
          'vgg_19/conv2/conv2_1/biases',
          'vgg_19/conv2/conv2_2/weights',
          'vgg_19/conv2/conv2_2/biases',
          'vgg_19/conv3/conv3_1/weights',
          'vgg_19/conv3/conv3_1/biases',
          'vgg_19/conv3/conv3_2/weights',
          'vgg_19/conv3/conv3_2/biases',
          'vgg_19/conv3/conv3_3/weights',
          'vgg_19/conv3/conv3_3/biases',
          'vgg_19/conv3/conv3_4/weights',
          'vgg_19/conv3/conv3_4/biases',
          'vgg_19/conv4/conv4_1/weights',
          'vgg_19/conv4/conv4_1/biases',
          'vgg_19/conv4/conv4_2/weights',
          'vgg_19/conv4/conv4_2/biases',
          'vgg_19/conv4/conv4_3/weights',
          'vgg_19/conv4/conv4_3/biases',
          'vgg_19/conv4/conv4_4/weights',
          'vgg_19/conv4/conv4_4/biases',
          'vgg_19/conv5/conv5_1/weights',
          'vgg_19/conv5/conv5_1/biases',
          'vgg_19/conv5/conv5_2/weights',
          'vgg_19/conv5/conv5_2/biases',
          'vgg_19/conv5/conv5_3/weights',
          'vgg_19/conv5/conv5_3/biases',
          'vgg_19/conv5/conv5_4/weights',
          'vgg_19/conv5/conv5_4/biases',
          'vgg_19/fc6/weights',
          'vgg_19/fc6/biases',
          'vgg_19/fc7/weights',
          'vgg_19/fc7/biases',
          'vgg_19/fc8/weights',
          'vgg_19/fc8/biases',
      ]
      model_variables = [v.op.name for v in slim.get_model_variables()]
      self.assertSetEqual(set(model_variables), set(expected_names))

  def testEvaluation(self):
    batch_size = 2
    height, width = 224, 224
    num_classes = 1000
    with self.test_session():
      eval_inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_19(eval_inputs, is_training=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [batch_size, num_classes])
      predictions = tf.argmax(logits, 1)
      self.assertListEqual(predictions.get_shape().as_list(), [batch_size])

  def testTrainEvalWithReuse(self):
    train_batch_size = 2
    eval_batch_size = 1
    train_height, train_width = 224, 224
    eval_height, eval_width = 256, 256
    num_classes = 1000
    with self.test_session():
      train_inputs = tf.random_uniform(
          (train_batch_size, train_height, train_width, 3))
      logits, _ = vgg.vgg_19(train_inputs)
      self.assertListEqual(logits.get_shape().as_list(),
                           [train_batch_size, num_classes])
      tf.get_variable_scope().reuse_variables()
      eval_inputs = tf.random_uniform(
          (eval_batch_size, eval_height, eval_width, 3))
      logits, _ = vgg.vgg_19(eval_inputs, is_training=False,
                             spatial_squeeze=False)
      self.assertListEqual(logits.get_shape().as_list(),
                           [eval_batch_size, 2, 2, num_classes])
      logits = tf.reduce_mean(logits, [1, 2])
      predictions = tf.argmax(logits, 1)
      self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])

  def testForward(self):
    batch_size = 1
    height, width = 224, 224
    with self.test_session() as sess:
      inputs = tf.random_uniform((batch_size, height, width, 3))
      logits, _ = vgg.vgg_19(inputs)
      sess.run(tf.global_variables_initializer())
      output = sess.run(logits)
      self.assertTrue(output.any())

if __name__ == '__main__':
  tf.test.main()


================================================
FILE: models/slim/preprocessing/__init__.py
================================================


================================================
FILE: models/slim/preprocessing/cifarnet_preprocessing.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images in CIFAR-10.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

_PADDING = 4

slim = tf.contrib.slim


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         padding=_PADDING):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    padding: The amound of padding before and after each dimension of the image.

  Returns:
    A preprocessed image.
  """
  tf.image_summary('image', tf.expand_dims(image, 0))

  # Transform the image to floats.
  image = tf.to_float(image)
  if padding > 0:
    image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]])
  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(image,
                                   [output_height, output_width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0))

  # Because these operations are not commutative, consider randomizing
  # the order their operation.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)
  # Subtract off the mean and divide by the variance of the pixels.
  return tf.image.per_image_whitening(distorted_image)


def preprocess_for_eval(image, output_height, output_width):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.

  Returns:
    A preprocessed image.
  """
  tf.image_summary('image', tf.expand_dims(image, 0))
  # Transform the image to floats.
  image = tf.to_float(image)

  # Resize and crop if needed.
  resized_image = tf.image.resize_image_with_crop_or_pad(image,
                                                         output_width,
                                                         output_height)
  tf.image_summary('resized_image', tf.expand_dims(resized_image, 0))

  # Subtract off the mean and divide by the variance of the pixels.
  return tf.image.per_image_whitening(resized_image)


def preprocess_image(image, output_height, output_width, is_training=False):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.

  Returns:
    A preprocessed image.
  """
  if is_training:
    return preprocess_for_train(image, output_height, output_width)
  else:
    return preprocess_for_eval(image, output_height, output_width)


================================================
FILE: models/slim/preprocessing/inception_preprocessing.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images for the Inception networks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops


def apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Pass the real x only to one of the func calls.
  return control_flow_ops.merge([
      func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
      for case in range(num_cases)])[0]


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus ordering of the color ops
  matters. Ideally we would randomly permute the ordering of the color ops.
  Rather then adding that level of complication, we select a distinct ordering
  of color ops for each preprocessing thread.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast)
    scope: Optional scope for name_scope.
  Returns:
    3-D Tensor color-distorted image on range [0, 1]
  Raises:
    ValueError: if color_ordering not in [0, 3]
  """
  with tf.name_scope(scope, 'distort_color', [image]):
    if fast_mode:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
    else:
      if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      elif color_ordering == 1:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
      elif color_ordering == 2:
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      elif color_ordering == 3:
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32. / 255.)
      else:
        raise ValueError('color_ordering must be in [0, 3]')

    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)


def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                # area_range=(0.05, 1.0),
                                area_range=(0.85, 1.0),
                                max_attempts=100,
                                scope=None):
  """Generates cropped_image using a one of the bboxes randomly distorted.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole
      image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding box
      supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within in this range.
    max_attempts: An optional `int`. Number of attempts at generating a cropped
      region of the image of the specified constraints. After `max_attempts`
      failures, return the entire image.
    scope: Optional scope for name_scope.
  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].

    # A large fraction of image datasets contain a human-annotated bounding
    # box delineating the region of the image containing the object of interest.
    # We choose to create a new bounding box for the object which is a randomly
    # distorted version of the human-annotated bounding box that obeys an
    # allowed range of aspect ratios, sizes and overlap with the human-annotated
    # bounding box. If no box is supplied, then we assume the bounding box is
    # the entire image.
    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        tf.shape(image),
        bounding_boxes=bbox,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=True)
    bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box

    # Crop the image to the specified bounding box.
    cropped_image = tf.slice(image, bbox_begin, bbox_size)
    return cropped_image, distort_bbox


def preprocess_for_train(image, height, width, bbox,
                         fast_mode=True,
                         scope=None):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not effect the label.

  Additionally it would create image_summaries to display the different
  transformations applied to the image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it would converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Each bounding box has shape [1, num_boxes, box coords] and
    # the coordinates are ordered [ymin, xmin, ymax, xmax].
    # image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
    #                                               bbox)
    # tf.summary.image('image_with_bounding_boxes', image_with_box)
    
    image_channels = image.get_shape().as_list()[-1]
    distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
    # Restore the shape since the dynamic slice based upon the bbox_size loses
    # the third dimension.
    distorted_image.set_shape([None, None, image_channels])
    # image_with_distorted_box = tf.image.draw_bounding_boxes(
    #     tf.expand_dims(image, 0), distorted_bbox)
    # tf.summary.image('images_with_distorted_bounding_box',
    #                  image_with_distorted_box)

    # This resizing operation may distort the images because the aspect
    # ratio is not respected. We select a resize method in a round robin
    # fashion based on the thread number.
    # Note that ResizeMethod contains 4 enumerated resizing methods.

    # We select only 1 case for fast_mode bilinear.
    num_resize_cases = 1 if fast_mode else 4
    distorted_image = apply_with_random_selector(
        distorted_image,
        lambda x, method: tf.image.resize_images(x, [height, width], method=method),
        num_cases=num_resize_cases)

    # tf.summary.image('cropped_resized_image',
    #                  tf.expand_dims(distorted_image, 0))

    # Randomly flip the image horizontally.
    distorted_image = tf.image.random_flip_left_right(distorted_image)

    # Randomly distort the colors. There are 4 ways to do it.
    # rgirdhar: Stop distorting colors
    # distorted_image = apply_with_random_selector(
    #     distorted_image,
    #     lambda x, ordering: distort_color(x, ordering, fast_mode),
    #     num_cases=4)

    # tf.summary.image('final_distorted_image',
    #                  tf.expand_dims(distorted_image, 0))
    distorted_image -= 0.5
    distorted_image *= 2.0
    return distorted_image


def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
  """Prepare one image for evaluation.

  If height and width are specified it would output an image with that size by
  applying resize_bilinear.

  If central_fraction is specified it would cropt the central fraction of the
  input image.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should be
      [0, 1], otherwise it would converted to tf.float32 assuming that the range
      is [0, MAX], where MAX is largest positive representable number for
      int(8/16/32) data type (see `tf.image.convert_image_dtype` for details)
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.
  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # Crop the central region of the image with an area containing 87.5% of
    # the original image.
    if central_fraction:
      image = tf.image.central_crop(image, central_fraction=central_fraction)

    if height and width:
      # Resize the image to the specified height and width.
      image = tf.expand_dims(image, 0)
      image = tf.image.resize_bilinear(image, [height, width],
                                       align_corners=False)
      image = tf.squeeze(image, [0])
    image -= 0.5
    image *= 2.0
    return image


def preprocess_image(image, height, width,
                     is_training=False,
                     resize_side_min=None,  # this and next are only cos VGG
                                            # uses these. No effect here.
                     resize_side_max=None,
                     bbox=None,
                     fast_mode=True):
  """Pre-process one image for training or evaluation.

  Args:
    image: 3-D Tensor [height, width, channels] with the image.
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true it would transform an image for train,
      otherwise it would transform it for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax].
    fast_mode: Optional boolean, if True avoids slower transformations.

  Returns:
    3-D float Tensor containing an appropriately scaled image

  Raises:
    ValueError: if user does not provide bounding box
  """
  if is_training:
    return preprocess_for_train(image, height, width, bbox, fast_mode)
  else:
    return preprocess_for_eval(image, height, width)


================================================
FILE: models/slim/preprocessing/lenet_preprocessing.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities for preprocessing."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim


def preprocess_image(image, output_height, output_width, is_training):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.

  Returns:
    A preprocessed image.
  """
  image = tf.to_float(image)
  image = tf.image.resize_image_with_crop_or_pad(
      image, output_width, output_height)
  image = tf.sub(image, 128.0)
  image = tf.div(image, 128.0)
  return image


================================================
FILE: models/slim/preprocessing/preprocessing_factory.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a factory for building various models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from preprocessing import cifarnet_preprocessing
from preprocessing import inception_preprocessing
from preprocessing import lenet_preprocessing
from preprocessing import vgg_preprocessing

slim = tf.contrib.slim


def get_preprocessing(name, is_training=False):
  """Returns preprocessing_fn(image, height, width, **kwargs).

  Args:
    name: The name of the preprocessing function.
    is_training: `True` if the model is being used for training and `False`
      otherwise.

  Returns:
    preprocessing_fn: A function that preprocessing a single image (pre-batch).
      It has the following signature:
        image = preprocessing_fn(image, output_height, output_width, ...).

  Raises:
    ValueError: If Preprocessing `name` is not recognized.
  """
  preprocessing_fn_map = {
      'cifarnet': cifarnet_preprocessing,
      'inception': inception_preprocessing,
      'inception_v1': inception_preprocessing,
      'inception_v2': inception_preprocessing,
      'inception_v2_tsn': vgg_preprocessing,  # Its wts are copied from caffe
      'inception_v3': inception_preprocessing,
      'inception_v4': inception_preprocessing,
      'inception_resnet_v2': inception_preprocessing,
      'lenet': lenet_preprocessing,
      'resnet_v1_50': vgg_preprocessing,
      'resnet_v1_101': vgg_preprocessing,
      'resnet_v1_152': vgg_preprocessing,
      'vgg': vgg_preprocessing,
      'vgg_a': vgg_preprocessing,
      'vgg_16': vgg_preprocessing,
      'vgg_19': vgg_preprocessing,
  }

  if name not in preprocessing_fn_map:
    raise ValueError('Preprocessing name [%s] was not recognized' % name)

  def preprocessing_fn(image, output_height, output_width, **kwargs):
    # preprocess 4D images (with [frames_per_vid, ht, wd, c])
    expanded_dim = False
    if image.get_shape().ndims == 3:
      expanded_dim = True
      image = tf.expand_dims(image, 0)
    res = tf.stack([preprocessing_fn_map[name].preprocess_image(
      el, output_height, output_width, is_training=is_training, **kwargs)
      for el in tf.unstack(image)])
    if expanded_dim:
      res = res[0]
    return res

  return preprocessing_fn


================================================
FILE: models/slim/preprocessing/vgg_preprocessing.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.

The preprocessing steps for VGG were introduced in the following technical
report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np

from tensorflow.python.ops import control_flow_ops

slim = tf.contrib.slim

# _R_MEAN = 123.68
# _G_MEAN = 116.78
# _B_MEAN = 103.94
_MEAN = 128.0  # rgirdhar: changing to this for easier handling of label channel

_RESIZE_SIDE_MIN = 512
# _RESIZE_SIDE_MAX = 512
_RESIZE_SIDE_MAX = 512  # for pose, I don't want to loose too much


def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it does
  assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    the cropped (and resized) image.

  Raises:
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  original_shape = tf.shape(image)

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  cropped_shape = control_flow_ops.with_dependencies(
      [rank_assertion],
      tf.stack([crop_height, crop_width, original_shape[2]]))

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(original_shape[0], crop_height),
          tf.greater_equal(original_shape[1], crop_width)),
      ['Crop size greater than the image size.'])

  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
  # define the crop size.
  image = control_flow_ops.with_dependencies(
      [size_assertion],
      tf.slice(image, offsets, cropped_shape))
  return tf.reshape(image, cropped_shape)


def _random_crop(image_list, crop_height, crop_width, preproc_info):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor  %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  image_shape = control_flow_ops.with_dependencies(
      [rank_assertions[0]],
      tf.shape(image_list[0]))
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    shape = control_flow_ops.with_dependencies([rank_assertions[i]],
                                               tf.shape(image))
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  max_offset_height = control_flow_ops.with_dependencies(
      asserts, tf.reshape(image_height - crop_height + 1, []))
  max_offset_width = control_flow_ops.with_dependencies(
      asserts, tf.reshape(image_width - crop_width + 1, []))
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  preproc_info['crop_info'] = [
    offset_height, offset_width, crop_height, crop_width]
  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]


def _central_crop(image_list, crop_height, crop_width):
  """Performs central crops of the given image list.

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the height of the image following the crop.
    crop_width: the width of the image following the crop.

  Returns:
    the list of cropped images.
  """
  outputs = []
  for image in image_list:
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]

    offset_height = (image_height - crop_height) / 2
    offset_width = (image_width - crop_width) / 2

    outputs.append(_crop(image, offset_height, offset_width,
                         crop_height, crop_width))
  return outputs


def _mean_image_subtraction(image, means):
  """Subtracts the given means from each image channel.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Note that the rank of `image` must be known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  # if image.get_shape().ndims != 3:
  #   raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = image.get_shape().as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  channels = tf.split(image, num_channels, 2)
  for i in range(num_channels):
    channels[i] -= means[i]
  return tf.concat(channels, 2)


def _smallest_size_at_least(height, width, smallest_side):
  """Computes new shape with the smallest side equal to `smallest_side`.

  Computes new shape with the smallest side equal to `smallest_side` while
  preserving the original aspect ratio.

  Args:
    height: an int32 scalar tensor indicating the current height.
    width: an int32 scalar tensor indicating the current width.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    new_height: an int32 scalar tensor indicating the new height.
    new_width: and int32 scalar tensor indicating the new width.
  """
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height = tf.to_float(height)
  width = tf.to_float(width)
  smallest_side = tf.to_float(smallest_side)

  scale = tf.cond(tf.greater(height, width),
                  lambda: smallest_side / width,
                  lambda: smallest_side / height)
  new_height = tf.to_int32(height * scale)
  new_width = tf.to_int32(width * scale)
  return new_height, new_width


def _aspect_preserving_resize(image, smallest_side):
  """Resize images preserving the original aspect ratio.

  Args:
    image: A 3-D image `Tensor`.
    smallest_side: A python integer or scalar `Tensor` indicating the size of
      the smallest side after resize.

  Returns:
    resized_image: A 3-D tensor containing the resized image.
  """
  num_channels = image.get_shape().as_list()[-1]
  smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  shape = tf.shape(image)
  height = shape[0]
  width = shape[1]
  new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
  image = tf.expand_dims(image, 0)
  resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
                                           align_corners=False)
  resized_image = tf.squeeze(resized_image)
  resized_image.set_shape([None, None, num_channels])
  return resized_image


def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX,
                         preproc_info={}, modality='rgb'):
  """Preprocesses the given image for training.

  Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  num_channels = image.get_shape().as_list()[-1]
  resize_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32)

  image = _aspect_preserving_resize(image, resize_side)
  preproc_info['image_shape'] = tf.shape(image)
  image = _random_crop([image], output_height, output_width, preproc_info)[0]
  image.set_shape([output_height, output_width, num_channels])
  image = tf.to_float(image)
  image, whether_flip = tf.cond(
    tf.greater(tf.random_uniform((), 0, 1, tf.float32), 0.5),
    lambda: tf.tuple([tf.image.flip_left_right(image), tf.constant(True)]),
    lambda: tf.tuple([image, tf.constant(False)]))
  if modality.startswith('flow'):
    tf.logging.info('Subtracting 255-x from X-flow for flips. Flow input.')
    assert(num_channels % 2 == 0)
    flow_img_flip = image
    alt_mat = np.ones([
      image.get_shape().as_list()[-3],
      image.get_shape().as_list()[-2],
      num_channels])
    alt_mat[..., np.arange(0, num_channels, 2)] *= -1
    IMG_SCALER = 256.0
    flow_img_flip = (flow_img_flip - IMG_SCALER/2) * alt_mat + IMG_SCALER/2
    image = tf.cond(
      whether_flip,
      lambda: flow_img_flip,
      lambda: image)
  preproc_info['whether_flip'] = whether_flip
  # tf.image.random_flip_left_right(image)

  # return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])
  return _mean_image_subtraction(image, [_MEAN] * num_channels)


def preprocess_for_eval(image, output_height, output_width, resize_side):
  """Preprocesses the given image for evaluation.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    resize_side: The smallest side of the image for aspect-preserving resizing.

  Returns:
    A preprocessed image.
  """
  num_channels = image.get_shape().as_list()[-1]
  image = _aspect_preserving_resize(image, resize_side)
  image = _central_crop([image], output_height, output_width)[0]
  image.set_shape([output_height, output_width, num_channels])
  image = tf.to_float(image)
  return _mean_image_subtraction(image, [_MEAN] * num_channels)
  # return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])


def preprocess_image(image, output_height, output_width, is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX,
                     preproc_info={}, modality='rgb'):
  """Preprocesses the given image.

  Args:
    image: A `Tensor` representing an image of arbitrary size.
    output_height: The height of the image after preprocessing.
    output_width: The width of the image after preprocessing.
    is_training: `True` if we're preprocessing the image for training and
      `False` otherwise.
    resize_side_min: The lower bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, then this value
      is used for rescaling.
    resize_side_max: The upper bound for the smallest side of the image for
      aspect-preserving resizing. If `is_training` is `False`, this value is
      ignored. Otherwise, the resize side is sampled from
        [resize_size_min, resize_size_max].
    preproc_info: will return all the information for the preprocessing,
      including sizes, positions, flip or not etc, which can then be replayed
      onto other images (specifically to be used for target heatmaps).
      It should contain:
        - whether_flip: Bool tensor: whether the image was flipped or not
        - image_shape: [3,1] tensor: size of the original image after resize
        - crop_info: [offset_ht, offset_wd, crop_ht, crop_wd]

  Returns:
    A preprocessed image.
  """
  if is_training:
    return preprocess_for_train(image, output_height, output_width,
                                resize_side_min, resize_side_max,
                                preproc_info, modality)
  else:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min)


================================================
FILE: src/config.py
================================================
"""Config System
"""

import os
import os.path as osp
import numpy as np
from easydict import EasyDict as edict

__C = edict()
# Consumers can get config by:
#   from fast_rcnn_config import cfg
cfg = __C


#
# Input options
#


__C.INPUT = edict()

# normal: normal image
# rendered-pose: rendered pose on black bg
# rendered-pose-on-image: rendered onto the image
__C.INPUT.INPUT_IMAGE_FORMAT = 'normal'

# pose renders can be 'rgb' or 'split-channel'
__C.INPUT.INPUT_IMAGE_FORMAT_POSE_RENDER_TYPE = 'rgb'

# input glimpse options
__C.INPUT.POSE_GLIMPSE_CONTEXT_RATIO = 0.0  # ratio of glimpse area to pad around
# set the following to true to resize the output to [IMAGE_SIZE, IMAGE_SIZE]
# square
__C.INPUT.POSE_GLIMPSE_RESIZE = False
# list part sof the pose to keep in glimpse. Empty => all parts to keep
__C.INPUT.POSE_GLIMPSE_PARTS_KEEP = []


__C.INPUT.SPLIT_ID = 1  # for dataset with multiple splits (hmdb)

# FOR VIDEO
__C.INPUT.VIDEO = edict()
__C.INPUT.VIDEO.MODALITY = 'rgb'  # rgb/flow5/flow10 etc

#
# Training options
#

__C.TRAIN = edict()

# Minibatch size
__C.TRAIN.BATCH_SIZE = 10

__C.TRAIN.WEIGHT_DECAY = 0.0005

# set to a positive value to clip the gradients at that l2 norm
__C.TRAIN.CLIP_GRADIENTS = -1.0

# the following should have been in the INPUT, but are here for historical
# reasons
__C.TRAIN.IMAGE_SIZE = 450  # final cropped image size
__C.TRAIN.RESIZE_SIDE = 480  # resize the input image to this size for preproc
## The RESIZE_SIDE is the size for the smallest side, so be careful,
## MPII has images with extreme ratios
## Note that if the difference RESIZE_SIDE to IMAGE_SIZE is too high,
## most of the image being fed into the network will be small parts of the
## image

# This is the side of the heatmap before putting into queues
# Ideally, resize it to the final target size so that there is no
# need for a resize before computing loss. For inception-v2 with 450 input, the
# output is 15x15
__C.TRAIN.FINAL_POSE_HMAP_SIDE = 15

__C.TRAIN.LABEL_SMOOTHING = False

__C.TRAIN.MOVING_AVERAGE_VARIABLES = None

__C.TRAIN.LEARNING_RATE = 0.01
__C.TRAIN.LEARNING_RATE_DECAY_RATE = 0.33
__C.TRAIN.END_LEARNING_RATE = 0.00001

__C.TRAIN.NUM_STEPS_PER_DECAY = 0  # if this is not 0, the NUM_EPOCHS_PER_DECAY
                                   # is ignored and this is used
__C.TRAIN.NUM_EPOCHS_PER_DECAY = 40.0

__C.TRAIN.LEARNING_RATE_DECAY_TYPE = 'exponential'


__C.TRAIN.OPTIMIZER = 'momentum'
__C.TRAIN.MOMENTUM = 0.9
__C.TRAIN.ADAM_BETA1 = 0.9
__C.TRAIN.ADAM_BETA2 = 0.999
__C.TRAIN.OPT_EPSILON = 1.0

__C.TRAIN.TRAINABLE_SCOPES = ''

__C.TRAIN.MAX_NUMBER_OF_STEPS = 100000

__C.TRAIN.LOG_EVERY_N_STEPS = 10

__C.TRAIN.SAVE_SUMMARIES_SECS = 300

__C.TRAIN.SAVE_INTERVAL_SECS = 1800

__C.TRAIN.IGNORE_MISSING_VARS = True

__C.TRAIN.CHECKPOINT_PATH = 'data/pretrained_models/inception_v3.ckpt'

# __C.TRAIN.CHECKPOINT_EXCLUDE_SCOPES = 'InceptionV3/Logits,InceptionV3/AuxLogits,PoseLogits'
__C.TRAIN.CHECKPOINT_EXCLUDE_SCOPES = ''

__C.TRAIN.DATASET_SPLIT_NAME = 'trainval_train'

# loss fn can be from the list or empty '', i.e. no loss on that modality
__C.TRAIN.LOSS_FN_POSE = 'l2'  # can be 'l2'/'log-loss'/'sigmoid-log-loss'/'cosine-loss'
__C.TRAIN.LOSS_FN_POSE_WT = 1.0
__C.TRAIN.LOSS_FN_POSE_SAMPLED = False  # Harder loss, sample the negatives
__C.TRAIN.LOSS_FN_ACTION = 'softmax-xentropy'  # can be 'softmax-xentropy'
__C.TRAIN.LOSS_FN_ACTION_WT = 1.0

__C.TRAIN.VAR_NAME_MAPPER = ''  # to be used when loading from npy checkpoints
                                # see options in restore/var_name_mapper.py

__C.TRAIN.VIDEO_FRAMES_PER_VIDEO = 1

# If true, divide the video into segments and read
# a random frame from that segment
__C.TRAIN.READ_SEGMENT_STYLE = False

__C.TRAIN.ITER_SIZE = 1  # accumulate gradients over this many iterations

__C.TRAIN.OTHER_IMG_SUMMARIES_TO_ADD = ['PosePrelogitsBasedAttention']

#
# Testing options
#

__C.TEST = edict()

__C.TEST.BATCH_SIZE = 10

__C.TEST.DATASET_SPLIT_NAME = 'trainval_val'

__C.TEST.MAX_NUM_BATCHES = None

__C.TEST.CHECKPOINT_PATH = b''

__C.TEST.MOVING_AVERAGE_DECAY = None

__C.TEST.VIDEO_FRAMES_PER_VIDEO = 1  # single image dataset. Set 25 for hmdb

__C.TEST.EVAL_METRIC = ''  # normal eval. Set ='mAP' to compute that.


#
# Network properties
#

__C.NET = edict()
# The following replaces the action logits with one computed by weighting the
# output using pose heatmaps
__C.NET.USE_POSE_ATTENTION_LOGITS = False
__C.NET.USE_POSE_ATTENTION_LOGITS_DIMS = [-1]  # by default use all parts
# set following true to have a heatmap as the avg of all heatmaps
__C.NET.USE_POSE_ATTENTION_LOGITS_AVGED_HMAP = False


# The following will replace the action logits with one computed over the last
# pose logits
__C.NET.USE_POSE_LOGITS_DIRECTLY = False
# set true to also have the actual logits concatenated to the output
__C.NET.USE_POSE_LOGITS_DIRECTLY_PLUS_LOGITS = False
# Another version, after talking to Deva on March 20, 2017. Concat before avg
# pool and remove the extra layer.
# The following by default contain the image logits
__C.NET.USE_POSE_LOGITS_DIRECTLY_v2 = False
__C.NET.USE_POSE_LOGITS_DIRECTLY_v2_EXTRA_LAYER = False

# The following will replace the action logits with a one computed using an
# unconstrained attention predictor based on the pose output
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION = False
# REMOVED THIS TO DEPRECATE
# # setting the following to true basically just reproduces the original system
# # (doesnot use any attention). I just used it to debug that this can reproduce
# # the original numbers (nothing else got screwed up)
# __C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_DEBUG = False
# set the following to more to have more layers predicting the unconstrained
# attention map
# DEPRECATING the following, commented out for now, will be removed later.
# __C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_NLAYERS = 1
# set True to enforce the attention map that is learnt to be passed  through a
# spatial softmax
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SOFTMAX_ATT = False
# Pass the attention through a relu
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RELU_ATT = False
# 21 April 2017: This is not DEPRECATED because it didn't help, so it won't
# work with code now. This was to simplify code for TopDownAttention endpoint
# # Create an attention map for each class
# adding it again on July 26, 2017 for NIPS17 rebuttal
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_PER_CLASS = False
# Train attention directly over image features
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_SINGLE_LAYER_ATT = False
# Add the predicted pose to the logits features
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT = False
# 2-layers over the pose logits
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_WITH_POSE_FEAT_2LAYER = False
# Allow for Rank > 1 approximation. Other options might not work with this
__C.NET.USE_POSE_PRELOGITS_BASED_ATTENTION_RANK = 1

# Do attention on temporal pooling as well
__C.NET.USE_TEMPORAL_ATT = False

# Bilinear pooling baselines
__C.NET.USE_COMPACT_BILINEAR_POOLING = False

# Set which endpoint serves as the output for pose
__C.NET.LAST_CONV_MAP_FOR_POSE = edict()
__C.NET.LAST_CONV_MAP_FOR_POSE.inception_v2_tsn = 'InceptionV2_TSN/inception_5a'
__C.NET.LAST_CONV_MAP_FOR_POSE.inception_v3 = 'Mixed_7c'
__C.NET.LAST_CONV_MAP_FOR_POSE.resnet_v1_101 = 'resnet_v1_101/block4'
__C.NET.LAST_CONV_MAP_FOR_POSE.vgg_16 = 'vgg_16/conv5'


# Train the top BN. Useful when training flow/multi-channel inputs other than
# RGB. In case of ResNet, this means "train only top_bn", and keep others
# fixed.
__C.NET.TRAIN_TOP_BN = False
# Dropout
# -1 (<0) => Use the network default. Else, use this value
__C.NET.DROPOUT = -1.0

#
# MISC
#

# For reproducibility
__C.RNG_SEED = 42

# A small number that's used many times
__C.EPS = 1e-14

# Root directory of project
__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))

# Data directory
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))

# Model directory
__C.EXP_DIR = 'expt_outputs/'

__C.DATASET_NAME = 'mpii'

__C.DATASET_DIR = 'data/mpii/mpii_tfrecords'

# Set the following if using the train_test files from non-std location
__C.DATASET_LIST_DIR = ''

__C.MODEL_NAME = 'inception_v3'

__C.NUM_READERS = 4

__C.NUM_PREPROCESSING_THREADS = 4

__C.GPUS = '2'

__C.HEATMAP_MARKER_WD_RATIO = 0.1

__C.MAX_INPUT_IMAGE_SIZE = 512  # to avoid arbitrarily huge input images

# ['one-label'/'multi-label%d']
__C.INPUT_FILE_STYLE_LABEL = ''


def get_output_dir(config_file_name):
    """Return the directory where experimental artifacts are placed.
    If the directory does not exist, it is created.

    A canonical path is built using the name from an imdb and a network
    (if not None).
    """
    outdir = osp.abspath(osp.join(__C.EXP_DIR, osp.basename(config_file_name)))
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    return outdir

def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if type(a) is not edict:
        return

    for k, v in a.iteritems():
        # a must specify keys that are in b
        if not b.has_key(k):
            raise KeyError('{} is not a valid config key'.format(k))

        # the types must match, too
        old_type = type(b[k])
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            else:
                raise ValueError(('Type mismatch ({} vs. {}) '
                                'for config key: {}').format(type(b[k]),
                                                            type(v), k))

        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v

def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_a_into_b(yaml_cfg, __C)

def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert d.has_key(subkey)
            d = d[subkey]
        subkey = key_list[-1]
        assert d.has_key(subkey)
        try:
            value = literal_eval(v)
        except:
            # handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(
            type(value), type(d[subkey]))
        d[subkey] = value


================================================
FILE: src/custom_ops/Makefile
================================================
BOOST_DIR := /home/rgirdhar/Software/basic/boost/install2/
BOOST_LIB_DIR := $(BOOST_DIR)/lib
BOOST_INC_DIR := $(BOOST_DIR)/include
TF_INC := $(shell python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
LDFLAGS := -Wl,-rpath,$(BOOST_LIB_DIR)  # this ensures it will look in the correct BOOST directory for libs (not the system path)

all: pose_to_heatmap.so zero_out_channels.so render_pose.so render_objects.so

pose_to_heatmap.so: pose_to_heatmap.cc
	g++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` pose_to_heatmap.cc -o pose_to_heatmap.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread

zero_out_channels.so: zero_out_channels.cc
	g++ -std=c++11 $(LDFLAGS) -shared zero_out_channels.cc -o zero_out_channels.so -fPIC -I $(TF_INC) -O2

render_pose.so: render_pose.cc pose_utils.hpp
	g++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` render_pose.cc -o render_pose.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread

render_objects.so: render_objects.cc
	g++ -std=c++11 $(LDFLAGS) -shared -I$(BOOST_INC_DIR) `pkg-config --cflags --libs opencv` render_objects.cc -o render_objects.so -fPIC -I $(TF_INC) -O2 -L$(BOOST_LIB_DIR) -lboost_system -lboost_filesystem -lboost_thread


================================================
FILE: src/custom_ops/__init__.py
================================================


================================================
FILE: src/custom_ops/custom_ops_factory.py
================================================
import os
import json
from collections import OrderedDict
import numpy as np
import tensorflow as tf

cur_path = os.path.realpath(__file__)
ROOT_PATH = os.path.dirname(cur_path)

# add any new ops under the following
pose_to_heatmap_fn = tf.load_op_library(
  os.path.join(ROOT_PATH, 'pose_to_heatmap.so')).pose_to_heatmap
zero_out_channels_fn = tf.load_op_library(
  os.path.join(ROOT_PATH, 'zero_out_channels.so')).zero_out_channels
render_pose_fn = tf.load_op_library(
  os.path.join(ROOT_PATH, 'render_pose.so')).render_pose
render_objects_fn = tf.load_op_library(
  os.path.join(ROOT_PATH, 'render_objects.so')).render_objects

def pose_to_heatmap(*args, **kwargs):
  with tf.variable_scope('pose_to_heatmap_pyWrapper'):
    pose_img, pose_valid = pose_to_heatmap_fn(*args, **kwargs)
    out_channels = kwargs['out_channels']
    pose_img.set_shape((None, None, out_channels))
    pose_valid.set_shape((out_channels,))
    pose_img *= 255.0
    pose_img = tf.cast(pose_img, tf.uint8)
  return pose_img, pose_valid

def zero_out_channels(*args, **kwargs):
  with tf.variable_scope('zero_out_channels_pyWrapper'):
    return zero_out_channels_fn(*args, **kwargs)

def render_pose(*args, **kwargs):
  with tf.variable_scope('render_pose_pyWrapper'):
    out_channels = 3
    if kwargs['out_type'] == 'rgb':
      kwargs['out_type'] = 1
      out_channels = 3
    elif kwargs['out_type'] == 'split-channel':
      kwargs['out_type'] = 2
      out_channels = 18  # number of limbs
    img = render_pose_fn(*args, **kwargs)
    img *= 255.0
    img = tf.cast(img, tf.uint8)
    img.set_shape((None, None, out_channels))
  return img

# from render_pose.cc
mpii_to_coco = OrderedDict([
  (9, 0),
  (8, 1),
  (12, 2),
  (11, 3),
  (10, 4),
  (13, 5),
  (14, 6),
  (15, 7),
  (2, 8),
  (1, 9),
  (0, 10),
  (3, 11),
  (4, 12),
  (5, 13),
])
def read_json_pose_fn(fpath):
  try:
    with open(fpath, 'r') as fin:
      data = json.load(fin)
  except:
    print('Unable to open file {}'.format(fpath))
    return -np.ones((16*3,)).astype('int64')
  res = []
  for body in data['bodies']:
    mpii_joints = -np.ones((16, 3))
    joints = np.array(body['joints'])
    joints = np.reshape(joints, (-1, 3))
    joints[joints[..., :] <= 0] = -1
    mpii_joints[np.array(mpii_to_coco.keys()), :] = \
      joints[np.array(mpii_to_coco.values()), :]
    res += mpii_joints.reshape((-1,)).tolist()
  res = np.array(res).astype('int64')
  return res

def read_json_pose(*args):
  return tf.py_func(read_json_pose_fn, args, tf.int64)

def render_objects(*args, **kwargs):
  with tf.variable_scope('render_objects_pyWrapper'):
    img = render_objects_fn(*args, **kwargs)
    img *= 255.0
    img = tf.cast(img, tf.uint8)
    img.set_shape((None, None, kwargs['out_channels']))
  return img

def extract_glimpse(image, pose_label, orig_im_ht, orig_im_wd,
                    out_side, pad_ratio, parts_keep):
  # pose label is a [3x16xn,] vector
  # for now just take the first pose and crop out the human
  with tf.name_scope('ExtractGlimpse'):
    pose_label = pose_label[:16*3]
    pose_label = tf.reshape(pose_label, [16, 3])
    if len(parts_keep) > 0:
      pose_label = tf.gather(pose_label, parts_keep)
    if len(parts_keep) == 1:
      # now only one point, but need at least two to make a crop region
      delta = tf.to_int64(
        [tf.to_float(tf.shape(image)[-2]) * 0.1,
         tf.to_float(tf.shape(image)[-3]) * 0.1, 0])
      pose_label = tf.stack([
        pose_label[0] - delta, pose_label[0] + delta])
    pose_label_x = tf.to_float(pose_label[:, 0]) * \
        tf.to_float(tf.shape(image)[-2]) / tf.to_float(orig_im_wd)
    pose_label_y = tf.to_float(pose_label[:, 1]) * \
        tf.to_float(tf.shape(image)[-3]) / tf.to_float(orig_im_ht)
    pose_label = tf.stack([pose_label_y, pose_label_x])
    mx_pts = tf.to_int32(tf.reduce_max(pose_label, axis=1))
    mn_pts = tf.to_int32(tf.reduce_min(
      tf.where(tf.greater_equal(pose_label, 0), pose_label,
               tf.ones(pose_label.get_shape()) * 999999), axis=1))
    delta_0 = tf.to_int32(tf.to_float((mx_pts[0] - mn_pts[0])) * pad_ratio)
    delta_1 = tf.to_int32(tf.to_float((mx_pts[1] - mn_pts[1])) * pad_ratio)
    mx_pts = mx_pts + [delta_0, delta_1]
    mn_pts = mn_pts - [delta_0, delta_1]

    offset_ht = tf.maximum(mn_pts[0], 0)
    offset_wd = tf.maximum(mn_pts[1], 0)
    target_ht = tf.minimum(mx_pts[0]-offset_ht, tf.shape(image)[-3]-offset_ht-1)
    target_wd = tf.minimum(mx_pts[1]-offset_wd, tf.shape(image)[-2]-offset_wd-1)
    # image = tf.Print(image, [offset_ht, offset_wd, target_ht, target_wd,
    #                          tf.shape(image)], "stuff:")
    image = tf.cond(tf.logical_and(
      tf.greater(mx_pts[1], mn_pts[1]),
      tf.greater(mx_pts[0], mn_pts[0])),
      lambda: tf.image.crop_to_bounding_box(
        image, offset_ht, offset_wd, target_ht, target_wd),
      lambda: image)
    if out_side > 0:
      image = tf.image.resize_images(
        image, [out_side, out_side])
    return image

def read_sparse_label_fn(sparse_label, nclasses):
  """sparse_label is a string and return a 1D vector with the dense label
  """
  res = np.zeros((nclasses,), dtype='int32')
  res[np.array([int(el.split(':')[0]) for el in sparse_label.split(',')])] = \
      np.array([int(el.split(':')[1]) for el in sparse_label.split(',')])
  res[res < 0] = 0  # get rid of -1 label for now
  return res

def read_sparse_label(*args):
  return tf.py_func(read_sparse_label_fn, args, tf.int32)


================================================
FILE: src/custom_ops/pose_to_heatmap.cc
================================================
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <iostream>
#include <tuple>

#include <opencv2/opencv.hpp>

using namespace tensorflow;
using namespace std;

REGISTER_OP("PoseToHeatmap")
  .Attr("out_channels: int = 16")
  .Attr("marker_wd_ratio: float = 0.1")
  .Attr("do_gauss_blur: bool = True")
  .Input("pose_label: int64")
  .Input("im_ht: int64")
  .Input("im_wd: int64")
  .Input("out_wd: int64")  // out_height decided using this and aspect ratio of image
  .Output("heatmap: float")
  .Output("is_valid: bool");  // a bit for each channel, if that pose label is valid or not

class PoseToHeatmapOp : public OpKernel {
 public:
  explicit PoseToHeatmapOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(
        context, context->GetAttr("out_channels", &out_channels_));
    OP_REQUIRES_OK(
        context, context->GetAttr("marker_wd_ratio", &marker_wd_ratio_));
    OP_REQUIRES_OK(
        context, context->GetAttr("do_gauss_blur", &do_gauss_blur_));
  }

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& pose_label_tensor = context->input(0);
    auto pose_label = pose_label_tensor.flat<long long>();
    const Tensor& im_ht_tensor = context->input(1);
    auto im_ht = im_ht_tensor.flat<long long>()(0);
    const Tensor& im_wd_tensor = context->input(2);
    auto im_wd = im_wd_tensor.flat<long long>()(0);
    const Tensor& out_wd_tensor = context->input(3);
    auto out_wd = out_wd_tensor.flat<long long>()(0);
    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);

    // The pose label should be 16 keypoints, with X,Y,is_visible
    int num_keypoints = out_channels_;
    assert(pose_label.size() % (3 * num_keypoints) == 0);
    int n_rects = pose_label.size() / (3 * num_keypoints);

    // Create output tensors
    TensorShape out_shape {out_ht, out_wd, out_channels_};
    Tensor* output_tensor = NULL;
    OP_REQUIRES_OK(
        context, 
        context->allocate_output(
          0,
          out_shape,
          &output_tensor));
    auto output = output_tensor->tensor<float, 3>();
    TensorShape out_shape_valid {out_channels_};
    Tensor* output_tensor_valid = NULL;
    OP_REQUIRES_OK(
        context, 
        context->allocate_output(
          1,
          out_shape_valid,
          &output_tensor_valid));
    auto output_valid = output_tensor_valid->tensor<bool, 1>();

    int elts_per_pose = num_keypoints * 3;
    for (int i = 0; i < num_keypoints; i++) {
      cv::Mat channel(out_ht, out_wd, CV_32FC1, 0.0);
      output_valid(i) = false;
      for (int rid = 0; rid < n_rects; rid++) {  // for each rectangle
        int x = pose_label(rid * elts_per_pose + i * 3) * out_wd / im_wd;
        int y = pose_label(rid * elts_per_pose + i * 3 + 1) * out_ht / im_ht;
        int is_visible = pose_label(rid * elts_per_pose + i * 3 + 2);  // ignore this
        if (pose_label(rid * elts_per_pose + i * 3) >= 0 &&
            pose_label(rid * elts_per_pose + i * 3 + 1) >= 0) {
          output_valid(i) = true;
          circle(channel, cv::Point(x, y),
                 (int) out_wd * marker_wd_ratio_,
                 cv::Scalar(1.0, 1.0, 1.0), -1);
          if (do_gauss_blur_)
            GaussianBlur(channel, channel, cv::Size(7, 7), 0);
        }
      }
      for (int r = 0; r < channel.rows; r++) {
        for (int c = 0; c < channel.cols; c++) {
          output(r, c, i) = channel.at<float>(r, c);
        }
      }
    }
  }
  
 private:
  int out_channels_;
  float marker_wd_ratio_;
  bool do_gauss_blur_;
};

REGISTER_KERNEL_BUILDER(Name("PoseToHeatmap").Device(DEVICE_CPU), PoseToHeatmapOp);


================================================
FILE: src/custom_ops/pose_utils.hpp
================================================
#include <opencv2/opencv.hpp>

#include <tuple>
#include <iostream>
#include <sstream>

// Very important to add the following #define
// boost json parser depends on boost::spirit
// which is not thread safe by default.
// It was giving Segmentation Faults.
// Also, this means I need to compile with -lboost_thread
// ref: http://stackoverflow.com/a/22089792/1492614
// This was tested to work fine with multi-threaded training
#define BOOST_SPIRIT_THREADSAFE
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>

using namespace std;
using namespace cv;
namespace pt = boost::property_tree;


vector<float> joint_color {1, 0, 0,
                           1, 0.33, 0,
                           1, 0.66, 0,
                           1, 1, 0,
                           0.66, 1, 0,
                           0.33, 1, 0,
                           0, 1, 0,
                           0, 1, 0.33,
                           0, 1, 0.66,
                           0, 1, 1,
                           0, 0.66, 1,
                           0, 0.33, 1,
                           0, 0, 1,
                           0.33, 0, 1,
                           0.66, 0, 1,
                           1, 0, 1,
                           1, 0, 0.66,
                           1, 0, 0.33};
//                           1, 1, 1};
vector<int> limbSeq {2, 3,
                     2, 6,
                     3, 4,
                     4, 5,
                     6, 7,
                     7, 8,
                     2, 9,
                     9, 10,
                     10, 11,
                     2, 12,
                     12, 13,
                     13, 14,
                     2, 1,
                     1, 15,
                     15, 17,
                     1, 16,
                     16, 18,
                     3, 17};
//                     6, 18};

#define RENDER_POSE_OUT_TYPE_RGB 1
#define RENDER_POSE_OUT_TYPE_SPLITCHANNEL 2
Mat render_pose(vector<vector<tuple<float,float,float>>> poses,
    int out_ht, int out_wd,
    int max_ht, int max_wd, int marker_wd,
    int out_type=RENDER_POSE_OUT_TYPE_RGB) {
  int nLimbs = limbSeq.size() / 2;
  int nchannels = 3;
  if (out_type == RENDER_POSE_OUT_TYPE_RGB) {
    nchannels = 3;
  } else if (out_type == RENDER_POSE_OUT_TYPE_SPLITCHANNEL) {
    nchannels = nLimbs;
  } else {
    cerr << "render_pose: Unknown output type." << endl;
  }
  Mat output(out_ht, out_wd, CV_32FC(nchannels), 0.0);
  vector<Mat> output_channels;
  if (nchannels != 3) {
    split(output, output_channels);
  }
  // assert(limbSeq.size() / 2 == joint_color.size() / 3);
  for (int body_id = 0; body_id < poses.size(); body_id++) {
    for (int i = 0; i < nLimbs; i++) {
      float scal_ht = out_ht * 1.0 / max_ht;
      float scal_wd = out_wd * 1.0 / max_wd;
      tuple<float, float, float> pt1 = poses[body_id][limbSeq[2*i]-1];
      tuple<float, float, float> pt2 = poses[body_id][limbSeq[2*i+1]-1];
      float pt1_conf = get<2>(pt1);
      float pt2_conf = get<2>(pt2);
      if (pt1_conf < 0.1 || pt2_conf < 0.1) {
        continue;
      }
      Mat render_img;
      Scalar color;
      if (nchannels == 3) {
        render_img = output;
        color = CV_RGB(joint_color[i*3], joint_color[i*3+1], joint_color[i*3+2]);
      } else {
        render_img = output_channels[i];
        color = Scalar(1);
      }
      line(
          render_img,
          Point(get<0>(pt1) * scal_wd, get<1>(pt1) * scal_ht),
          Point(get<0>(pt2) * scal_wd, get<1>(pt2) * scal_ht),
          color, marker_wd);
    }
  }
  if (nchannels != 3) {
    merge(output_channels, output);
  }
  return output;
}


vector<vector<tuple<float,float,float>>> read_pose_xml(string xml_str, int &pose_dim) {
  vector<vector<tuple<float,float,float>>> poses;
  if (xml_str.size() > 0) {
    stringstream ss(xml_str);
    pt::ptree root;
    pt::read_json(ss, root);
    for (pt::ptree::value_type &body : root.get_child("bodies")) {
      vector<float> elts;
      for (pt::ptree::value_type &joints : body.second.get_child("joints")) {
        elts.push_back((float) stof(joints.second.data()));
      }
      pose_dim = elts.size() / 3;  // x,y,score format
      if (pose_dim * 3 != elts.size()) {
        cerr << "Invalid number of numbers in pose dim ("
          << pose_dim * 3 << " vs " << elts.size() << endl;
        poses.clear();
        break;
      }
      vector<tuple<float,float,float>> pose;
      for (int i = 0; i < pose_dim; i++) {
        pose.push_back(make_tuple(elts[i*3], elts[i*3+1], elts[i*3+2]));
      }
      poses.push_back(pose);
    }
  } else {
    cerr << "json_to_pose: Empty string passed in." << endl;
  }
  return poses;
}


================================================
FILE: src/custom_ops/render_objects.cc
================================================
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <iostream>
#include <tuple>

#include <opencv2/opencv.hpp>

using namespace tensorflow;
using namespace std;

REGISTER_OP("RenderObjects")
  .Attr("out_channels: int = 80")
  .Input("objects_label: string")
  .Input("im_ht: int64")
  .Input("im_wd: int64")
  .Input("out_wd: int64")  // out_height decided using this and aspect ratio of image
  .Output("image: float");


void read_detections(
    string objects_label,
    vector<tuple<int,float,float,float,float,float>> &detections) {
  istringstream ss(objects_label);
  int ob_label, id;  // ignore the id
  float conf, xmin, ymin, xmax, ymax;
  detections.clear();
  while (ss >> id >> ob_label >> conf >> xmin >> ymin >> xmax >> ymax) {
    detections.push_back(make_tuple(ob_label, conf, xmin, ymin, xmax, ymax));
  }
}


class RenderObjectsOp : public OpKernel {
 public:
  explicit RenderObjectsOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(
        context, context->GetAttr("out_channels", &out_channels_));
  }

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& objects_label_tensor = context->input(0);
    auto objects_label = objects_label_tensor.flat<string>()(0);
    const Tensor& im_ht_tensor = context->input(1);
    auto im_ht = im_ht_tensor.flat<long long>()(0);
    const Tensor& im_wd_tensor = context->input(2);
    auto im_wd = im_wd_tensor.flat<long long>()(0);
    const Tensor& out_wd_tensor = context->input(3);
    auto out_wd = out_wd_tensor.flat<long long>()(0);
    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);

    // Create output tensors
    TensorShape out_shape {out_ht, out_wd, out_channels_};
    Tensor* output_tensor = NULL;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(
          0,
          out_shape,
          &output_tensor));
    auto output = output_tensor->tensor<float, 3>();
    vector<tuple<int,float,float,float,float,float>> detections;
    read_detections(objects_label, detections);
    for (int i = 0; i < out_wd; i++) {
      for (int j = 0; j < out_ht; j++) {
        for (int k = 0; k < out_channels_; k++) {
          output(j, i, k) = 0;
        }
      }
    }

    if (out_channels_ != 3) {  // i.e. not doing a RGB output
      for (unsigned int i = 0; i < detections.size(); i++) {
        int xmin = get<2>(detections[i]) * out_wd;
        int ymin = get<3>(detections[i]) * out_ht;
        int xmax = get<4>(detections[i]) * out_wd;
        int ymax = get<5>(detections[i]) * out_ht;
        int ob_label = get<0>(detections[i]);
        float conf = get<1>(detections[i]);
        for (int c = max(0, (int) xmin); c < min(xmax, (int) out_wd); c++) {
          for (int r = max(0, (int) ymin); r < min(ymax, (int) out_ht); r++) {
            output(r, c, ob_label) = conf;
          }
        }
      }
    } else {
      cerr << "render_objects: unable to render RGB currently." << endl;
    }
  }

 private:
  int out_channels_;
};

REGISTER_KERNEL_BUILDER(Name("RenderObjects").Device(DEVICE_CPU), RenderObjectsOp);


================================================
FILE: src/custom_ops/render_pose.cc
================================================
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

#include <iostream>
#include <tuple>

#include <opencv2/opencv.hpp>

#include "pose_utils.hpp"

using namespace tensorflow;
using namespace std;
namespace pt = boost::property_tree;


REGISTER_OP("RenderPose")
  .Attr("marker_wd_ratio: float = 0.01")  // ratio of output image width
  .Attr("out_type: int = 1")  // RENDER_POSE_OUT_TYPE_RGB or RENDER_POSE_OUT_TYPE_SPLITCHANNEL
  .Input("pose_label: int64")
  .Input("im_ht: int64")
  .Input("im_wd: int64")
  .Input("out_wd: int64")
  .Output("image: float");

class RenderPoseOp : public OpKernel {
 public:
  explicit RenderPoseOp(OpKernelConstruction* context) : OpKernel(context) {
    OP_REQUIRES_OK(
        context, context->GetAttr("marker_wd_ratio", &marker_wd_ratio_));
    OP_REQUIRES_OK(
        context, context->GetAttr("out_type", &out_type_));
  }

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& pose_label_tensor = context->input(0);
    auto pose_label = pose_label_tensor.flat<long long>();
    const Tensor& im_ht_tensor = context->input(1);
    auto im_ht = im_ht_tensor.flat<long long>()(0);
    const Tensor& im_wd_tensor = context->input(2);
    auto im_wd = im_wd_tensor.flat<long long>()(0);
    const Tensor& out_wd_tensor = context->input(3);
    auto out_wd = out_wd_tensor.flat<long long>()(0);
    int out_ht = ((im_ht * out_wd * 1.0) / im_wd);

    int num_keypoints = 16;  // MPII poses
    assert(pose_label.size() % (3 * num_keypoints) == 0);
    int n_people = pose_label.size() / (3 * num_keypoints);
    vector<vector<tuple<float,float,float>>> poses;
    int elts_per_pose = 3 * num_keypoints;
    for (int i = 0; i < n_people; i++) {
      vector<tuple<float,float,float>> person;
      for (int j = 0; j < num_keypoints; j++) {
        int x = pose_label(elts_per_pose * i + 3 * j);
        int y = pose_label(elts_per_pose * i + 3 * j + 1);
        int is_visible = pose_label(elts_per_pose * i + 3 * j + 2);
        // TODO (rgirdhar): Maybe this needs be fixed
        if (x == -1 && y == -1) {
          is_visible = 0;
        } else {
          is_visible = 1;
        }
        person.push_back(make_tuple(x, y, is_visible));
      }
      poses.push_back(convert_pose_mpii_to_coco(person));
    }

    cv::Mat render = render_pose(
        poses, out_ht, out_wd,
        im_ht, im_wd, out_wd * marker_wd_ratio_,
        out_type_);
    // Create an output tensor
    TensorShape out_shape {out_ht, out_wd, render.channels()};
    Tensor* output_tensor = NULL;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(
          0,
          out_shape,
          &output_tensor));
    auto output = output_tensor->tensor<float, 3>();

    for (int i = 0; i < render.rows; i++) {
      for (int j = 0; j < render.cols; j++) {
        float *pixel = render.ptr<float>(i, j);
        for (int k = 0; k < render.channels(); k++) {
          output(i, j, k) = pixel[render.channels()-k-1];
        }
      }
    }
  }

 private:

  vector<tuple<float,float,float>> convert_pose_mpii_to_coco(
      vector<tuple<float,float,float>> poses) {
    // Using the coco definition from https://github.com/CMU-Perceptual-Computing-Lab/caffe_rtpose
    // Using the MPII definition from http://human-pose.mpi-inf.mpg.de/#download
    vector<tuple<float,float,float>> res;
    auto dummy = make_tuple(0, 0, 0);  // for the parts I don't have in MPII
    map<int, int> coco_to_mpii = {
      {0, 9},  // Nose, head_top (approx)
      {1, 8},
      {2, 12},
      {3, 11},
      {4, 10},
      {5, 13},
      {6, 14},
      {7, 15},
      {8, 2},
      {9, 1},
      {10, 0},
      {11, 3},
      {12, 4},
      {13, 5},
      {14, -1},
      {15, -1},
      {16, -1},
      {17, -1},
      {18, -1}
    };
    for (int i = 0; i < coco_to_mpii.size(); i++) {
      if (coco_to_mpii[i] == -1) {
        res.push_back(dummy);
      } else {
        res.push_back(poses[coco_to_mpii[i]]);
      }
    }
    return res;
  }

  float marker_wd_ratio_;
  int out_type_;
};

REGISTER_KERNEL_BUILDER(Name("RenderPose").Device(DEVICE_CPU), RenderPoseOp);


================================================
FILE: src/custom_ops/test/pose_to_heatmap_op_test.py
================================================
import tensorflow as tf
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from custom_ops.custom_ops_factory import pose_to_heatmap

with tf.Session(''):
  pose = [50, 50, 1] * 3 +\
      [0, 0, 1] * 2 +\
      [-1, -1, 1] * 11
  pose += [90, 90, 1] * 3 +\
      [0, 0, 1] * 2 +\
      [-1, -1, 1] * 11

  T, T_valid = pose_to_heatmap(
    pose,
    100,
    200,
    100,
    out_channels=16
  )
  A = T.eval()
  A_valid = T_valid.eval()
  plt.imsave('temp.jpg', np.mean(A, axis=-1))
  print A_valid
  import pdb
  pdb.set_trace()
  a = 1


================================================
FILE: src/custom_ops/test/render_objects_op_test.py
================================================
import tensorflow as tf
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from custom_ops.custom_ops_factory import render_objects

with tf.Session(''):
  T = render_objects(
    '1 1 0.743129 0.031770 0.151354 0.448363 0.994178\n'
    '1 1 0.813451 0.517574 0.303005 0.957526 0.975016',
    100,
    200,
    100,
    out_channels=80
  )
  A = T.eval()
  plt.imsave('temp.jpg', np.mean(A, axis=-1))
  import pdb
  pdb.set_trace()
  a = 1


================================================
FILE: src/custom_ops/test/zero_out_channels_op_test.py
================================================
import tensorflow as tf
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from custom_ops.custom_ops_factory import zero_out_channels

with tf.Session(''):
  A = np.ones((1, 3, 3, 5))
  channels = [True, False, True, True, True]
  B = zero_out_channels(A, channels)
  print B
  C = B.eval()
  assert(np.all(C[:, :, :, 0] == 1))
  assert(np.all(C[:, :, :, 1] == 0))
  assert(np.all(C[:, :, :, 2] == 1))
  assert(np.all(C[:, :, :, 3] == 1))
  import pdb
  pdb.set_trace()
  a = 1


================================================
FILE: src/custom_ops/zero_out_channels.cc
================================================
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/op_kernel.h"

using namespace tensorflow;

REGISTER_OP("ZeroOutChannels")
  .Attr("T: {float32, float64, int32, int64}")
  .Input("to_zero: T")  // must be 4-dim images
  .Input("channels: bool")  // list of true/false, false=>zero out that channel
  .Output("zeroed: T")
  .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
    c->set_output(0, c->input(0));
    return Status::OK();
  });

template <typename T>
class ZeroOutChannelsOp : public OpKernel {
 public:
  explicit ZeroOutChannelsOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensor
    const Tensor& input_tensor = context->input(0);
    const Tensor& input_tensor_channel = context->input(1);
    auto input = input_tensor.tensor<T, 4>();
    auto input_channel = input_tensor_channel.flat<bool>();

    assert(input_tensor.shape().dims() == 4);
    int num_channels = input_tensor.shape().dim_size(3);
    assert(num_channels == input_tensor_channel.shape().dim_size(0));
    Tensor *output = NULL;
    OP_REQUIRES_OK(
        context,
        context->allocate_output(0, input_tensor.shape(), &output));
    auto output_flat = output->tensor<T, 4>();
    for (int i = 0; i < input_tensor.shape().dim_size(0); i++) {
      for (int j = 0; j < input_tensor.shape().dim_size(1); j++) {
        for (int k = 0; k < input_tensor.shape().dim_size(2); k++) {
          for (int l = 0; l < input_tensor.shape().dim_size(3); l++) {
            if (input_channel(l) == false) {
              output_flat(i, j, k, l) = 0;
            } else {
              output_flat(i, j, k, l) = input(i, j, k, l);
            }
          }
        }
      }
    }
  }
};

REGISTER_KERNEL_BUILDER(
    Name("ZeroOutChannels")
    .Device(DEVICE_CPU)
    .TypeConstraint<double>("T"),
    ZeroOutChannelsOp<double>);
REGISTER_KERNEL_BUILDER(
    Name("ZeroOutChannels")
    .Device(DEVICE_CPU)
    .TypeConstraint<float>("T"),
    ZeroOutChannelsOp<float>);
REGISTER_KERNEL_BUILDER(
    Name("ZeroOutChannels")
    .Device(DEVICE_CPU)
    .TypeConstraint<int>("T"),
    ZeroOutChannelsOp<int>);
REGISTER_KERNEL_BUILDER(
    Name("ZeroOutChannels")
    .Device(DEVICE_CPU)
    .TypeConstraint<long long>("T"),
    ZeroOutChannelsOp<long long>);


================================================
FILE: src/datasets/__init__.py
================================================


================================================
FILE: src/datasets/charades.py
================================================
"""Provides data for the HMDB51 dataset.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets.video_data_utils import gen_dataset
import tensorflow as tf

_CHARADES_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/001_Charades/v1/Lists/train_test_lists/'
_CHARADES_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/004_Charades/Processed/002_Pose_CPM_v2/'

def get_split(split_name, dataset_dir,
              file_pattern=None,
              reader=None, **kwargs):

  _NUM_CLASSES = 157
  # There are no pose labels, but need to keep this to load models from MPII
  # trained
  # Also, now the processing can still avoided by having no loss on pose
  _NUM_POSE_KEYPOINTS = 16
  # Need to do this otherwise the lambda function defined below will not work
  # It evaluates the kwargs['..'] also when evaluated
  if 'dataset_list_dir' not in kwargs:
    dataset_list_dir = _CHARADES_TRINITY_LIST_DIR
  else:
    dataset_list_dir = kwargs['dataset_list_dir']
  _LIST_FN = lambda split, id: \
      '%s/%s_split%d.txt' % (
        dataset_list_dir,
        split, id)

  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS
  kwargs['num_classes'] = _NUM_CLASSES
  kwargs['list_fn'] = _LIST_FN
  with open(_LIST_FN(split_name, kwargs['split_id']), 'r') as fin:
    ncols = len(fin.readline().strip().split())
  if ncols == 4:
    input_file_style = '4-col'
  elif ncols == 3:
    input_file_style = '3-col'  # since video level testing with mAP
  else:
    raise ValueError('Invalid file style')
  tf.logging.info('Using input_file_style {}'.format(input_file_style))

  # need to remove some things from kwargs (if they exist) before passing on
  kwargs.pop('dataset_list_dir', [])
  return gen_dataset(split_name, dataset_dir, file_pattern,
                     reader,
                     pose_dataset_dir=_CHARADES_TRINITY_POSE_LABEL_DIR,
                     input_file_style=input_file_style,
                     **kwargs), _NUM_POSE_KEYPOINTS


================================================
FILE: src/datasets/dataset_factory.py
================================================
"""A factory-pattern class which returns classification image/label pairs."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets import mpii
from datasets import hmdb51
from datasets import charades
from datasets import hico
from datasets import jhmdb21

datasets_map = {
  'mpii': mpii,
  'hmdb51': hmdb51,
  'charades': charades,
  'hico': hico,
  'jhmdb21': jhmdb21
}


def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None,
                **kwargs):  # added by rgirdhar: allow other options
  """Given a dataset name and a split_name returns a Dataset.

  Args:
    name: String, the name of the dataset.
    split_name: A train/test split name.
    dataset_dir: The directory where the dataset files are stored.
    file_pattern: The file pattern to use for matching the dataset source files.
    reader: The subclass of tf.ReaderBase. If left as `None`, then the default
      reader defined by each dataset is used.

  Returns:
    A `Dataset` class.

  Raises:
    ValueError: If the dataset `name` is unknown.
  """
  if name not in datasets_map:
    raise ValueError('Name of dataset unknown %s' % name)
  return datasets_map[name].get_split(
      split_name,
      dataset_dir,
      file_pattern,
      reader, **kwargs)


================================================
FILE: src/datasets/dataset_utils.py
================================================
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains utilities for downloading and converting datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

LABELS_FILENAME = 'labels.txt'


def int64_feature(values):
  """Returns a TF-Feature of int64s.

  Args:
    values: A scalar or list of values.

  Returns:
    a TF-Feature.
  """
  if not isinstance(values, (tuple, list)):
    values = [values]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def bytes_feature(values):
  """Returns a TF-Feature of bytes.

  Args:
    values: A string.

  Returns:
    a TF-Feature.
  """
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))


def image_to_tfexample(image_data, image_format, height, width, class_id):
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': bytes_feature(image_data),
      'image/format': bytes_feature(image_format),
      'image/class/label': int64_feature(class_id),
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
  }))


def download_and_uncompress_tarball(tarball_url, dataset_dir):
  """Downloads the `tarball_url` and uncompresses it locally.

  Args:
    tarball_url: The URL of a tarball file.
    dataset_dir: The directory where the temporary files are stored.
  """
  filename = tarball_url.split('/')[-1]
  filepath = os.path.join(dataset_dir, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()
  filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dataset_dir)


def write_label_file(labels_to_class_names, dataset_dir,
                     filename=LABELS_FILENAME):
  """Writes a file with the list of class names.

  Args:
    labels_to_class_names: A map of (integer) labels to class names.
    dataset_dir: The directory in which the labels file should be written.
    filename: The filename where the class names are written.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'w') as f:
    for label in labels_to_class_names:
      class_name = labels_to_class_names[label]
      f.write('%d:%s\n' % (label, class_name))


def has_labels(dataset_dir, filename=LABELS_FILENAME):
  """Specifies whether or not the dataset directory contains a label map file.

  Args:
    dataset_dir: The directory in which the labels file is found.
    filename: The filename where the class names are written.

  Returns:
    `True` if the labels file exists and `False` otherwise.
  """
  return tf.gfile.Exists(os.path.join(dataset_dir, filename))


def read_label_file(dataset_dir, filename=LABELS_FILENAME):
  """Reads the labels file and returns a mapping from ID to class name.

  Args:
    dataset_dir: The directory in which the labels file is found.
    filename: The filename where the class names are written.

  Returns:
    A map from a label (integer) to class name.
  """
  labels_filename = os.path.join(dataset_dir, filename)
  with tf.gfile.Open(labels_filename, 'r') as f:
    lines = f.read().decode()
  lines = lines.split('\n')
  lines = filter(None, lines)

  labels_to_class_names = {}
  for line in lines:
    index = line.index(':')
    labels_to_class_names[int(line[:index])] = line[index+1:]
  return labels_to_class_names


================================================
FILE: src/datasets/hico.py
================================================
"""Provides data for the HMDB51 dataset.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets.video_data_utils import gen_dataset

_HICO_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/005_HICO/data_videoFormat/001_Basic/train_test_lists'
_HICO_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Image/003_HICO/data_videoFormat/001_Basic/features/001_CPMPose/'
_HICO_DATASET_DIR = '/scratch/rgirdhar/Datasets/Image/003_HICO/data_videoFormat/001_Basic/frames'

def get_split(split_name, dataset_dir,
              file_pattern=None,
              reader=None, **kwargs):

  _NUM_CLASSES = 600
  # There are no pose labels, but need to keep this to load models from MPII
  # trained
  # Also, now the processing can still avoided by having no loss on pose
  _NUM_POSE_KEYPOINTS = 16
  if 'dataset_list_dir' not in kwargs:
    dataset_list_dir = _HICO_TRINITY_LIST_DIR
  else:
    dataset_list_dir = kwargs['dataset_list_dir']
  _LIST_FN = lambda split, id: \
      '%s/%s_split%d.txt' % (
        dataset_list_dir,
        split, id)

  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS
  kwargs['num_classes'] = _NUM_CLASSES
  kwargs['list_fn'] = _LIST_FN
  input_file_style = '3-col'
  kwargs.pop('dataset_list_dir', [])
  return gen_dataset(split_name, dataset_dir,
                     file_pattern, reader,
                     pose_dataset_dir=_HICO_TRINITY_POSE_LABEL_DIR,
                     input_file_style=input_file_style,
                     **kwargs), _NUM_POSE_KEYPOINTS


================================================
FILE: src/datasets/hmdb51.py
================================================
"""Provides data for the HMDB51 dataset.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets.video_data_utils import gen_dataset

_HMDB51_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/001_NetVLADVideo/raw/HMDB51/lists/train_test_lists2'
_HMDB51_TRINITY_POSE_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/002_HMDB51/processed/features/002_CPM_Pose/'
_HMDB51_TRINITY_OBJECTS_LABEL_DIR = '/scratch/rgirdhar/Datasets/Video/002_HMDB51/processed/features/001_YOLO9K_cocoDets_denseFilledIn'

def get_split(split_name, dataset_dir,
              file_pattern=None,
              reader=None, **kwargs):
              # dataset_list_dir=_HMDB51_TRINITY_LIST_DIR,
              # modality='rgb', num_samples=1,
              # split_id=1, **kwargs):

  _NUM_CLASSES = 51
  # There are no pose labels, but need to keep this to load models from MPII
  # trained
  # Also, now the processing can still avoided by having no loss on pose
  _NUM_POSE_KEYPOINTS = 16
  _LIST_FN = lambda split, id: \
      '%s/%s_split%d.txt' % (
        kwargs['dataset_list_dir'] if 'dataset_list_dir' in kwargs
        else _HMDB51_TRINITY_LIST_DIR,
        split, id)

  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS
  kwargs['num_classes'] = _NUM_CLASSES
  kwargs['list_fn'] = _LIST_FN
  return gen_dataset(split_name, dataset_dir, file_pattern,
                     reader,
                     pose_dataset_dir=_HMDB51_TRINITY_POSE_LABEL_DIR,
                     objects_dataset_dir=_HMDB51_TRINITY_OBJECTS_LABEL_DIR,
                     **kwargs), _NUM_POSE_KEYPOINTS
                     # modality, num_samples, split_id,
                     # _NUM_CLASSES, _LIST_FN, **kwargs), _NUM_POSE_KEYPOINTS


================================================
FILE: src/datasets/image_read_utils.py
================================================
import tensorflow as tf

# TODO: move this to the main train script if useful. Not a good idea to have this inside.
tf.app.flags.DEFINE_string(
    'pose_style', 'heatmap',
    'Select style for pose to be rendered [heatmap/render].')
FLAGS = tf.app.flags.FLAGS

IM_HT = 256
IM_WD = 340

def _get_frame_sublist(start_frame, duration, num_samples, num_consec_frames,
                       randomFromSegmentStyle=None):
  # follow segmental architecture
  res = []
  step = tf.cast((duration - tf.constant(num_consec_frames)) / 
                 (tf.constant(num_samples)), 'int32')
  step = tf.maximum(step, 1)
  cur_end_point = 0
  if randomFromSegmentStyle is None:
    if num_samples == 1:
      randomFromSegmentStyle = True  # because otherwise would not make sense
    else:
      randomFromSegmentStyle = False
  # start_frame = tf.Print(start_frame, [start_frame], 'Using start frame: ')
  # The following will be printed as many times as the number of read threads
  if randomFromSegmentStyle:
    tf.logging.info('Reading in random segment style')
  else:
    tf.logging.info('IMP NOTE:: Reading uniform frames')
  for i in range(num_samples):
    if randomFromSegmentStyle:
      res.append(tf.random_uniform([1],
                                   tf.minimum(start_frame + step * i,
                                              duration-num_consec_frames-1),
                                   tf.minimum(start_frame + step * (i+1),
                                              duration-num_consec_frames),
                                   dtype='int32')[0])
    else:
      res.append(tf.minimum(start_frame + step * i, duration - 1))
  # To debug
  # res[0] = tf.Print(res[0], res, 'Offsets:' )
  [el.set_shape(()) for el in res]
  return res

def _get_frame_sublist_SAME_AS_CAFFE(
  start_frame, duration, num_samples, num_consec_frames,
  randomFromSegmentStyle=None):
  # follow segmental architecture
  res = []
  avg_duration = tf.cast(duration / tf.constant(num_samples), 'int32')
  cur_end_point = 0
  if randomFromSegmentStyle is None:
    if num_samples == 1:
      randomFromSegmentStyle = True  # because otherwise would not make sense
    else:
      randomFromSegmentStyle = False
  # start_frame = tf.Print(start_frame, [start_frame], 'Using start frame: ')
  # The following will be printed as many times as the number of read threads
  if randomFromSegmentStyle:
    tf.logging.info('Reading in random segment style')
  else:
    tf.logging.info('IMP NOTE:: Reading uniform frames')
  for i in range(num_samples):
    if randomFromSegmentStyle:
      offset = tf.random_uniform([1], 0, avg_duration-num_consec_frames+1,
                                 dtype=tf.int32)
      T = tf.cond(tf.greater_equal(avg_duration, num_consec_frames),
                  lambda: offset + i * avg_duration,
                  lambda: tf.constant([1]))
      res.append(T[0])
    else:
      T = tf.cond(tf.greater_equal(avg_duration, num_consec_frames),
                  lambda: (
                    avg_duration-num_consec_frames+1)/2 + i*avg_duration,
                  lambda: tf.constant([1]))
      res.append(T[0])
  # To debug
  # res[0] = tf.Print(res[0], res, 'Offsets:' )
  return res

def _read_from_disk_spatial(fpath, nframes, num_samples=25, start_frame=0,
                            file_prefix='', file_zero_padding=4, file_index=1,
                            dataset_dir='', frame_sublist=None,
                            randomFromSegmentStyle=None):
    if frame_sublist is None:
      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples, 1,
                                        randomFromSegmentStyle)
    allimgs = []
    with tf.variable_scope('read_rgb_video'):
        for i in range(num_samples):
            with tf.variable_scope('read_rgb_image'):
                prefix = file_prefix + '_' if file_prefix else ''
                impath = tf.string_join([
                    tf.constant(dataset_dir + '/'),
                    fpath, tf.constant('/'),
                    prefix,
                    tf.as_string(frame_sublist[i] + file_index,
                      width=file_zero_padding, fill='0'),
                    tf.constant('.jpg')])
                # To debug
                # impath = tf.Print(impath, [impath], message='Reading image:')
                img_str = tf.read_file(impath)
            allimgs.append(img_str)
    return allimgs


def _read_from_disk_temporal(
    fpath, nframes, num_samples=25,
    optical_flow_frames=10, start_frame=0,
    file_prefix='', file_zero_padding=4, file_index=1,
    dataset_dir='', frame_sublist=None, randomFromSegmentStyle=None):
    if frame_sublist is None:
      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples,
                                         optical_flow_frames,
                                         randomFromSegmentStyle)
    allimgs = []
    with tf.variable_scope('read_flow_video'):
        for i in range(num_samples):
            with tf.variable_scope('read_flow_image'):
              flow_img = []
              for j in range(optical_flow_frames):
                # To protect for small videos, avoid overshooting the filelist
                frame_id = frame_sublist[i] + j
                frame_id = tf.cond(
                  tf.greater(frame_id, nframes-2),
                  lambda: nframes-2,
                  lambda: frame_id)

                with tf.variable_scope('read_flow_channels'):
                  for dr in ['x', 'y']:
                    prefix = file_prefix + '_' if file_prefix else ''
                    impath = tf.string_join([
                        tf.constant(dataset_dir + '/'),
                        fpath, tf.constant('/'),
                        prefix, '%s_' % dr,
                        tf.as_string(frame_id + file_index,
                          width=file_zero_padding, fill='0'),
                        tf.constant('.jpg')])
                    # impath = tf.Print(impath, [impath], "Read file: ")
                    img_str = tf.read_file(impath)
                    flow_img.append(img_str)
              allimgs.append(flow_img)
    return allimgs


def _read_from_disk_pose(
    fpath, nframes, num_samples=25,
    pose_frames=5, start_frame=0,
    file_prefix='', file_zero_padding=4, file_index=1,
    dataset_dir='', frame_sublist=None, randomFromSegmentStyle=None,
    file_ext='.jpg'):
    from custom_ops.custom_ops_factory import read_file_safe
    if frame_sublist is None:
      frame_sublist = _get_frame_sublist(start_frame, nframes, num_samples,
                                         pose_frames,
                                         randomFromSegmentStyle)
    allimgs = []
    with tf.variable_scope('read_pose_video'):
      for i in range(num_samples):
        with tf.variable_scope('read_pose_image'):
          pose_img = []
          for j in range(pose_frames):
            # To protect for small videos, avoid overshooting the filelist
            frame_id = frame_sublist[i] + j
            frame_id = tf.cond(
              tf.greater(frame_id, nframes-1),  # there are nframes-1 flow
              lambda: nframes-1,
              lambda: frame_id)

            prefix = file_prefix + '_' if file_prefix else ''
            impath = tf.string_join([
              tf.constant(dataset_dir + '/'),
              fpath, tf.constant('/'),
              prefix,
              tf.as_string(frame_id + file_index,
              width=file_zero_padding, fill='0'),
              tf.constant(file_ext)])
            # img_str = tf.read_file(impath)
            img_str = read_file_safe(impath)
            pose_img.append(img_str)
          allimgs.append(pose_img)
    return allimgs


def decode_rgb(img_str):
  with tf.variable_scope('decode_rgb_frame'):
    img = tf.image.decode_jpeg(img_str, channels=3)
    # Always convert before resize, this is a bug in TF
    # https://github.com/tensorflow/tensorflow/issues/1763
    # IMPORTANT NOTE: The original netvlad model was trained with the convert
    # happening after the resize, and hence it's trained with the large values.
    # It still works if I do that, but I'm training a new netvlad RGB model
    # with the current setup.
    img = tf.image.convert_image_dtype(img, dtype=tf.float32)
  return [img]


def decode_flow(img_str, perImageChannels=1):
  # IMPORTANT NOTE: I am now resizing the flow frames before running through
  # the preprocessing. I was not doing that earlier (in the master). This leads
  # to the 66 number to drop to 63 on HMDB. But it should be fixable by
  # re-training with this setup
  with tf.variable_scope('decode_flow_frame'):
    img = tf.concat([tf.image.decode_jpeg(el, channels=perImageChannels)
      for el in tf.unstack(img_str)], axis=2)
    # Always convert before resize, this is a bug in TF
    # https://github.com/tensorflow/tensorflow/issues/1763
    img = tf.image.convert_image_dtype(img, dtype=tf.float32)
  return [img]


def decode_poseJson(img_str, perImageChannels=1):
  from custom_ops.custom_ops_factory import json_to_pose
  with tf.variable_scope('decode_poseJson_frame'):
    pose_style = FLAGS.pose_style
    img = tf.concat([json_to_pose(
      el, out_height=IM_HT, out_width=IM_WD,
      marker_wid=5 if pose_style=='render' else 20,
      out_style=pose_style)
      for el in img_str], axis=2)
    # img = tf.image.resize_images(img, [IM_HT, IM_WD]) # not any faster
    # TODO: remove the following checks once sure
    # with tf.control_dependencies(
    #   [tf.assert_less_equal(img, tf.constant(1.5)),
    #    tf.assert_greater_equal(img, tf.constant(-0.5))]):
    #   img = tf.identity(img)
    # img = tf.image.convert_image_dtype(img, dtype=tf.float32)
  return [img]


def _decode_from_string(img_str, modality):
  if modality == 'rgb':
    img = decode_rgb(img_str)
  elif modality.startswith('flow'):
    img = decode_flow(img_str)
  elif modality.startswith('rgb+flow'):
    with tf.name_scope('decode_rgbNflow'):
      img_rgb = decode_rgb(img_str[..., 0])
      img_flow = decode_flow(img_str[..., 1:])
      img = [img_rgb[0], img_flow[0]]
  elif modality.startswith('posejson'):
    img = decode_poseJson(img_str)
  elif modality.startswith('pose'):
    img = decode_flow(img_str, perImageChannels=3)
  im_ht = tf.reduce_max([tf.shape(el)[-3] for el in img])
  im_wd = tf.reduce_max([tf.shape(el)[-2] for el in img])
  img = [tf.image.resize_images(el, [IM_HT, IM_WD]) for el in img]
  return img, im_ht, im_wd


================================================
FILE: src/datasets/jhmdb21.py
================================================
"""Provides data for the JHMDB21 dataset.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from datasets.video_data_utils import gen_dataset

_JHMDB21_TRINITY_LIST_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/002_JHMDB/Processed/Lists/train_test_lists/'
_JHMDB21_TRINITY_POSE_LABEL_DIR = '/data/rgirdhar/Data2/Projects/2016/002_VideoRepresentation/StandardData/002_JHMDB/Processed/Features/001_CPM_Pose/'
_JHMDB21_TRINITY_OBJECTS_LABEL_DIR = ''

def get_split(split_name, dataset_dir,
              file_pattern=None,
              reader=None, **kwargs):
              # dataset_list_dir=_JHMDB21_TRINITY_LIST_DIR,
              # modality='rgb', num_samples=1,
              # split_id=1, **kwargs):

  _NUM_CLASSES = 21
  # There are no pose labels, but need to keep this to load models from MPII
  # trained
  # Also, now the processing can still avoided by having no loss on pose
  _NUM_POSE_KEYPOINTS = 16
  _LIST_FN = lambda split, id: \
      '%s/%s_split%d.txt' % (
        kwargs['dataset_list_dir'] if 'dataset_list_dir' in kwargs
        else _JHMDB21_TRINITY_LIST_DIR,
        split, id)

  kwargs['num_pose_keypoints'] = _NUM_POSE_KEYPOINTS
  kwargs['num_classes'] = _NUM_CLASSES
  kwargs['list_fn'] = _LIST_FN
  return gen_dataset(split_name, dataset_dir, file_pattern,
                     reader,
                     pose_dataset_dir=_JHMDB21_TRINITY_POSE_LABEL_DIR,
                     objects_dataset_dir=_JHMDB21_TRINITY_OBJECTS_LABEL_DIR,
                     **kwargs), _NUM_POSE_KEYPOINTS
                     # modality, num_samples, split_id,
                     # _NUM_CLASSES, _LIST_FN, **kwargs), _NUM_POSE_KEYPOINTS


================================================
FILE: src/datasets/mpii.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tensorflow as tf

slim = tf.contrib.slim

_FILE_PATTERN = 'mpii_%s_*.tfrecord'

SPLITS_TO_SIZES = {'trainval_train': 8219, 'trainval_val': 6988,
                   'trainval': 15207,  # 8219 + 6988
                   'test': 5709}

_NUM_CLASSES = 393  # activities

_NUM_POSE_KEYPOINTS = 16

_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying size.',
    'label': 'A pose representation, [x1,y1,is_visible1,...]',
}

def _tfrecord_file_pattern_to_list(pattern):
  res = glob.glob(pattern)
  return sorted(res)


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
  """Gets a dataset tuple with instructions for reading flowers.

  Args:
    split_name: A train/validation split name.
    dataset_dir: The base directory of the dataset sources.
    file_pattern: The file pattern to use when matching the dataset sources.
      It is assumed that the pattern contains a '%s' string so that the split
      name can be inserted.
    reader: The TensorFlow reader type.

  Returns:
    A `Dataset` namedtuple.

  Raises:
    ValueError: if `split_name` is not a valid train/validation split.
  """
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  if not file_pattern:
    file_pattern = _FILE_PATTERN
  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
  # The following is important to ensure the files are read in order, because
  # otherwise test time output can be generated in any random order
  file_pattern = _tfrecord_file_pattern_to_list(file_pattern)

  # Allowing None in the signature so that dataset_factory can use the default.
  if reader is None:
    reader = tf.TFRecordReader

  keys_to_features = {
      'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature((), tf.string, default_value='png'),
      'image/class/pose': tf.VarLenFeature(dtype=tf.int64),
      'image/class/action_label': tf.FixedLenFeature(
        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
      'image/height': tf.FixedLenFeature(
        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
      'image/width': tf.FixedLenFeature(
        (), tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
  }

  items_to_handlers = {
      'image': slim.tfexample_decoder.Image(),
      'pose': slim.tfexample_decoder.Tensor('image/class/pose'),
      'action_label': slim.tfexample_decoder.Tensor('image/class/action_label'),
      'im_ht': slim.tfexample_decoder.Tensor('image/height'),
      'im_wd': slim.tfexample_decoder.Tensor('image/width'),
  }

  decoder = slim.tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  labels_to_names = None

  return slim.dataset.Dataset(
      data_sources=file_pattern,
      reader=reader,
      decoder=decoder,
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=_NUM_CLASSES,
      labels_to_names=labels_to_names), _NUM_POSE_KEYPOINTS


================================================
FILE: src/datasets/video_data_utils.py
================================================
"""Provides data for the UCF101 dataset.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import numpy as np
import tensorflow as tf
import sys

from datasets import dataset_utils
from datasets.image_read_utils import _read_from_disk_spatial, \
    _decode_from_string, _read_from_disk_temporal, _get_frame_sublist, \
    _read_from_disk_pose
from tensorflow.python.platform import tf_logging as logging
from custom_ops.custom_ops_factory import read_json_pose, read_sparse_label

slim = tf.contrib.slim


class PreReadTextLineReader(tf.ReaderBase):
  @staticmethod
  def read(lines_queue):
    # just return the line from this queue.
    # The queue will be randomized if training and not if not.
    # Standard tf.TextLineReader will open the file and return line by line, we
    # don't want that, but want to randomize the whole file. Hence, this solves
    # this by first reading the whole file into the queue and then just picking
    # stuff from the queue.
    video_information = lines_queue.dequeue()
    return [video_information, video_information]  # make the video_info as the
                                                   # key for this datapoint as
                                                   # well


def decode_train_file_line(line, input_file_style='3-col',
                           input_file_style_label='one-label'):
  start_frame = 0
  if input_file_style == '3-col':
    fpath, nframes, label = tf.decode_csv(
        line, record_defaults=[[''], [-1], ['']],
        field_delim=' ')
  elif input_file_style == '4-col':
    fpath, start_frame, nframes, label = tf.decode_csv(
        line, record_defaults=[[''], [-1], [-1], ['']],
        field_delim=' ')
  else:
    raise ValueError('Unknown input file style: {0}'.format(
      input_file_style))

  if input_file_style_label == 'one-label':
    label = tf.string_to_number(label, out_type=tf.int32)
    label.set_shape(())
  elif input_file_style_label.startswith('multi-label'):
    nclasses = int(input_file_style_label[len('multi-label'):])
    label = read_sparse_label(label, nclasses)
    label.set_shape((nclasses,))
  return fpath, start_frame, nframes, label


def getReaderFn(num_samples, modality='rgb', dataset_dir='',
                randomFromSegmentStyle=None,
                input_file_style='3-col',
                input_file_style_label='one-label'):
  def readerFn():
    class reader_func(tf.ReaderBase):
      @staticmethod
      # def read(filename_queue):
      def read(value):
        # value = filename_queue.dequeue()
        fpath, start_frame, nframes, label = decode_train_file_line(
          value, input_file_style, input_file_style_label)
        # TODO(rgirdhar): Release the file_prefix='', file_zero_padding=4,
        # file_index=1 options to the bash script
        # TODO: Fix the optical_flow_frame number...
        optical_flow_frames = 1
        frame_sublist = _get_frame_sublist(0, nframes, num_samples,
                                           optical_flow_frames,
                                           randomFromSegmentStyle=randomFromSegmentStyle)
        # frame_sublist = tf.Print(frame_sublist, frame_sublist, "frame sublist:")
        if modality == 'rgb':
          assert(len(dataset_dir) >= 1)
          image_buffer = _read_from_disk_spatial(
              fpath, nframes, num_samples=num_samples,
              start_frame=start_frame,
              file_prefix='image', file_zero_padding=5, file_index=1,
              dataset_dir=dataset_dir[0],
              frame_sublist=frame_sublist,
              randomFromSegmentStyle=randomFromSegmentStyle)
        elif modality.startswith('flow'):
          assert(len(dataset_dir) >= 1)
          optical_flow_frames = int(modality[4:])
          image_buffer = _read_from_disk_temporal(
              fpath, nframes, num_samples=num_samples,
              start_frame=start_frame,
              optical_flow_frames=optical_flow_frames,
              file_prefix='flow', file_zero_padding=5, file_index=1,
              dataset_dir=dataset_dir[0],
              frame_sublist=frame_sublist,
              randomFromSegmentStyle=randomFromSegmentStyle)
        elif modality.startswith('rgb+flow'):
          assert(len(dataset_dir) >= 2)
          # in this case, fix the step for both the streams to ensure correspondence
          optical_flow_frames = int(modality[8:])
          rgb_image_buffer = _read_from_disk_spatial(
              fpath, nframes, num_samples=num_samples,
              start_frame=start_frame,
              file_prefix='image', file_zero_padding=5, file_index=1,
              dataset_dir=dataset_dir[0],
              frame_sublist=frame_sublist)
          flow_image_buffer = _read_from_disk_temporal(
              fpath, nframes, num_samples=num_samples,
              start_frame=start_frame,
              optical_flow_frames=optical_flow_frames,
              file_prefix='flow', file_zero_padding=5, file_index=1,
              dataset_dir=dataset_dir[1],
              frame_sublist=frame_sublist)
          image_buffer = zip(rgb_image_buffer, flow_image_buffer)
          image_buffer = [[el[0]] + el[1] for el in image_buffer]
        elif modality.startswith('pose'):
          assert(len(dataset_dir) >= 1)
          if modality.startswith('posejson'):
            pose_frames = int(modality[8:])
            file_ext = '.json'
          elif modality.startswith('pose'):
            pose_frames = int(modality[4:])
            file_ext = '.jpg'
          image_buffer = _read_from_disk_pose(
              fpath, nframes, num_samples=num_samples,
              start_frame=start_frame,
              pose_frames=pose_frames,
              file_prefix='image', file_zero_padding=5, file_index=1,
              dataset_dir=dataset_dir[0],
              frame_sublist=frame_sublist,
              randomFromSegmentStyle=randomFromSegmentStyle,
              file_ext=file_ext)
        else:
          logging.error('Unknown modality %s\n' % modality)
          raise ValueError()
        return [image_buffer, label, fpath, frame_sublist, start_frame]
    return reader_func
  return readerFn


def decoderFn(
  reader, num_samples=1, modality='rgb', dataset_dir='',
  randomFromSegmentStyle=True, num_pose_keypoints=16,
  pose_dataset_dir=None,
  num_object_catagories=80, objects_dataset_dir=None):
  class decoder_func(slim.data_decoder.DataDecoder):
    @staticmethod
    def list_items():
      return ['image', 'action_label', 'pose', 'im_ht', 'im_wd', 'objects']

    @staticmethod
    def decode(data, items):
      out = {}
      # Arguments:
      # data: Can be 3-col or 4-col CSV. A 3-col would look like "filepath
      # nframes class_id", 4-col will be similar for Charades like dataset
      # items: The different items to be returned.
      with tf.name_scope('decode_video'):
        if modality == 'rgb' or \
           modality.startswith('flow') or \
           modality.startswith('rgb+flow') or \
           modality.startswith('pose'):
          image_buffer, label, fpath, frame_sublist, start_frame = reader.read(data)
          # stacking required due to the way queues in main train loop work
          # image_buffer = tf.stack([tf.stack(_decode_from_string(el, modality)) for
          #                 el in image_buffer])
          image_lst = []
          image_hts = []
          image_wds = []
          for im_buf in image_buffer:
            temp = _decode_from_string(im_buf, modality)
            image_lst += temp[0]
            image_hts.append(temp[1])
            image_wds.append(temp[2])
          image_buffer = tf.stack(image_lst)
          im_ht = tf.reduce_max(image_hts)
          im_wd = tf.reduce_max(image_wds)
          # image_buffer = tf.stack([
          #   _decode_from_string(el, modality)[0] for el in image_buffer])
        else:
          logging.error('Unknown modality %s\n' % modality)
        # since my code gives a 0-1 image, change it back
        out['image'] = tf.cast(image_buffer * 255.0, tf.uint8)
        if 'pose' in items:
          if pose_dataset_dir is None:
            out['pose'] = [-tf.ones([num_pose_keypoints * 3,], dtype=tf.int64)]
          else:
            out['pose'] = [read_json_pose(tf.string_join([
              pose_dataset_dir, '/', fpath, '/',
              'image_',
              tf.as_string(frame_sublist_i+1, width=5, fill='0'),
              '.json'])) for frame_sublist_i in tf.unstack(frame_sublist)]
        if 'objects' in items:
          if objects_dataset_dir is None:
            out['objects'] = []
          else:
            out['objects'] = [tf.read_file(tf.string_join([
              objects_dataset_dir, '/', fpath, '/',
              'image_',
              tf.as_string(frame_sublist_i+1, width=5, fill='0'),
              '.txt'])) for frame_sublist_i in tf.unstack(frame_sublist)]
        out['action_label'] = label
        # The following is the original image size on disk,
        # on which pose etc would have been computed
        out['im_wd'] = tf.cast(im_wd, tf.int64)
        out['im_ht'] = tf.cast(im_ht, tf.int64)
        return [out[el] for el in items]
  return decoder_func


def count_frames_file(fpath, frameLevel=True):
  res = 0
  with open(fpath, 'r') as fin:
    for line in fin:
      if frameLevel:
        res += int(line.split()[1])
      else:
        res += 1
  return res


def gen_dataset(split_name, dataset_dir, file_pattern=None,
                reader=None,
                pose_dataset_dir=None,
                objects_dataset_dir=None,
                modality='rgb', num_samples=1,
                split_id=1, num_classes=0, list_fn=None,
                input_file_style='3-col',
                randomFromSegmentStyle=None, num_pose_keypoints=16,
                num_object_catagories=80,
                input_file_style_label='one-label'):
  """
  input_file_style_label: ['one-label'/'multi-label%d' % integer]
  """
  SPLITS_TO_SIZES = {
    'train': count_frames_file(list_fn('train', split_id), frameLevel=(num_samples==1)),
    'test': count_frames_file(list_fn('test', split_id), frameLevel=(num_samples==1)),
  }
  if split_name not in SPLITS_TO_SIZES:
    raise ValueError('split name %s was not recognized.' % split_name)

  _ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [? x ? x 3] color image.',
    'label': 'A single integer between 0 and %d' % num_classes,
  }
  LIST_FILE = list_fn(split_name, split_id)
  logging.info('Using file %s' % LIST_FILE)
  with open(LIST_FILE, 'r') as fin:
    data_sources = fin.read().splitlines()  # don't randomize here, in testing
                                            # I'll run without randomizing, and
                                            # the queue is going to randomize
                                            # automatically anyway

  # Allowing None in the signature so that dataset_factory can use the default.
  if not reader:
    reader = getReaderFn(num_samples, modality, [dataset_dir],
                         randomFromSegmentStyle, input_file_style,
                         input_file_style_label)

  labels_to_names = None
  # if dataset_utils.has_labels(dataset_dir):
  #   labels_to_names = dataset_utils.read_label_file(dataset_dir)

  return slim.dataset.Dataset(
      data_sources=data_sources,
      reader=lambda: PreReadTextLineReader,
      decoder=decoderFn(reader(), num_samples, modality, [dataset_dir],
                        randomFromSegmentStyle, num_pose_keypoints,
                        pose_dataset_dir,
                        num_object_catagories,
                        objects_dataset_dir),
      num_samples=SPLITS_TO_SIZES[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      num_classes=num_classes,
      labels_to_names=labels_to_names)


================================================
FILE: src/eval/__init__.py
================================================


================================================
FILE: src/eval/cap_eval_utils.py
================================================
# --------------------------------------------------------
# Written by Saurabh Gupta
# Modified by Ishan Misra
# rgirdhar: Obtained on March-09-2017 from
# https://github.com/imisra/latent-noise-icnm/blob/master/cap_eval_utils.py
# --------------------------------------------------------
import numpy as np
from scipy.interpolate import interp1d

from IPython.core.debugger import Tracer
import code

def calc_pr_ovr(counts, out, K):
  """
  [P, R, score, ap] = calc_pr_ovr(counts, out, K)
  Input    :
    counts : number of occurrences of this word in the ith image
    out    : score for this image
    K      : number of references
  Output   :
    P, R   : precision and recall
    score  : score which corresponds to the particular precision and recall
    ap     : average precision
  """
  K = np.float64(K)
  tog = np.hstack((counts[:,np.newaxis].astype(np.float64), out[:, np.newaxis].astype(np.float64)))
  ind = np.argsort(out)
  ind = ind[::-1]
  score = np.array([tog[i,1] for i in ind])
  sortcounts = np.array([tog[i,0] for i in ind])

  tp = sortcounts*(1.-1./K);
  fp = sortcounts.copy();
  for i in xrange(sortcounts.shape[0]):
    if sortcounts[i] > 1:
      fp[i] = 0.;
    elif sortcounts[i] == 0:
      fp[i] = 1.;
    elif sortcounts[i] == 1:
      fp[i] = 1./K;
  
  P = np.cumsum(tp)/(np.cumsum(tp) + np.cumsum(fp));

  # c = accumarray(sortcounts(:)+1, 1);
  c = [np.sum(np.array(sortcounts) == i) for i in xrange(int(max(sortcounts)+1))]
  ind = np.array(range(0, len(c)));
  numinst = ind*c*(K-1.)/K;
  numinst = np.sum(numinst, axis = 0)
  R = np.cumsum(tp)/numinst
  
  ap = voc_ap(R,P)
  return P, R, score, ap


def calc_pr_ovr_noref(counts, out):
  """
  [P, R, score, ap] = calc_pr_ovr(counts, out, K)
  Input    :
    counts : number of occurrences of this word in the ith image
    out    : score for this image
    K      : number of references
  Output   :
    P, R   : precision and recall
    score  : score which corresponds to the particular precision and recall
    ap     : average precision
  """ 
  #binarize counts
  counts = np.array(counts > 0, dtype=np.float32);
  tog = np.hstack((counts[:,np.newaxis].astype(np.float64), out[:, np.newaxis].astype(np.float64)))
  ind = np.argsort(out)
  ind = ind[::-1]
  score = np.array([tog[i,1] for i in ind])
  sortcounts = np.array([tog[i,0] for i in ind])

  tp = sortcounts;
  fp = sortcounts.copy();
  for i in xrange(sortcounts.shape[0]):
    if sortcounts[i] >= 1:
      fp[i] = 0.;
    elif sortcounts[i] < 1:
      fp[i] = 1.;
  P = np.cumsum(tp)/(np.cumsum(tp) + np.cumsum(fp));

  numinst = np.sum(counts);

  R = np.cumsum(tp)/numinst

  ap = voc_ap(R,P)
  return P, R, score, ap


def voc_ap(rec, prec):
  """
  ap = voc_ap(rec, prec)
  Computes the AP under the precision recall curve.
  """

  rec = rec.reshape(rec.size,1); prec = prec.reshape(prec.size,1)
  z = np.zeros((1,1)); o = np.ones((1,1));
  mrec = np.vstack((z, rec, o))
  mpre = np.vstack((z, prec, z))
  for i in range(len(mpre)-2, -1, -1):
    mpre[i] = max(mpre[i], mpre[i+1])

  I = np.where(mrec[1:] != mrec[0:-1])[0]+1;
  ap = 0;
  for i in I:
    ap = ap + (mrec[i] - mrec[i-1])*mpre[i];
  return ap

def compute_precision_score_mapping(thresh, prec, score):
  ind = np.argsort(thresh);
  thresh = thresh[ind];
  prec = prec[ind];
  for i in xrange(1, len(prec)):
    prec[i] = max(prec[i], prec[i-1]);
  
  indexes = np.unique(thresh, return_index=True)[1]
  indexes = np.sort(indexes);
  thresh = thresh[indexes]
  prec = prec[indexes]
  
  thresh = np.vstack((min(-1000, min(thresh)-1), thresh[:, np.newaxis], max(1000, max(thresh)+1)));
  prec = np.vstack((prec[0], prec[:, np.newaxis], prec[-1]));
  
  f = interp1d(thresh[:,0], prec[:,0])
  val = f(score)
  return val

def human_agreement(gt, K):
  """
  function [prec, recall] = human_agreement(gt, K)
  """
  c = np.zeros((K+1,1), dtype=np.float64)
  # namespace = globals().copy()
  # namespace.update(locals())
  # code.interact(local=namespace)

  for i in xrange(len(gt)):
    if gt[i]<K+1:
      c[gt[i]] += 1;
  #maxRun = len(gt);  
  # if len(gt) > K+1:
  #   print 'warning: '
  #   maxRun = K+1;
  # for i in xrange(maxRun):
  #   c[gt[i]] += 1;
  
  c = c/np.sum(c);
  ind = np.array(range(len(c)))[:, np.newaxis]

  n_tp = sum(ind*(ind-1)*c)/K;
  n_fp = c[1]/K;
  numinst = np.sum(c * (K-1) * ind) / K;
  prec = n_tp / (n_tp+n_fp);
  recall = n_tp / numinst;
  
  
  return prec, recall

#follows from http://arxiv.org/pdf/1312.4894v2.pdf (Sec 4.2)
def compute_warpstyle_pr(gtLabel, predMat, topK):
  assert gtLabel.shape == predMat.shape, 'gt {}; pred {}'.format(gtLabel.shape, predMat.shape)
  gtLabel = gtLabel.astype(np.float64)
  predMat = predMat.astype(np.float64)
  numTags = gtLabel.shape[1];
  numIm = gtLabel.shape[0];

  #first look at topK predictions per image
  topPreds = np.zeros_like(predMat);
  for imInd in range(numIm):
    topKInds = im_utils.maxk(predMat[imInd,...], topK);
    topPreds[imInd, topKInds] = 1;
  # tb.print_stack();namespace = globals().copy();namespace.update(locals());code.interact(local=namespace)
  gtLabel = (gtLabel > 0).astype(np.float64)
  topPreds = (topPreds > 0).astype(np.float64)
  corrMat = np.logical_and(gtLabel, topPreds).astype(np.float64)
  nc_per_tag = corrMat.sum(axis=0).astype(np.float64);
  ng_per_tag = gtLabel.sum(axis=0).astype(np.float64);
  np_per_tag = topPreds.sum(axis=0).astype(np.float64);
  #mean per-class
  perclass_recall = 0.0;
  perclass_precision = 0.0;
  eps = 1e-6;
  for t in range(numTags):
    cr = nc_per_tag[t]/(ng_per_tag[t]+eps);
    cp = nc_per_tag[t]/(np_per_tag[t]+eps);
    perclass_precision += cp;
    perclass_recall += cr;
  perclass_precision = (1.0/numTags) * perclass_precision;
  perclass_recall = (1.0/numTags) * perclass_recall;

  #overall
  overall_recall = nc_per_tag.sum()/(ng_per_tag.sum()+eps);
  overall_precision = nc_per_tag.sum()/(np_per_tag.sum()+eps);
  return perclass_precision, perclass_recall, overall_precision, overall_recall;

def print_benchmark_latex(evalFile, vocab = None, sortBy = "words", \
  printWords = False, printPos = True, printAgg = False, possOrder=None):
  #evalFile has the following ['details', 'agg', 'vocab', 'imdb'] 
  evalData = sg_utils.load_variables(evalFile);
  if vocab==None:
    vocab = evalData['vocab'];
  if 'details' in evalData:
    details = evalData['details'];
  else:
    details = evalData;
  ap = details['ap'];
  prec_at_human_rec = details['prec_at_human_rec'];
  human_prec = details['prec_at_human_rec'];
  words = vocab['words'];
  ind = 0;
  if possOrder is None:
    possOrder = ['NN', 'VB', 'JJ', 'DT', 'PRP', 'IN', 'other']
  print ' '.join(possOrder);
  for pos in possOrder:
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    ind = np.asarray(ind,dtype=np.int32)
    if any( np.isnan(ap[0,ind] )):
       #print 'nan numbers ... skipping them for mean'
       print 'nan numbers ... setting them to zero for mean stats'
       ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;
    print '%.1f &'%(100*np.mean(ap[0,ind])),
  print '%.1f & &'%(100*np.mean(ap[0, :]))
  for pos in possOrder:
    ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
    ind = np.asarray(ind,dtype=np.int32)
    if any( np.isnan(prec_at_human_rec[0,ind] )) or \
       any( np.isnan(human_prec[0,ind] )) :
       #print 'nan numbers ... skipping them for mean'
       print 'nan numbers ... setting them to zero for mean stats'
       prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;
       human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;
    print '%.1f &'%(100*np.mean(prec_at_human_rec[0,ind])),
  print '%.1f \\\\'%(100*np.mean(prec_at_human_rec[0, :]))
  

def print_benchmark_plain(evalFile, vocab = None, \
  sortBy = "words", printWords = False, printPos = True, printAgg = False):
  #evalFile has the following ['details', 'agg', 'vocab', 'imdb'] 
  evalData = sg_utils.load_variables(evalFile);
  if vocab==None:
    vocab = evalData['vocab'];
  if 'details' in evalData:
    details = evalData['details'];
  else:
    details = evalData;
  ap = details['ap'];
  prec_at_human_rec = details['prec_at_human_rec'];
  human_prec = details['prec_at_human_rec'];
  words = vocab['words'];
  ind = 0;

  if sortBy == "words":
    srtInds = np.argsort(words);
  elif sortBy == "ap":
    srtInds = np.argsort(ap);
    srtInds = srtInds[0];
    srtInds = srtInds[::-1];
  if printWords == True:
    print "{:>50s}".format("-"*50)
    print "{:^50s}".format("Word metrics")
    print "{:>50s}".format("-"*50)
    print "{:>15s} {:>8s} {:>6s} :     {:^5s}     {:^5s}". \
      format("Words","POS","Counts","mAP", "p@H")
    for i in srtInds:
      print "{:>15s} {:>8s} {:6d} :     {:5.2f}     {:5.2f}". \
        format(words[i], vocab['poss'][i], vocab['counts'][i], 100*np.mean(ap[0, i]), 100*np.mean(prec_at_human_rec[0, i]));

  if printPos:
    print "{:>50s}".format("-"*50)
    print "{:^50s}".format("POS metrics")
    print "{:>50s}".format("-"*50)
    print "{:>15s} :     {:^5s}     {:^5s}     {:^5s}". \
    format("POS", "mAP", "p@H", "h")

    for pos in list(set(vocab['poss'])):
      ind = [i for i,x in enumerate(vocab['poss']) if pos == x]
      ind = np.asarray(ind)
      if any( np.isnan(ap[0,ind] )) or \
         any( np.isnan(prec_at_human_rec[0,ind] )) or \
         any( np.isnan(human_prec[0,ind] )) :
         print 'nan numbers ... setting them to zero for mean stats'
         ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;
         prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;
         human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;
      print "{:>11s} [{:4d}]:     {:5.2f}     {:5.2f}     {:5.2f}". \
        format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))

  if printAgg:
    print "{:>50s}".format("-"*50)
    print "{:^50s}".format("Agg metrics")
    print "{:>50s}".format("-"*50)
    print "{:>15s} :     {:^5s}     {:^5s}     {:^5s}". \
      format("agg", "mAP", "p@H", "h")
    pos = 'all';
    ind = srtInds;
    ind = np.asarray(ind);
    if any( np.isnan(ap[0,ind] )) or \
         any( np.isnan(prec_at_human_rec[0,ind] )) or \
         any( np.isnan(human_prec[0,ind] )) :
         print 'nan numbers ... setting them to zero for mean stats'
         ap[0, ind[np.where(np.isnan(ap[0, ind]))]] = 0;
         prec_at_human_rec[0, ind[np.where(np.isnan(prec_at_human_rec[0, ind]))]] = 0;
         human_prec[0, ind[np.where(np.isnan(human_prec[0, ind]))]] = 0;
    print "{:>11s} [{:^4d}]     :     {:^5.2f}     {:5.2f}     {:5.2f}". \
      format(pos, len(ind), 100*np.mean(ap[0, ind]), 100*np.mean(prec_at_human_rec[0, ind]), \
        100*np.mean(human_prec[0, ind]))


================================================
FILE: src/eval/utils.py
================================================
from eval.cap_eval_utils import calc_pr_ovr_noref
import numpy as np

def compute_map(all_logits, all_labels):
  num_classes = all_logits.shape[1]
  APs = []
  for cid in range(num_classes):
    this_logits = all_logits[:, cid]
    this_labels = (all_labels == cid).astype('float32')
    if np.sum(this_labels) == 0:
      print('No positive videos for class {}. Ignoring...'.format(cid))
      continue
    _, _, _, ap = calc_pr_ovr_noref(this_labels, this_logits)
    APs.append(ap)
  mAP = np.mean(APs)
  return mAP, APs


================================================
FILE: src/eval.py
================================================
"""Generic evaluation script that evaluates a model using a given dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import argparse
import sys
import tensorflow as tf
import pprint
import os
import math
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import pdb

from datasets import dataset_factory
sys.path.append('../models/slim')
from nets import nets_factory
from preprocessing import preprocessing_factory
from config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from eval.utils import compute_map
from preprocess_pipeline import get_input

slim = tf.contrib.slim

def parse_args():
  """
  Parse input arguments
  """
  parser = argparse.ArgumentParser(description='Train a keypoint regressor.')
  parser.add_argument('--cfg', dest='cfg_file',
                      help='optional config file',
                      default=None, type=str)
  parser.add_argument('--gpu', dest='gpu',
                      help='GPU to use for running this.',
                      default='0', type=str)
  parser.add_argument('--save', dest='save', action='store_const',
                      const=True, default=False,
                      help='Set to save the features. Works only in mAP mode. '
                           '(Set in cfg).')
  parser.add_argument('--outfpath', default=None,
                      help='(Optional) Give a custom path to save the features. '
                           'By def. picks a path in ckpt directory.')
  parser.add_argument('--preprocs', default=[], nargs='*',
                      help='Set additional preprocs to do when testing. Eg. '
                           'can put \'flips\'. This will flip images before '
                           'pushing through the network. Can be useful for '
                           'late fusion of multiple features.')
  parser.add_argument('--ept', dest='ept', nargs='+', type=str, default=[],
                      help='Optional end point to store. '
                           'By def store the softmax logits.')
  parser.add_argument('--split_name', default=None, type=str,
                      help='Set to change the dataset split to run on. '
                           'Eg, \'train\' or \'test\'.')
  parser.add_argument('--frames_per_video', default=None, type=int,
                      help='Set to change the '
                           'cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO.')
  parser.add_argument('--dataset_list_dir', default=None, type=str,
                      help='Set to change the train_test_lists dir.')
  args = parser.parse_args()
  if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)

  # Change config for some options
  if args.split_name is not None:
    cfg.TEST.DATASET_SPLIT_NAME = args.split_name
  if args.frames_per_video is not None:
    cfg.TEST.VIDEO_FRAMES_PER_VIDEO = args.frames_per_video
  if args.outfpath is not None:
    args.save = True
  return args, cfg


def mkdir_p(dpath):
  try:
    os.makedirs(dpath)
  except:
    pass


def main():
  args, cfg = parse_args()
  train_dir = get_output_dir('default' if args.cfg_file is None
                             else args.cfg_file)
  os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

  print('Using Config:')
  pprint.pprint(cfg)

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    kwargs = {}
    if cfg.TEST.VIDEO_FRAMES_PER_VIDEO > 1:
      kwargs['num_samples'] = cfg.TEST.VIDEO_FRAMES_PER_VIDEO
      kwargs['modality'] = cfg.INPUT.VIDEO.MODALITY
      kwargs['split_id'] = cfg.INPUT.SPLIT_ID
    if args.dataset_list_dir is not None:
      kwargs['dataset_list_dir'] = args.dataset_list_dir
    elif cfg.DATASET_LIST_DIR != '':
      kwargs['dataset_list_dir'] = cfg.DATASET_LIST_DIR
    if cfg.INPUT_FILE_STYLE_LABEL != '':
      kwargs['input_file_style_label'] = cfg.INPUT_FILE_STYLE_LABEL
    dataset, num_pose_keypoints = dataset_factory.get_dataset(
        cfg.DATASET_NAME, cfg.TEST.DATASET_SPLIT_NAME, cfg.DATASET_DIR,
        **kwargs)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        cfg.MODEL_NAME,
        num_classes=dataset.num_classes,
        num_pose_keypoints=num_pose_keypoints,
        is_training=False,
        cfg=cfg)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        num_epochs=1,
        common_queue_capacity=2 * cfg.TEST.BATCH_SIZE,
        common_queue_min=cfg.TEST.BATCH_SIZE)
    [image, action_label] = get_input(provider, cfg,
                                      ['image', 'action_label'])
    # label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = cfg.MODEL_NAME
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = cfg.TRAIN.IMAGE_SIZE or network_fn.default_image_size

    image = image_preprocessing_fn(
      image, eval_image_size, eval_image_size,
      resize_side_min=cfg.TRAIN.RESIZE_SIDE,
      resize_side_max=cfg.TRAIN.RESIZE_SIDE)

    # additional preprocessing as required
    if 'flips' in args.preprocs:
      tf.logging.info('Flipping all images while testing!')
      image = tf.stack([
        tf.image.flip_left_right(el) for el in tf.unstack(image)])

    images, action_labels = tf.train.batch(
      [image, action_label],
      batch_size=cfg.TEST.BATCH_SIZE,
      # following is because if there are more, the order of batch can be
      # different due to different speed... so avoid that
      # http://stackoverflow.com/questions/35001027/does-batching-queue-tf-train-batch-not-preserve-order#comment57731040_35001027
      # num_threads=1 if args.save else cfg.NUM_PREPROCESSING_THREADS,
      num_threads=1,  # The above was too unsafe as sometimes I forgot --save
                      # and it would just randomize the whole thing.
                      # This is very important so
                      # shifting to this by default. Better safe than sorry.
      allow_smaller_final_batch=True if cfg.TEST.VIDEO_FRAMES_PER_VIDEO == 1
                                else False,  # because otherwise we need to
                                             # average logits over the frames,
                                             # and that needs first dimensions
                                             # to be fully defined
      capacity=5 * cfg.TEST.BATCH_SIZE)

    ####################
    # Define the model #
    ####################
    logits, end_points = network_fn(images)
    end_points['images'] = images

    if cfg.TEST.MOVING_AVERAGE_DECAY:
      variable_averages = tf.train.ExponentialMovingAverage(
          cfg.TEST.MOVING_AVERAGE_DECAY, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    if cfg.TRAIN.LOSS_FN_ACTION.startswith('multi-label'):
      logits = tf.sigmoid(logits)
    else:
      logits = tf.nn.softmax(logits, -1)
    labels = tf.squeeze(action_labels)
    end_points['labels'] = labels

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        # 'Recall@5': slim.metrics.streaming_recall_at_k(
        #     logits, labels, 5),
    })

    # Print the summaries to screen.
    for name, value in names_to_values.iteritems():
      summary_name = 'eval/%s' % name
      op = tf.summary.scalar(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if cfg.TEST.MAX_NUM_BATCHES:
      num_batches = cfg.TEST.MAX_NUM_BATCHES
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(cfg.TEST.BATCH_SIZE))

    # just test the latest trained model
    checkpoint_path = cfg.TEST.CHECKPOINT_PATH or train_dir
    if tf.gfile.IsDirectory(checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    else:
      checkpoint_path = checkpoint_path
    checkpoint_step = int(checkpoint_path.split('-')[-1])

    tf.logging.info('Evaluating %s' % checkpoint_path)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    summary_writer = tf.summary.FileWriter(logdir=train_dir)

    if cfg.TEST.EVAL_METRIC == 'mAP' or args.save or args.ept:
      from tensorflow.python.training import supervisor
      from tensorflow.python.framework import ops
      import h5py
      saver = tf.train.Saver(variables_to_restore)
      sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                                 logdir=None,
                                 summary_op=None,
                                 summary_writer=summary_writer,
                                 global_step=None,
                                 saver=None)
      all_labels = []
      end_points['logits'] = logits
      end_points_to_save = args.ept + ['logits']
      end_points_to_save = list(set(end_points_to_save))
      all_feats = dict([(ename, []) for ename in end_points_to_save])
      with sv.managed_session(
          '', start_standard_services=False,
          config=config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        for j in tqdm(range(int(math.ceil(num_batches)))):
          feats = sess.run([
            action_labels,
            [end_points[ename] for ename in end_points_to_save]])
          all_labels.append(feats[0])
          for ept_id, ename in enumerate(end_points_to_save):
            all_feats[ename].append(feats[1][ept_id])
      APs = []
      all_labels = np.concatenate(all_labels)
      if args.save or args.ept:
        res_outdir = os.path.join(train_dir, 'Features/')
        mkdir_p(res_outdir)
        outfpath = args.outfpath or os.path.join(
          res_outdir, 'features_ckpt_{}_{}.h5'.format(
          cfg.TEST.DATASET_SPLIT_NAME,
          checkpoint_step))
        print('Saving the features/logits/labels to {}'.format(outfpath))
        with h5py.File(outfpath, 'a') as fout:
          for ename in end_points_to_save:
            if ename in fout:
              tf.logging.warning('Deleting {} from output HDF5 to write the '
                                 'new features.'.format(ename))
              del fout[ename]
            if ename == 'labels':
              feat_to_save = np.array(all_feats[ename])
            else:
              feat_to_save = np.concatenate(all_feats[ename])
            try:
              fout.create_dataset(
                ename, data=feat_to_save,
                compression='gzip', compression_opts=9)
            except:
              pdb.set_trace()  # manually deal with it and continue
          if 'labels' in fout:
            del fout['labels']
          fout.create_dataset(
            'labels', data=all_labels,
            compression='gzip', compression_opts=9)

      if args.ept:
        tf.logging.info('Evaluation had --ept passed in. '
                        'This indicates script was used for feature '
                        'extraction. Hence, not performing any evaluation.')
        return
      # Evaluation code
      all_logits = np.concatenate(all_feats['logits'])
      acc = np.mean(
        all_logits.argmax(axis=1) == all_labels)
      mAP = compute_map(all_logits, all_labels)[0]
      print('Mean AP: {}'.format(mAP))
      print('Accuracy: {}'.format(acc))
      summary_writer.add_summary(tf.Summary(value=[
        tf.Summary.Value(
          tag='mAP/{}'.format(cfg.TEST.DATASET_SPLIT_NAME),
          simple_value=mAP)]),
        global_step=checkpoint_step)
      summary_writer.add_summary(tf.Summary(value=[
        tf.Summary.Value(
          tag='Accuracy/{}'.format(cfg.TEST.DATASET_SPLIT_NAME),
          simple_value=acc)]),
        global_step=checkpoint_step)
    else:
      slim.evaluation.evaluate_once(
        master='',
        checkpoint_path=checkpoint_path,
        logdir=train_dir,
        num_evals=num_batches,
        eval_op=names_to_updates.values(),
        variables_to_restore=variables_to_restore,
        session_config=config)


if __name__ == '__main__':
  main()


================================================
FILE: src/loss.py
================================================
import tensorflow as tf
slim = tf.contrib.slim

def gen_losses(
  labels_action, logits_action, loss_type_action, num_action_classes,
  action_loss_wt,
  labels_pose, logits_pose, loss_type_pose, labels_pose_valid, pose_loss_wt,
  end_points, cfg):

  with tf.name_scope('LossFn'):
    if loss_type_pose and logits_pose.get_shape().as_list()[-1] > 0:
      with tf.name_scope('PoseLoss'):
        # Loss over the pose
        if labels_pose.get_shape().as_list() != \
           logits_pose.get_shape().as_list():
          tf.logging.info('Sizes of logits {} and labels {} are different. '
                          'Change the cfg.FINAL_POSE_HMAP_SIDE to avoid '
                          'a resize operation.'.format(
                            logits_pose.get_shape().as_list(),
                            labels_pose.get_shape().as_list()))
          labels_pose = tf.image.resize_images(
            labels_pose, logits_pose.get_shape().as_list()[-3:-1])
        # ignore the unknown channels, set those channels to 0 to incur no loss

        # Following needs defining the gradient for this...
        # labels_pose = zero_out_channels(labels_pose, labels_pose_valid)
        # logits_pose = zero_out_channels(logits_pose, labels_pose_valid)

        with tf.name_scope('ValidPoseLoss'):
          channels_valid = tf.unstack(labels_pose_valid, axis=-1)
          channels_logits = tf.unstack(logits_pose, axis=-1)
          channels_labels = tf.unstack(labels_pose, axis=-1)
          loss_elements = []
          pose_loss_mask = []
          for v, lbl, lgt in zip(channels_valid, channels_logits, channels_labels):
            if cfg.TRAIN.LOSS_FN_POSE_SAMPLED:
              # To make it harder
              neg_areas = tf.equal(lgt, 0)
              pos_areas = tf.greater(lgt, 0)
              total_area = lgt.shape.num_elements()
              pos_area_ratio = tf.reduce_sum(tf.to_float(pos_areas)) / total_area
              # select that much of neg area
              neg_areas_selected = tf.to_float(tf.less(tf.random_uniform(
                tf.shape(lgt), 0, 1.0),
                pos_area_ratio)) * tf.to_float(neg_areas)
              # keep all positive pixels
              mask = tf.greater(neg_areas_selected + tf.to_float(
                tf.greater(lbl, 0)), 0)
              mask = tf.to_float(mask)
              lgt = lgt * mask  # just do loss over this subset
              lbl = lbl * mask
              loss_val = 0.5 * tf.reduce_mean(tf.square(lbl - lgt), axis=[1,2])
            else:
              mask = tf.ones(tf.shape(lgt))
              loss_val = 0.5 * tf.reduce_sum(
                tf.square(lbl - lgt), axis=[1,2]) / tf.reduce_sum(mask)
            pose_loss_mask.append(tf.expand_dims(mask, -1))
            if loss_type_pose == 'l2':
              L = tf.reduce_mean(tf.where(
                v,
                loss_val,
                [0] * v.get_shape().as_list()[0]))
            elif loss_type_pose == '':
              L = 0
            else:
              raise ValueError('Invalid loss {}'.format(loss_type_pose))
            loss_elements.append(L)
        end_points['PoseLossMask'] = tf.concat(pose_loss_mask, axis=-1)
        tot_loss = tf.reduce_sum(loss_elements, name='ValidPoseEucLoss')
        tf.losses.add_loss(tot_loss * pose_loss_wt)

    with tf.name_scope('ActionLoss'):
      # TODO (rgirdhar): Add the option of having -1 label, so ignore that one
      if loss_type_action == 'softmax-xentropy':
        tf.losses.softmax_cross_entropy(
          onehot_labels=slim.one_hot_encoding(
            labels_action,
            num_action_classes),
          logits=logits_action,
          weights=action_loss_wt)
      elif loss_type_action == 'l2':
        tf.losses.mean_squared_error(
          labels=slim.one_hot_encoding(
            labels_action,
            num_action_classes),
          predictions=logits_action,
          weights=action_loss_wt)
      elif loss_type_action == 'multi-label':
        labels_action = tf.to_float(labels_action)
        # labels_action = tf.Print(
        #   labels_action, [labels_action, tf.reduce_sum(labels_action, 1)],
        #   "Label action:")
        loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(
          targets=labels_action,
          logits=logits_action,
          pos_weight=10))
        tf.losses.add_loss(loss)
      elif loss_type_action == 'multi-label-2':
        tf.losses.sigmoid_cross_entropy(
          multi_class_labels=labels_action,
          logits=logits_action)
      elif loss_type_action == '':
        tf.logging.info('No loss on action')
      else:
        raise ValueError('Unrecognized loss {}'.format(loss_type_action))


================================================
FILE: src/preprocess_pipeline.py
================================================
import tensorflow as tf
from custom_ops.custom_ops_factory import pose_to_heatmap, render_pose, \
    render_objects, extract_glimpse

def _resize_if_needed(image, max_wd):
  with tf.name_scope('LimitMaxSizeOriginalImage'):
    im_ht = tf.shape(image)[-3]
    im_wd = tf.shape(image)[-2]
    new_ht = tf.cast(im_ht, tf.float32) * (
      tf.cast(max_wd, tf.float32) / tf.cast(im_wd, tf.float32))
    new_ht = tf.cast(new_ht, tf.int64)
    image = tf.cond(
      tf.greater(im_wd, max_wd),
      lambda: tf.image.resize_images(
        image, tf.cast([new_ht, max_wd], tf.int32)),
      lambda: tf.cast(image, tf.float32))
    image = tf.cast(image, tf.uint8)
  return image


def _replay_augmentation(H, aug_info):
  # use the augmentation info from the original image to identically transform
  # the heatmap H
  with tf.name_scope('ReplayAugmentation'):
    ## 1. Crop
    H_wd = tf.shape(H)[-2]
    H_ht = tf.shape(H)[-3]
    num_channels = tf.shape(H)[-1]
    orig_wd = aug_info['image_shape'][-2]
    orig_ht = aug_info['image_shape'][-3]
    ratio_x = tf.to_float(H_wd) / tf.to_float(orig_wd)
    ratio_y = tf.to_float(H_ht) / tf.to_float(orig_ht)
    start_points = [tf.to_float(aug_info['crop_info'][0]) * ratio_y,
                    tf.to_float(aug_info['crop_info'][1]) * ratio_x]
    edge_sides = [tf.to_float(aug_info['crop_info'][2]) * ratio_y,
                  tf.to_float(aug_info['crop_info'][3]) * ratio_x]
    H = tf.slice(H,
                 tf.concat([tf.to_int32(start_points), [0,]], axis=-1),
                 tf.concat([tf.to_int32(edge_sides), [num_channels,]], axis=-1))
    ## 2. Flip
    H = tf.cond(
      aug_info['whether_flip'],
      lambda: tf.image.flip_left_right(H),
      lambda: H)
  return H


def _get_other_items(provider, stuff, existing_items, new_items):
  res = []
  for item in new_items:
    if item in existing_items:
      res.append(stuff[existing_items.index(item)])
    else:
      res.append(provider.get([item])[0])
  return res


def get_input(provider, cfg, items):
  stuff = provider.get(items)
  if 'image' in items:
    img_pos = items.index('image')
    image = stuff[img_pos]
    # MPII has some huge images, which makes further processing too slow.
    # So, make image smaller if needed
    # IMP NOTE: Do not change the orig_im_ht or orig_im_wd, they are used for plotting
    # the pose and the pose is defined w.r.t to the original image size
    # Pose Label format: [16x3xn,] : x1,y1,score/isvisible...
    # if x1 and y1 are both -1, that point is not visible/labeled
    image = _resize_if_needed(image, cfg.MAX_INPUT_IMAGE_SIZE)
    if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-pose') or \
       cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('pose-glimpse'):
      pose_label, orig_im_ht, orig_im_wd = _get_other_items(
        provider, stuff, items, ['pose', 'im_ht', 'im_wd'])
      # pose_label = tf.Print(pose_label, [pose_label], "Pose Label: ")
      pose_label_was_list = True
      if not isinstance(pose_label, list):
        pose_label_was_list = False
        pose_label = [pose_label]

      if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-pose'):
        rendered_pose = tf.stack([render_pose(
          pose_label[i], orig_im_ht, orig_im_wd,
          # TODO: the following tf.shape is going to read the image irrespective
          # of whether needed or not to compute shape. However the code isn't
          # slow so not worrying about it at the moment. But fix it.
          tf.cast(tf.shape(image)[-2], tf.int64),
          out_type=cfg.INPUT.INPUT_IMAGE_FORMAT_POSE_RENDER_TYPE) for
          i in range(len(pose_label))])
        rendered_pose = tf.image.resize_images(
          rendered_pose, tf.shape(image)[-3:-1])
        if not pose_label_was_list:
          rendered_pose = rendered_pose[0]
      else:
        image_glimpse = tf.stack([extract_glimpse(
          image, pose_label[i], orig_im_ht, orig_im_wd,
          cfg.TRAIN.IMAGE_SIZE if cfg.INPUT.POSE_GLIMPSE_RESIZE else -1,
          cfg.INPUT.POSE_GLIMPSE_CONTEXT_RATIO,
          cfg.INPUT.POSE_GLIMPSE_PARTS_KEEP) for
          i in range(len(pose_label))])


    if cfg.INPUT.INPUT_IMAGE_FORMAT.startswith('rendered-objects'):
      objects_label, orig_im_ht, orig_im_wd = _get_other_items(
        provider, stuff, items, ['objects', 'im_ht', 'im_wd'])
      # pose_label = tf.Print(pose_label, [pose_label], "Pose Label: ")
      rendered_objects = tf.stack([render_objects(
        objects_label[i], orig_im_ht, orig_im_wd,
        cfg.TRAIN.IMAGE_SIZE, out_channels=80) for
        i in range(len(objects_label))])

    # Final output
    if cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-pose':
      image = rendered_pose
      # debugging
      # image = tf.tile(tf.reduce_mean(
      #   image, axis=-1, keep_dims=True), [1, 1, 1, 3])
    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-pose-on-image':
      image = tf.cast(tf.to_float(image) * 0.5 + \
                      tf.to_float(rendered_pose) * 0.5, tf.uint8)
    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 'rendered-objects':
      image = rendered_objects
      # To debug
      # image = tf.cast(
      #   tf.to_float(image) * 0.0 + \
      #   tf.to_float(tf.image.resize_images(
      #     tf.reduce_mean(rendered_objects, axis=-1, keep_dims=True),
      #     tf.shape(image)[-3:-1])) * 1.0,
      #   tf.uint8)
    elif cfg.INPUT.INPUT_IMAGE_FORMAT == 'pose-glimpse':
      image = image_glimpse
    stuff[img_pos] = image
  return stuff


def train_preprocess_pipeline(provider, cfg, network_fn, num_pose_keypoints,
                              image_preprocessing_fn):

  [image, pose_label, orig_im_ht, orig_im_wd, action_label] = get_input(
    provider, cfg, ['image', 'pose', 'im_ht', 'im_wd', 'action_label'])
  # for consistency between video and image datasets, convert image datasets to
  # 1-frame videos
  if image.get_shape().ndims == 3:
    image = tf.expand_dims(image, 0)
    pose_label = [pose_label]
  train_image_size = cfg.TRAIN.IMAGE_SIZE or network_fn.default_image_size

  # joint preprocessing
  combined_preproc_flag = False
  with tf.name_scope('CombinedPreproc'):
    if num_pose_keypoints > 0 and not cfg.TRAIN.LOSS_FN_POSE == '':
      combined_preproc_flag = True
      all_pose_label_hmap = []
      all_pose_label_valid = []
      for pl in pose_label:
        pose_label_hmap, pose_label_valid = pose_to_heatmap(
          pl, orig_im_ht, orig_im_wd,
          # small enough for preproc, big enough to see
          max(200, cfg.TRAIN.FINAL_POSE_HMAP_SIDE),
          out_channels=num_pose_keypoints,
          # if needed, do using a conv layer with fixed kernel
          # would be faster on GPU
          do_gauss_blur=False,
          marker_wd_ratio=cfg.HEATMAP_MARKER_WD_RATIO)  # larger => large targets
        all_pose_label_hmap.append(pose_label_hmap)
        all_pose_label_valid.append(pose_label_valid)
      # concat on last axis for now (for preproc), will stack it (like the
      # valid labels) after that.
      pose_label_hmap = tf.concat(all_pose_label_hmap, axis=-1)
      pose_label_valid = tf.stack(all_pose_label_valid)

    # rgirdhar NOTE: This is the most expensive CPU part. My perf was super
    # slow with the output image sizes being 450x, because it'd first resize
    # the smallest dimension to 512 or so, and then take a 450 crop from that.
    # Doing that over RGB+heatmap channels was super slow, and is fixed when
    # using small sizes (now, 256 & 224 works well). Another issue was the
    # number of INTRA and INTER PARALLELIZATION THREADS, set in the train.py
    # which sped up a lot. Also saves from the machines getting stuck by
    # controlling the number of threads while giving better performance. For
    # me, the Inter=12 and Intra=4 worked well.
    preproc_info = {}
    # since images is 4D vector, need to reshape to pass it through preproc
    frames_per_video = image.get_shape().as_list()[0]
    image = tf.concat(tf.unstack(image), axis=-1)
    image = image_preprocessing_fn(
      image,
      train_image_size,
      train_image_size,
      resize_side_min=cfg.TRAIN.RESIZE_SIDE,
      resize_side_max=cfg.TRAIN.RESIZE_SIDE,
      preproc_info=preproc_info,
      modality=cfg.INPUT.VIDEO.MODALITY)  # works for image too, rgb by def
    image = tf.stack(tf.split(
      image, frames_per_video,
      axis=image.get_shape().ndims-1))
    if combined_preproc_flag:
      pose_label_hmap = _replay_augmentation(pose_label_hmap, preproc_info)
      pose_label_hmap = tf.image.convert_image_dtype(pose_label_hmap,
                                                     tf.float32)

      # undo any value scaling that happened while preproc
      pose_label_hmap -= tf.reduce_min(pose_label_hmap)
      pose_label_hmap /= (tf.reduce_max(pose_label_hmap) + cfg.EPS)
      # reduce the size of heatmaps to reduce memory usage in queues
      pose_label_hmap = tf.image.resize_images(
        pose_label_hmap,
        [cfg.TRAIN.FINAL_POSE_HMAP_SIDE,
         cfg.TRAIN.FINAL_POSE_HMAP_SIDE])
      pose_label_hmap.set_shape([
        pose_label_hmap.get_shape().as_list()[0],
        pose_label_hmap.get_shape().as_list()[1],
        num_pose_keypoints * frames_per_video])
      pose_label_hmap = tf.stack(tf.split(
        pose_label_hmap, frames_per_video,
        axis=pose_label_hmap.get_shape().ndims-1))
    else:
      pose_label_hmap = tf.zeros((0,))  # dummy value, not used
      pose_label_valid = tf.zeros((0,))  # dummy value, not used

  return image, pose_label_hmap, pose_label_valid, action_label


================================================
FILE: src/restore/__init__.py
================================================


================================================
FILE: src/restore/model_restorer.py
================================================
import numpy as np
import h5py

from tensorflow.contrib import slim
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python import pywrap_tensorflow
import tensorflow as tf
import var_name_mapper


def restore_model(checkpoint_path,
                  variables_to_restore,
                  ignore_missing_vars=False,
                  var_name_mapper_type=None):
  all_ops = []
  checkpoint_variables = variables_to_restore
  if checkpoint_path.endswith('.npy'):
    vars_to_restore_names = [
      el.name for el in checkpoint_variables]
    key_name_mapper = var_name_mapper.map(var_name_mapper_type)
    init_weights = np.load(checkpoint_path).item()
    init_weights_final = {}
    vars_restored = []
    for key in init_weights.keys():
      for subkey in init_weights[key].keys():
        final_key_name = key_name_mapper(
          key + '/' + subkey)
        if final_key_name not in vars_to_restore_names:
          logging.info('Not using %s from npy' % final_key_name)
          continue
        target_shape = slim.get_model_variables(
          final_key_name)[0].get_shape().as_list()
        pretrained_wts = init_weights[key][subkey].copy()
        target_shape_squeezed = np.delete(
          target_shape, np.where(np.array(target_shape) == 1))
        pretrained_shape_squeezed = np.delete(
          pretrained_wts.shape, np.where(np.array(pretrained_wts.shape) == 1))

        go_ahead = False  # whether or not I'll be able to copy these weights
        if np.any(target_shape_squeezed !=
                  pretrained_shape_squeezed):
          logging.info('Shape mismatch var: %s from npy [%s vs %s]. ' % (
                       final_key_name, target_shape,
                       pretrained_wts.shape))
          if pretrained_shape_squeezed[-2] != target_shape_squeezed[-2]:
            logging.info('Trying repeating channels to make it similar.')
            pretrained_wts = np.repeat(
              np.mean(pretrained_wts, axis=-2, keepdims=True),
              repeats=target_shape_squeezed[-2],
              axis=-2)
            if np.all(target_shape_squeezed == pretrained_wts.shape):
              logging.info('Success! Copying the hacked weights.')
              go_ahead = True
            else:
              logging.info('Couldnot match the weights still.')
        else:
          go_ahead = True
        if go_ahead:
          init_weights_final[final_key_name] = \
            pretrained_wts
          vars_restored.append(final_key_name)
    init_weights = init_weights_final
    for v in vars_to_restore_names:
      if v not in vars_restored:
        logging.fatal('No weights found for %s' % v)
        if not ignore_missing_vars:
          raise ValueError()
    all_ops.append(slim.assign_from_values_fn(init_weights))
  else:
    all_ops.append(assign_from_checkpoint_fn(
      checkpoint_path, checkpoint_variables,
      ignore_missing_vars=ignore_missing_vars,
      resize_variables=True))
  def combined(sess):
    for op in all_ops:
      op(sess)
  return combined


def assign_from_checkpoint_fn(model_path, var_list, ignore_missing_vars=False,
                              reshape_variables=False, resize_variables=False):
  """Modified function from
  https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/framework/python/ops/variables.py
  Mod by rgirdhar to allow for repeating the channels dimension in case a layer
  does not match. It's useful for setting the first layer in flow models for
  videos. Does this only when resize_variables is True.
  """
  """Returns a function that assigns specific variables from a checkpoint.

  If ignore_missing_vars is True and no variables are found in the checkpoint
  it returns None.

  Args:
    model_path: The full path to the model checkpoint. To get latest checkpoint
        use `model_path = tf.train.latest_checkpoint(checkpoint_dir)`
    var_list: A list of `Variable` objects or a dictionary mapping names in the
        checkpoint to the corresponding variables to initialize. If empty or
        `None`, it would return `no_op(), None`.
    ignore_missing_vars: Boolean, if True it would ignore variables missing in
        the checkpoint with a warning instead of failing.
    reshape_variables: Boolean, if True it would automatically reshape variables
        which are of different shape then the ones stored in the checkpoint but
        which have the same number of elements.
    resize_variables: Boolean, if True it would repeat the channels to match
        the target variable dimensions

  Returns:
    A function that takes a single argument, a `tf.Session`, that applies the
    assignment operation. If no matching variables were found in the checkpoint
    then `None` is returned.

  Raises:
    ValueError: If var_list is empty.
  """
  if not var_list:
    raise ValueError('var_list cannot be empty')
  reader = pywrap_tensorflow.NewCheckpointReader(model_path)
  if isinstance(var_list, dict):
    var_dict = var_list
  else:
    var_dict = {var.op.name: var for var in var_list}
  available_vars = {}
  for var in var_dict:
    if reader.has_tensor(var):
      go_ahead = False
      V = reader.get_tensor(var)
      ckpt_shape = list(V.shape)
      target_shape = var_dict[var].get_shape().as_list()
      if np.all(ckpt_shape == target_shape):
        go_ahead = True
      else:
        if resize_variables:
          logging.warning('Resizing to assign to variable {} to {} from {}'.format(
            var, var_dict[var].get_shape().as_list(),
            V.shape))
          V = np.repeat(
            np.mean(V, axis=-2, keepdims=True),
            repeats=target_shape[-2],
            axis=-2)
          ckpt_shape = list(V.shape)
          if np.all(ckpt_shape == target_shape):
            logging.info('Was able to match shape, so restoring the var :-)')
            go_ahead = True
          else:
            logging.error('Was not able to match shape, not restoring it!!!')
            go_ahead = False
        else:
          logging.error('Found a shape mismatch. Set resize_var to true to '
                        'do a hacky shape copy.')
      if go_ahead:
        available_vars[var] = V
    else:
      logging.warning(
          'Variable %s missing in checkpoint %s', var, model_path)
      if not ignore_missing_vars:
        raise ValueError()
  return slim.assign_from_values_fn(available_vars)


def get_special_assigns(special_assign_vars):
  init_wts = {}
  special_assign_vars = special_assign_vars.split(',')
  for i in range(len(special_assign_vars) / 2):
    var_name = special_assign_vars[2*i]
    file_path = special_assign_vars[2*i+1]
    with h5py.File(file_path, 'r') as fin:
      init_wts[var_name] = fin['feat'].value
    logging.info('Special Assign: %s with a %s array' % (
      var_name, init_wts[var_name].shape))
  return slim.assign_from_values_fn(init_wts)


================================================
FILE: src/restore/var_name_mapper.py
================================================
def map(var_name_mapping):
  map_fn = lambda x: x
  if var_name_mapping == 'placenet365-vgg':
    map_fn = placenet365_vgg_fn
  elif var_name_mapping == 'cuhk-action-vgg':
    map_fn = cuhk_action_vgg
  elif var_name_mapping == 'cuhk-action-tsn':
    map_fn = cuhk_action_tsn
  elif var_name_mapping == 'xiaolonw_action_vgg_hmdb':
    map_fn = xiaolonw_action_vgg_hmdb
  else:
    raise ValueError('Invalid var name mapping')
  return map_fn


def placenet365_vgg_fn(var_name):
  final_name = var_name
  if final_name.split('/')[0].startswith('conv'):
    final_name = \
      final_name.split('/')[0].split('_')[0] + '/' + final_name
  elif final_name.split('/')[0] == 'fc8a':
    final_name = final_name.replace('fc8a', 'fc8')
  return 'vgg_16/' + final_name + ':0'


def cuhk_action_vgg(var_name):
  final_name = var_name
  if final_name.split('/')[0].startswith('conv'):
    final_name = \
      final_name.split('/')[0].split('_')[0] + '/' + final_name
  elif final_name.split('/')[0].startswith('fc8'):
    final_name = final_name.replace(final_name.split('/')[0], 'fc8')
  return 'vgg_16/' + final_name + ':0'


def xiaolonw_action_vgg_hmdb(var_name):
  final_name = var_name
  if final_name.split('/')[0].startswith('conv'):
    final_name = \
      final_name.split('/')[0].split('_')[0] + '/' + final_name
  elif final_name.split('/')[0] == 'fc8_hmdb':
    final_name = final_name.replace('fc8_hmdb', 'fc8')
  return 'vgg_16/' + final_name + ':0'


def cuhk_action_tsn(var_name):
  final_name = var_name
  var_name = final_name.split('/')[-1]
  if final_name.split('/')[0].endswith('_bn'):
    if var_name == 'scale':
      var_name = 'gamma'
    elif var_name == 'shift':
      var_name = 'beta'
    elif var_name == 'mean':
      var_name = 'moving_mean'
    elif var_name == 'variance':
      var_name = 'moving_variance'
    final_name = \
      final_name.split('/')[0][:-3] + '/BatchNorm/' + var_name
  elif final_name.split('/')[0] == 'fc-action':
    final_name = 'Logits/Conv/' + var_name
  else:
    final_name = final_name.split('/')[0] + '/Conv/' + var_name
  block_name = final_name.split('/')[0]
  pos = None
  if block_name.startswith('inception'):
    pos = len('inception_xx')
  elif block_name.startswith('conv'):
    pos = len('convx')
  if pos is not None:
    final_name = final_name[:pos] + '/' + final_name[pos+1:]
  return 'InceptionV2_TSN/' + final_name + ':0'


================================================
FILE: src/train.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import tensorflow as tf
import sys
import pprint
import os
import time
import numpy as np
from datetime import datetime

from tensorflow.python.ops import control_flow_ops
from tensorflow.python.client import timeline
from datasets import dataset_factory
sys.path.append('../models/slim')
from deployment import model_deploy
from nets import nets_factory
from preprocessing import preprocessing_factory

from config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from restore import model_restorer
from loss import gen_losses
from preprocess_pipeline import train_preprocess_pipeline

slim = tf.contrib.slim

def _configure_learning_rate(num_samples_per_epoch, num_clones, global_step):
  """Configures the learning rate.

  Args:
    num_samples_per_epoch: The number of samples in each epoch of training.
    global_step: The global_step tensor.

  Returns:
    A `Tensor` representing the learning rate.

  Raises:
    ValueError: if
  """
  if cfg.TRAIN.NUM_STEPS_PER_DECAY > 0:
    decay_steps = cfg.TRAIN.NUM_STEPS_PER_DECAY
    tf.logging.info('Using {} steps for decay. Ignoring any epoch setting for '
                    'decay.'.format(decay_steps))
  else:
    decay_steps = int(num_samples_per_epoch / (
      cfg.TRAIN.BATCH_SIZE * num_clones * cfg.TRAIN.ITER_SIZE) * cfg.TRAIN.NUM_EPOCHS_PER_DECAY)

  if cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'exponential':
    return tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE,
                                      global_step,
                                      decay_steps,
                                      cfg.TRAIN.LEARNING_RATE_DECAY_RATE,
                                      staircase=True,
                                      name='exponential_decay_learning_rate')
  elif cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'fixed':
    return tf.constant(cfg.TRAIN.LEARNING_RATE, name='fixed_learning_rate')
  elif cfg.TRAIN.LEARNING_RATE_DECAY_TYPE == 'polynomial':
    return tf.train.polynomial_decay(cfg.TRAIN.LEARNING_RATE,
                                     global_step,
                                     decay_steps,
                                     cfg.TRAIN.END_LEARNING_RATE,
                                     power=1.0,
                                     cycle=False,
                                     name='polynomial_decay_learning_rate')
  else:
    raise ValueError('learning_rate_decay_type [%s] was not recognized',
                     cfg.TRAIN.LEARNING_RATE_DECAY_RATE)


def _configure_optimizer(learning_rate):
  """Configures the optimizer used for training.

  Args:
    learning_rate: A scalar or `Tensor` learning rate.

  Returns:
    An instance of an optimizer.

  Raises:
    ValueError: if cfg.optimizer is not recognized.
  """
  if cfg.TRAIN.OPTIMIZER == 'adam':
    optimizer = tf.train.AdamOptimizer(
        learning_rate,
        beta1=cfg.TRAIN.ADAM_BETA1,
        beta2=cfg.TRAIN.ADAM_BETA2,
        epsilon=cfg.TRAIN.OPT_EPSILON)
  elif cfg.TRAIN.OPTIMIZER == 'momentum':
    optimizer = tf.train.MomentumOptimizer(
        learning_rate,
        momentum=cfg.TRAIN.MOMENTUM,
        name='Momentum')
  elif cfg.TRAIN.OPTIMIZER == 'rmsprop':
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate,
        decay=cfg.TRAIN.RMSPROP_DECAY,
        momentum=cfg.TRAIN.MOMENTUM,
        epsilon=cfg.TRAIN.OPT_EPSILON)
  elif cfg.TRAIN.OPTIMIZER == 'sgd':
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
  else:
    raise ValueError('Optimizer [%s] was not recognized', cfg.TRAIN.OPTIMIZER)
  return optimizer


def _add_variables_summaries(learning_rate):
  summaries = []
  for variable in slim.get_model_variables():
    summaries.append(tf.histogram_summary(variable.op.name, variable))
  summaries.append(tf.summary.scalar(tensor=learning_rate,
                                     name='training/Learning Rate'))
  return summaries


def _get_init_fn(train_dir):
  """Returns a function run by the chief worker to warm-start the training.

  Note that the init_fn is only run when initializing the model during the very
  first global step.

  Returns:
    An init function run by the supervisor.
  """
  if cfg.TRAIN.CHECKPOINT_PATH is None:
    return None

  # Warn the user if a checkpoint exists in the train_dir. Then we'll be
  # ignoring the checkpoint anyway.
  if tf.train.latest_checkpoint(train_dir):
    tf.logging.info(
        'Ignoring --checkpoint_path because a checkpoint already exists in %s'
        % train_dir)
    return None

  exclusions = []
  if cfg.TRAIN.CHECKPOINT_EXCLUDE_SCOPES:
    exclusions = [scope.strip()
                  for scope in cfg.TRAIN.CHECKPOINT_EXCLUDE_SCOPES.split(',')]

  # variables_to_restore = slim.get_variables_to_restore(exclude=exclusions)
  # NOTE: The above was wrong!! It would restore all global_step, momentum etc
  # variables too, which we don't want when starting from a pretrained model
  # (like imagenet). The above is (and should be) used when restoring from a
  # half-trained model of the same script (which doesn't happen here anyway,
  # see above, there's a return None if a checkpoint exists)
  variables_to_restore = slim.filter_variables(
    slim.get_model_variables(),
    exclude_patterns=exclusions)

  if tf.gfile.IsDirectory(cfg.TRAIN.CHECKPOINT_PATH):
    checkpoint_path = tf.train.latest_checkpoint(cfg.TRAIN.CHECKPOINT_PATH)
  else:
    checkpoint_path = cfg.TRAIN.CHECKPOINT_PATH

  tf.logging.info('Fine-tuning from %s' % checkpoint_path)

  return model_restorer.restore_model(
      checkpoint_path,
      variables_to_restore,
      ignore_missing_vars=cfg.TRAIN.IGNORE_MISSING_VARS,
      var_name_mapper_type=cfg.TRAIN.VAR_NAME_MAPPER)


def _get_variables_to_train():
  """Returns a list of variables to train.

  Returns:
    A list of variables to train by the optimizer.
  """
  if cfg.TRAIN.TRAINABLE_SCOPES == '':
    return tf.trainable_variables()
  else:
    scopes = [scope.strip() for scope in cfg.TRAIN.TRAINABLE_SCOPES.split(',')]

  variables_to_train = []
  for scope in scopes:
    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
    variables_to_train.extend(variables)
  return variables_to_train


def _gen_overlayed_img(hmap, img):
  with tf.name_scope('VisualizeOverlayedHeatmap'):
    hmap = tf.expand_dims(hmap, -1)
    hmap = tf.image.resize_images(
      hmap, img.get_shape().as_list()[-3:-1])
    img = tf.tile(
      tf.image.rgb_to_grayscale(img), [1, 1, 1, 3])
    hmap = tf.image.grayscale_to_rgb(hmap)
    hmap = tf.concat([
      tf.expand_dims(hmap[..., 0] * 255.0, -1),
      hmap[..., 1:] * 0.0], axis=-1)
    return (0.5 * img + 0.5 * hmap)


def _summarize_heatmaps(name, tensor, img_tensor):
  # return tf.summary.image(name, tf.reduce_sum(tensor, axis=-1, keep_dims=True))
  if tensor.get_shape()[-1] == 0:
    tf.logging.info('Pose heatmaps have 0 channels. Not summarizing')
    return set()
  return set([
    tf.summary.image(
      name + '/head', _gen_overlayed_img(tensor[..., 9], img_tensor)),
    tf.summary.image(
      name + '/lwrist', _gen_overlayed_img(tensor[..., 15], img_tensor)),
    tf.summary.image(
      name + '/rankle', _gen_overlayed_img(tensor[..., 0], img_tensor)),
    tf.summary.image(
      name + '/pelvis', _gen_overlayed_img(tensor[..., 6], img_tensor))])


end_points_debug = {}
def _train_step(sess, train_op, global_step, train_step_kwargs):
  """Function that takes a gradient step and specifies whether to stop.

  Args:
    sess: The current session.
    train_op: An `Operation` that evaluates the gradients and returns the
      total loss.
    global_step: A `Tensor` representing the global training step.
    train_step_kwargs: A dictionary of keyword arguments.

  Returns:
    The total loss and a boolean indicating whether or not to stop training.

  Raises:
    ValueError: if 'should_trace' is in `train_step_kwargs` but `logdir` is not.
  """
  start_time = time.time()

  trace_run_options = None
  run_metadata = None
  if 'should_trace' in train_step_kwargs:
    if 'logdir' not in train_step_kwargs:
      raise ValueError('logdir must be present in train_step_kwargs when '
                       'should_trace is present')
    if sess.run(train_step_kwargs['should_trace']):
      trace_run_options = config_pb2.RunOptions(
          trace_level=config_pb2.RunOptions.FULL_TRACE)
      run_metadata = config_pb2.RunMetadata()

  if cfg.TRAIN.ITER_SIZE == 1:
    # To Debug, uncomment here and observe the end_points_debug
    total_loss, np_global_step = sess.run([train_op, global_step],
                                          options=trace_run_options,
                                          run_metadata=run_metadata)
  else:
    for j in range(cfg.TRAIN.ITER_SIZE-1):
      sess.run([train_op[j]])
    total_loss, np_global_step = sess.run([
      train_op[cfg.TRAIN.ITER_SIZE-1], global_step],
      options=trace_run_options,
      run_metadata=run_metadata)
  time_elapsed = time.time() - start_time

  if run_metadata is not None:
    tl = timeline.Timeline(run_metadata.step_stats)
    trace = tl.generate_chrome_trace_format()
    trace_filename = os.path.join(train_step_kwargs['logdir'],
                                  'tf_trace-%d.json' % np_global_step)
    tf.logging.info('Writing trace to %s', trace_filename)
    file_io.write_string_to_file(trace_filename, trace)
    if 'summary_writer' in train_step_kwargs:
      train_step_kwargs['summary_writer'].add_run_metadata(run_metadata,
                                                           'run_metadata-%d' %
                                                           np_global_step)

  if 'should_log' in train_step_kwargs:
    if sess.run(train_step_kwargs['should_log']):
      tf.logging.info('%s: global step %d: loss = %.4f (%.2f sec/step)',
                   datetime.now(), np_global_step, total_loss, time_elapsed)

  if 'should_stop' in train_step_kwargs:
    should_stop = sess.run(train_step_kwargs['should_stop'])
  else:
    should_stop = False

  return total_loss, should_stop


def parse_args():
  """
  Parse input arguments
  """
  parser = argparse.ArgumentParser(description='Train a keypoint regressor.')
  parser.add_argument('--cfg', dest='cfg_file',
                      help='optional config file',
                      default=None, type=str)

  args = parser.parse_args()
  return args


def main():
  args = parse_args()
  if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)

  tf.logging.info('Using Config:')
  pprint.pprint(cfg)

  train_dir = get_output_dir('default' if args.cfg_file is None
                             else args.cfg_file)
  os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPUS
  num_clones = len(cfg.GPUS.split(','))

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    ######################
    # Config model_deploy#
    ######################
    tf.set_random_seed(cfg.RNG_SEED)
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=num_clones,
        clone_on_cpu=False,
        replica_id=0,
        num_replicas=1,
        num_ps_tasks=0)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    kwargs = {}
    if cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO > 1:
      kwargs['num_samples'] = cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO
      kwargs['randomFromSegmentStyle'] = cfg.TRAIN.READ_SEGMENT_STYLE
      kwargs['modality'] = cfg.INPUT.VIDEO.MODALITY
      kwargs['split_id'] = cfg.INPUT.SPLIT_ID
    if cfg.DATASET_LIST_DIR != '':
      kwargs['dataset_list_dir'] = cfg.DATASET_LIST_DIR
    if cfg.INPUT_FILE_STYLE_LABEL != '':
      kwargs['input_file_style_label'] = cfg.INPUT_FILE_STYLE_LABEL
    dataset, num_pose_keypoints = dataset_factory.get_dataset(
      cfg.DATASET_NAME, cfg.TRAIN.DATASET_SPLIT_NAME, cfg.DATASET_DIR,
      **kwargs)

    ####################
    # Select the network #
    ####################
    network_fn = nets_factory.get_network_fn(
        cfg.MODEL_NAME,
        num_classes=(dataset.num_classes),
        num_pose_keypoints=num_pose_keypoints,
        weight_decay=cfg.TRAIN.WEIGHT_DECAY,
        is_training=True,
        cfg=cfg)  # advanced network creation controlled with cfg.NET

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = cfg.MODEL_NAME
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=cfg.NUM_READERS,
          common_queue_capacity=20 * cfg.TRAIN.BATCH_SIZE,
          common_queue_min=10 * cfg.TRAIN.BATCH_SIZE)

      [image, pose_label_hmap,
       pose_label_valid, action_label] = train_preprocess_pipeline(
         provider, cfg, network_fn, num_pose_keypoints,
         image_preprocessing_fn)
      # input_data = [preprocess_pipeline(
      #   provider, cfg, network_fn, num_pose_keypoints, image_preprocessing_fn)
      #   for _ in range(cfg.NUM_PREPROCESSING_THREADS)]

      images, pose_labels_hmap, pose_labels_valid, action_labels = tf.train.batch(
          [image, pose_label_hmap, pose_label_valid, action_label],
          # input_data,
          batch_size=cfg.TRAIN.BATCH_SIZE,
          num_threads=cfg.NUM_PREPROCESSING_THREADS,
          capacity=5 * cfg.TRAIN.BATCH_SIZE)
      batch_queue = slim.prefetch_queue.prefetch_queue(
        [images, pose_labels_hmap, pose_labels_valid, action_labels],
        capacity=5 * deploy_config.num_clones * cfg.TRAIN.ITER_SIZE)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels_pose, labels_pose_valid, labels_action = batch_queue.dequeue()
      # due to the multi-frame/video thing, need to squeeze first 2 dimensions
      labels_pose = tf.concat(tf.unstack(labels_pose), axis=0)
      labels_pose_valid = tf.concat(tf.unstack(labels_pose_valid), axis=0)
      logits, end_points = network_fn(images)
      pose_logits = end_points['PoseLogits']

      #############################
      # Specify the loss function #
      #############################
      # if 'AuxLogits' in end_points:
      #   slim.losses.softmax_cross_entropy(
      #       end_points['AuxLogits'], labels,
      #       label_smoothing=cfg.TRAIN.LABEL_SMOOTHING, weight=0.4, scope='aux_loss')
      # slim.losses.softmax_cross_entropy(
      #     logits, labels, label_smoothing=cfg.TRAIN.LABEL_SMOOTHING, weight=1.0)
      end_points['Images'] = images
      end_points['PoseLabels'] = labels_pose
      end_points['ActionLabels'] = labels_action
      end_points['ActionLogits'] = logits
      tf.logging.info('PoseLogits shape is {}.'.format(pose_logits.get_shape().as_list()))

      gen_losses(labels_action, logits, cfg.TRAIN.LOSS_FN_ACTION,
                 dataset.num_classes, cfg.TRAIN.LOSS_FN_ACTION_WT,
                 labels_pose, pose_logits, cfg.TRAIN.LOSS_FN_POSE,
                 labels_pose_valid, cfg.TRAIN.LOSS_FN_POSE_WT, end_points, cfg)

      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs

    # store the end points in a global variable for debugging in train_step
    global end_points_debug
    end_points_debug = end_points

    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      # summaries.add(tf.summary.scalar(tf.nn.zero_fraction(x),
      #                                 name='sparsity/' + end_point))
    sum_img = tf.concat(tf.unstack(end_points['Images']), axis=0)
    if sum_img.get_shape().as_list()[-1] not in [1, 3, 4]:
      sum_img = tf.reduce_sum(sum_img, axis=-1, keep_dims=True)
      sum_img = sum_img - tf.reduce_min(sum_img)
      sum_img = sum_img / (tf.reduce_max(sum_img) + cfg.EPS)
    summaries.add(tf.summary.image('images', sum_img))
    for epname in cfg.TRAIN.OTHER_IMG_SUMMARIES_TO_ADD:
      if epname in end_points:
        summaries.add(tf.summary.image('image_vis/' + epname, end_points[epname]))
    summaries = summaries.union(_summarize_heatmaps(
      'labels', end_points['PoseLabels'], sum_img))
    summaries = summaries.union(_summarize_heatmaps(
      'pose', end_points['PoseLogits'], sum_img))
    if 'PoseLossMask' in end_points:
      summaries = summaries.union(_summarize_heatmaps(
        'loss_mask/pose', end_points['PoseLossMask'], sum_img))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar(tensor=loss, name='losses/%s' % loss.op.name))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if cfg.TRAIN.MOVING_AVERAGE_VARIABLES:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          cfg.TRAIN.MOVING_AVERAGE_VARIABLES, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, num_clones, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar(tensor=learning_rate,
                                      name='learning_rate'))

    # if cfg.sync_replicas:
    #   # If sync_replicas is enabled, the averaging will be done in the chief
    #   # queue runner.
    #   optimizer = tf.train.SyncReplicasOptimizer(
    #       opt=optimizer,
    #       replicas_to_aggregate=,
    #       variable_averages=variable_averages,
    #       variables_to_average=moving_average_variables,
    #       replica_id=tf.constant(cfg.task, tf.int32, shape=()),
    #       total_num_replicas=cfg.worker_replicas)
    # elif cfg.moving_average_decay:
    #   # Update ops executed locally by trainer.
    #   update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()
    tf.logging.info('Training the following variables: {}'.format(
                    ', '.join([var.op.name for var in variables_to_train])))

    #  and returns a train_tensor and summary_op
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train,
        clip_gradients=cfg.TRAIN.CLIP_GRADIENTS)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar(tensor=total_loss,
                                    name='total_loss'))

    # Create gradient updates.
    train_ops = {}
    if cfg.TRAIN.ITER_SIZE == 1:
      grad_updates = optimizer.apply_gradients(clones_gradients,
                                               global_step=global_step)
      update_ops.append(grad_updates)

      update_op = tf.group(*update_ops)
      train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                        name='train_op')
      train_ops = train_tensor
    else:
      with tf.name_scope('AccumulateGradients'):
        # copied as is from my previous code
        gvs = [(grad, var) for grad, var in clones_gradients]
        varnames = [var.name for grad, var in gvs]
        varname_to_var = {var.name: var for grad, var in gvs}
        varname_to_grad = {var.name: grad for grad, var in gvs}
        varname_to_ref_grad = {}
        for vn in varnames:
          grad = varname_to_grad[vn]
          print("accumulating ... ", (vn, grad.get_shape()))
          with tf.variable_scope("ref_grad"):
            with tf.device(deploy_config.variables_device()):
              ref_var = slim.local_variable(
                  np.zeros(grad.get_shape(),dtype=np.float32),
                  name=vn[:-2])
              varname_to_ref_grad[vn] = ref_var

        all_assign_ref_op = [ref.assign(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]
        all_assign_add_ref_op = [ref.assign_add(varname_to_grad[vn]) for vn, ref in varname_to_ref_grad.items()]
        assign_gradients_ref_op = tf.group(*all_assign_ref_op)
        accmulate_gradients_op = tf.group(*all_assign_add_ref_op)
        with tf.control_dependencies([accmulate_gradients_op]):
          final_gvs = [(varname_to_ref_grad[var.name] / float(cfg.TRAIN.ITER_SIZE), var) for grad, var in gvs]
          apply_gradients_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
          update_ops.append(apply_gradients_op)
          update_op = tf.group(*update_ops)
          train_tensor = control_flow_ops.with_dependencies([update_op],
              total_loss, name='train_op')
        for i in range(cfg.TRAIN.ITER_SIZE):
          if i == 0:
            train_ops[i] = assign_gradients_ref_op
          elif i < cfg.TRAIN.ITER_SIZE - 1:  # because apply_gradients also computes
                                             # (see control_dependency), so
                                             # no need of running an extra iteration
            train_ops[i] = accmulate_gradients_op
          else:
            train_ops[i] = train_tensor

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.intra_op_parallelism_threads = 4  # to avoid too many threads
    # The following seems optimal... though not sure
    config.inter_op_parallelism_threads = max(
      cfg.NUM_PREPROCESSING_THREADS, 12)
    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_ops,
        train_step_fn=_train_step,
        logdir=train_dir,
        master='',
        is_chief=True,
        init_fn=_get_init_fn(train_dir),
        summary_op=summary_op,
        number_of_steps=cfg.TRAIN.MAX_NUMBER_OF_STEPS,
        log_every_n_steps=cfg.TRAIN.LOG_EVERY_N_STEPS,
        save_summaries_secs=cfg.TRAIN.SAVE_SUMMARIES_SECS,
        save_interval_secs=cfg.TRAIN.SAVE_INTERVAL_SECS,
        sync_optimizer=None,
        session_config=config)


if __name__ == '__main__':
  main()


================================================
FILE: utils/convert_mpii_result_for_eval.m
================================================
function convert(h5_file)

outfpath = [h5_file '.mat'];

TOTAL_ACT_IDS = 983;  % = max([RELEASE.act.act_id])
DATA_DIR='../src/data/mpii/mpii_tfrecords';

sample_ids = dlmread(fullfile(DATA_DIR, 'test_ids.txt'));
% scores = zeros(numel(sample_ids), numel(class_ids));
class_ids = {};

cid = 0;
fid = fopen(fullfile(DATA_DIR, 'classes.txt'), 'r');
while ~feof(fid)
  line = fgetl(fid);
  cid = cid + 1;
  parts = strsplit(line, ';');
  class_ids{cid} = parts{1};
  % nums = cellfun(@str2num, strsplit(parts{2}, ','));
  % cls_to_ids{cid} = nums;
end

scores = h5read(h5_file, '/logits')';
% for cid = 1 : numel(cls_to_ids)
%   targets = cls_to_ids{cid};
%   for i = 1 : numel(targets)
%     scores(:, targets(i)) = logits(:, cid);
%   end
% end

save(outfpath, 'sample_ids', 'class_ids', 'scores');


================================================
FILE: utils/convert_mpii_result_for_eval.sh
================================================
if [ $# -lt 1 ]; then
  echo "Usage $0 <H5 path>"
fi

nice -n 19 matlab -nodisplay -r "cd ../utils/; convert_mpii_result_for_eval('$1'); exit;"


================================================
FILE: utils/dataset_utils/gen_tfrecord_mpii.py
================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import os
import random
import sys
import scipy.io
import operator
import numpy as np

import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Set the following paths
_MPII_MAT_FILE = '/path/to/mpii_human_pose_v1_u12_1.mat'
_IMG_DIR = '/path/to/MPII/images/'


dataset_dir = '../../src/data/mpii/mpii_tfrecords/'
_SPLITS_PATH = '../../src/data/mpii/lists/images_mpii_{0}.txt'

# Seed for repeatability.
_RANDOM_SEED = 42

# The number of shards per dataset split.
_NUM_SHARDS = 20

_NUM_JOINTS = 16  # for pose

class ImageReader(object):
  """Helper class that provides TensorFlow image coding utilities."""

  def __init__(self):
    # Initializes function that decodes RGB JPEG data.
    self._decode_jpeg_data = tf.placeholder(dtype=tf.string)
    self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3)

  def read_image_dims(self, sess, image_data):
    image = self.decode_jpeg(sess, image_data)
    return image.shape[0], image.shape[1]

  def decode_jpeg(self, sess, image_data):
    image = sess.run(self._decode_jpeg,
                     feed_dict={self._decode_jpeg_data: image_data})
    assert len(image.shape) == 3
    assert image.shape[2] == 3
    return image


def int64_feature(values):
  """Returns a TF-Feature of int64s.

  Args:
    values: A scalar or list of values.

  Returns:
    a TF-Feature.
  """
  if not isinstance(values, (tuple, list)):
    values = [values]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def float_feature(values):
  return tf.train.Feature(float_list=tf.train.FloatList(value=values))


def bytes_feature(values):
  """Returns a TF-Feature of bytes.

  Args:
    values: A string.

  Returns:
    a TF-Feature.
  """
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))


def image_to_tfexample(image_data, image_format, height, width,
                       pose, # [x,y,is_vis,...]
                       action_label):
  assert(len(pose) % (_NUM_JOINTS * 3) == 0)
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': bytes_feature(image_data),
      'image/format': bytes_feature(image_format),
      'image/class/pose': int64_feature([int(el) for el in pose]),
      'image/class/action_label': int64_feature(action_label),
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
  }))


def _get_dataset_filename(dataset_dir, split_name, shard_id):
  output_filename = 'mpii_%s_%05d-of-%05d.tfrecord' % (
      split_name, shard_id, _NUM_SHARDS)
  return os.path.join(dataset_dir, output_filename)


def _convert_dataset(split_name, list_to_write, dataset_dir):
  num_per_shard = int(math.ceil(len(list_to_write) / float(_NUM_SHARDS)))

  with tf.Graph().as_default():
    image_reader = ImageReader()

    with tf.Session('') as sess:

      for shard_id in range(_NUM_SHARDS):
        output_filename = _get_dataset_filename(
            dataset_dir, split_name, shard_id)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
          start_ndx = shard_id * num_per_shard
          end_ndx = min((shard_id+1) * num_per_shard, len(list_to_write))
          for i in range(start_ndx, end_ndx):
            sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                i+1, len(list_to_write), shard_id))
            sys.stdout.flush()

            # Read the filename:
            fname = os.path.join(_IMG_DIR, list_to_write[i][0])
            action_label = list_to_write[i][1]
            poses = list_to_write[i][2]
            all_joints = []
            for pose in poses:
              joints = dict((el[0], [el[1], el[2], el[3]]) for el in pose)
              final_pose = []
              for i in range(_NUM_JOINTS):
                if i in joints:
                  final_pose.append(joints[i])
                else:
                  final_pose.append([-1, -1, 0])
              final_pose = [item for sublist in final_pose for item in sublist]
              all_joints += final_pose
            assert(len(all_joints) % 16 == 0)
            image_data = tf.gfile.FastGFile(fname, 'r').read()
            height, width = image_reader.read_image_dims(sess, image_data)

            example = image_to_tfexample(
                image_data, 'jpg', height, width, all_joints, action_label)
            tfrecord_writer.write(example.SerializeToString())

  sys.stdout.write('\n')
  sys.stdout.flush()


def _get_action_class(cname, D, act_id):
  try:
    if cname not in D:
      D[cname] = (len(D.keys()), set([act_id]))  # act_id is the actual MPII action id
    else:
      D[cname][1].add(act_id)
      # It's pretty crazy that same action will have multiple action IDs
    return D[cname][0]
  except Exception, e:
    print('Invalid class name {}. setting -1. {}'.format(cname, e))
    return -1


def main():
  T = scipy.io.loadmat(_MPII_MAT_FILE, squeeze_me=True,
                       struct_as_record=False)
  annots = T['RELEASE'].annolist
  is_train = T['RELEASE'].img_train
  action_label = T['RELEASE'].act
  splits = ['train', 'val', 'test']
  lists_to_write = {}
  img_id_in_split = {}
  all_imnames = []
  for spl in splits:
    lists_to_write[spl] = []
    img_id_in_split[spl] = []
  splits_filenames = {}
  filename_to_split = {}
  actclassname_to_id = {}
  for spl in splits:
    with open(_SPLITS_PATH.format(spl), 'r') as fin:
      splits_filenames[spl] = fin.read().splitlines()
      filename_to_split.update(dict(zip(
        splits_filenames[spl], [spl] * len(splits_filenames[spl]))))
  for aid,annot in enumerate(annots):
    imname = annot.image.name
    all_imnames.append(imname)
    try:
      this_split = filename_to_split[imname[:-4]]
    except:
      continue  # ignore this image
    points_fmted = []  # put all points one after the other
    if 'annorect' in dir(annot):
      all_rects = annot.annorect
      if isinstance(all_rects, scipy.io.matlab.mio5_params.mat_struct):
        all_rects = np.array([all_rects])
      for rect in all_rects:
        points_rect = []
        try:
          points = rect.annopoints.point
        except:
          continue
        if isinstance(points, scipy.io.matlab.mio5_params.mat_struct):
          points = np.array([points])
        for point in points:
          try:
            is_visible = point.is_visible if point.is_visible in [1,0] else 0
          except:
            is_visible = 0
          points_rect.append((point.id, point.x, point.y, is_visible))
        points_fmted.append(points_rect)
    [el.sort() for el in points_fmted]

    # the following assert is not true, so putting -1 when writing it out
    # assert(all([len(el) == 16 for el in points_fmted]))
    image_obj = (annot.image.name,
                 _get_action_class(action_label[aid].act_name,
                                   actclassname_to_id,
                                   action_label[aid].act_id),
                 points_fmted)
    if os.path.exists(os.path.join(_IMG_DIR, imname)):
      lists_to_write[this_split].append(image_obj)
      img_id_in_split[this_split].append(aid+1)  # 1-indexed
  cls_ids = sorted(actclassname_to_id.items(), key=operator.itemgetter(1))
  print('Total classes found: {}'.format(len(cls_ids)))
  #write out the dictionary of classnames
  with open(os.path.join(dataset_dir, 'classes.txt'), 'w') as fout:
    fout.write('\n'.join([el[0] + ';' + ','.join([
      str(e) for e in list(el[1][1])]) for el in cls_ids]))

  if not tf.gfile.Exists(dataset_dir):
    tf.gfile.MakeDirs(dataset_dir)

  # Only randomize the train set
  random.seed(_RANDOM_SEED)
  train_ids = range(len(lists_to_write['train']))
  random.shuffle(train_ids)
  lists_to_write['train'] = [lists_to_write['train'][i] for i in train_ids]
  img_id_in_split['train'] = [img_id_in_split['train'][i] for i in train_ids]

  with open(os.path.join(dataset_dir, 'imnames.txt'), 'w') as fout:
    fout.write('\n'.join(all_imnames))
  for spl in splits:
    with open(os.path.join(
      dataset_dir, '{}_ids.txt'.format(spl)), 'w') as fout:
      fout.write('\n'.join([str(el) for el in img_id_in_split[spl]]))
    spl_name = spl
    if spl in ['train', 'val']:
      spl_name = 'trainval_' + spl  # would be useful when training on tr+val
    print('Writing {} images for split {}.'.format(
      len(lists_to_write[spl]), spl))
    _convert_dataset(spl_name, lists_to_write[spl],
                     dataset_dir)

  print('\nFinished converting the MPII dataset!')

if __name__ == '__main__':
  main()