[
  {
    "path": ".gitignore",
    "content": ".idea\ntest.py\ndata\ncheckpoint\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# dotenv\n.env\n\n# virtualenv\n.venv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"models/shiftnet_cuda_v2\"]\n\tpath = models/shiftnet_cuda_v2\n\turl = git@github.com:peterhj/shiftnet_cuda_v2.git\n\tbranch = master\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# ShiftResNet\n\nTrain ResNet with shift operations on CIFAR10, CIFAR100 using PyTorch. This uses the [original resnet CIFAR10 codebase](https://github.com/kuangliu/pytorch-cifar.git) written by Kuang Liu. In this codebase, we replace 3x3 convolutional layers with a conv-shift-conv--a 1x1 convolutional layer, a set of shift operations, and a second 1x1 convolutional layer. The repository includes the following:\n\n- training utility to reproduce results\n- efficient implementation of the shift layer from [Peter Jin](https://people.eecs.berkeley.edu/~phj/)\n- ResNet and ShiftResNet derivatives on CIFAR10/CIFAR100\n- count utility for parameters and FLOPs\n- evaluation script for offline evaluation\n- links to 60+ pretrained models: [#12](https://github.com/alvinwan/shiftresnet-cifar/issues/12) for CIFAR-10 and CIFAR-100\n\nUnless otherwise specified, the code was written by and experiments were run by [Alvin Wan](http://alvinwan.com) with help from [Bichen Wu](https://github.com/BichenWuUCB).\n\n## [_Shift:_ A Zero FLOP, Zero Parameter Alternative to Spatial Convolutions](https://arxiv.org/pdf/1711.08141.pdf)\nBy Bichen Wu, Alvin Wan, Xiangyu Yue, Peter Jin, Sicheng Zhao, Noah Golmant, Amir Gholaminejad, Joseph Gonzalez, Kurt Keutzer\n\nTradeoffs and further analysis can be found in the paper. If you find this work useful for your research, please consider citing:\n\n    @inproceedings{shift,\n        Author = {Bichen Wu and Alvin Wan and Xiangyu Yue and Peter Jin and Sicheng Zhao and Noah Golmant and Amir Gholaminejad and Joseph Gonzalez and Kurt Keutzer},\n        Title = {Shift: A Zero FLOP, Zero Parameter Alternative to Spatial Convolutions},\n        Journal = {arXiv:1711.08141},\n        Year = {2017}\n    }\n    \n\n## Getting Started\n\n1. 
If you have not already, setup a virtual environment with Python2.7, and activate it.\n\n```\nvirtualenv shift --python=python2.7\nsource shift/bin/activate\n```\n\nYour prompt should now be prefaced with `(shift)`, as in\n\n```\n(shift) [user@server:~]$ \n```\n\n2. Install `pytorch` and `torchvision`. Access [pytorch.org](http://pytorch.org), scroll down to the \"Getting Started\" section, and select the appropriate OS, package manager, Python, and CUDA build. For example, selecting Linux, pip, Python2.7, and CUDA 8 gives the following, as of the time of this writing\n\n```\npip install pytorch torchvision # upgrade to latest PyTorch 0.4.1 official stable version\n```\n\n3. Clone the repository\n\n```\ngit clone --recursive git@github.com:alvinwan/shiftresnet-cifar.git\n```\n\n4. `cd` into the cuda layer repository.\n```\ncd shiftresnet-cifar/models/shiftnet_cuda_v2\n```\n\n5. Follow the [ShiftNet Cuda layer instructions](https://github.com/peterhj/shiftnet_cuda_v2), steps 5 and 6:\n\n```\npip install -r requirements.txt\nmake\n```\n\n6. In dir `shiftresnet-cifar/models/shiftnet_cuda_v2`, create an additional `__init__.py` so that Python2 can use `shiftnet_cuda_v2` as a module.\n\n```\ntouch __init__.py\n```\n\n7. Then, `cd` back into the root of this repository. Create the `checkpoint` directory and download a checkpoint.\n\n```\ncd ../..\nmkdir checkpoint\n```\n\nIn this example below, we download the original `ResNet20`, 3x smaller `ShiftResNet20-3`, and 3x smaller `ResNet20`. Download [all CIFAR-100 models](https://github.com/alvinwan/shiftresnet-cifar/issues/12). Save these in a `checkpoint` directory, so that your file structure resembles the following:\n\n```\nshiftresnet-cifar/\n   |\n   |-- eval.py\n   |-- checkpoint/\n       |-- resnet20_cifar100.t7\n       |-- ...\n```\n\n8. Run the following. This will get you started, downloading the dataset locally to `./data` accordingly. 
We begin by just evaluating the original ResNet model on CIFAR100.\n\n```\npython eval.py --model=checkpoint/resnet20_cifar100.t7 --dataset=cifar100\n```\n\nThis default ResNet model should give 66.25%. By default, the script loads and trains on CIFAR10. Use the `--dataset` flag, as above, for CIFAR100.\n\n### ShiftNet Expansion\n\nTo control the expansion hyperparameter for ShiftNet, identify a ShiftNet architecture and apply expansion. For example, the following uses ResNet20 with Shift modules of expansion `3c`. We should start by counting parameters and FLOPS (for CIFAR10/CIFAR100):\n\n```\npython count.py --arch=shiftresnet20 --expansion=3\n```\n\nThis should output the following parameter and FLOP count:\n\n```\nParameters: (new) 95642 (original) 272474 (reduction) 2.85\nFLOPs: (new) 16581248 (original) 40960640 (reduction) 2.47\n```\n\nWe can then evaluate the associated ShiftResNet, which we downloaded in the first part of this README. Note the arguments to `main.py` and `count.py` are very similar.\n\n```\npython eval.py --model=checkpoint/shiftresnet20_3.0_cifar100.t7 --dataset=cifar100\n```\n\nThe ShiftResNet model above yields 70.77% on CIFAR-100.\n\n### ResNet Reduction\n\nTo reduce ResNet by some factor, in terms of its parameters, specify a reduction either block-wise or net-wise. The former reduces the internal channel representation for each BasicBlock. The latter reduces the input and output channels for all convolution layers by half. First, we can check the reduction in parameter count for the entire network. 
For example, we specify a block-wise reduction of 3x below:\n\n```\npython count.py --arch=resnet20 --reduction=2.8 --reduction-mode=block\n```\n\nThis should output the following parameter and FLOP count:\n\n```\n==> resnet20 with reduction 2.80\nParameters: (new) 96206 (original) 272474 (reduction) 2.83\nFLOPs: (new) 14197376 (original) 40960640 (reduction) 2.89\n```\n\nWe again evaluate the associated neural network, which we downloaded in the first part of this README.\n\n```\npython eval.py --model=checkpoint/resnet20_2.8_block_cifar100.t7 --dataset=cifar100\n```\n\nThis reduced ResNet gives 68.30% accuracy on CIFAR-100, 2.47% less than ShiftResNet despite having several hundred more parameters.\n\n## Experiments\n\nBelow, we run experiments on the following:\n\n1. Varying expansion used for all conv-shift-conv layers in the neural network. Here, we replace 3x3 filters.\n2. Varying number of output channels for a 3x3 convolution filter, matching the reduction in parameters that shift provides. This is `--reduction-mode=block`, which is *not* the default reduction mode.\n\n`a` is the number of filters in the first set of 1x1 convolutional filters. `c` is the number of channels in our input.\n\n### CIFAR-100 Accuracy\n\nAccuracies below are all Top 1. All CIFAR-100 pretrained models can be found [here](https://github.com/alvinwan/shiftresnet-cifar/issues/12) (It's worth noticing that this pre-trained model is encoded in the python2 way which may cause problems when the model is loaded in a python3 program.). 
Below, we compare reductions in parameters for the entire net (`--reduction-mode=net`) and block-wise (`--reduction-mode=block`):\n\n| Model | `e` | SRN Acc* | RN Conv Acc | RN Depth Acc | Params | Reduction (conv) | `r`** | `r`*** |\n|-------|-----|----------|-------------|--------------|--------|------------------|-------|--------|\n| ResNet20  | 1c | 55.05% | 50.23% | **61.32%** | 0.03 | 7.8 (7.2) | 1.12 | 0.38 |\n| ResNet20  | 3c | **65.83%** | 60.72% | 64.51% | 0.10 | 2.9 (2.8) | 0.38 | 0.13 | \n| ResNet20  | 6c | **69.73%** | 65.59% | 65.38% | 0.19 | 1.5 | 0.19 | 0.065 |\n| ResNet20  | 9c | **70.77%** | 68.30% | 65.59% | 0.28 | .98 | 0.125 | 0.04 |\n| ResNet20  | -- | -- | 66.25% | -- | 0.27 | 1.0 | -- | -- |\n| ResNet56  | 1c | 63.20% | 58.70% | **65.30%** | 0.10 | 8.4 (7.6) | 1.12 | 0.38 |\n| ResNet56  | 3c | **69.77%** | 66.89% | 66.49% | 0.29 | 2.9 | 0.37 | 0.128 |\n| ResNet56  | 6c | **72.33%** | 70.49% | 67.46% | 0.58 | 1.5 | 0.19 | 0.065 |\n| ResNet56  | 9c | **73.43%** | 71.57% | 67.75% | 0.87 | 0.98 | 0.124 | 0.04 |\n| ResNet56  | -- | -- | 69.27% | -- | 0.86 | 1.0 | -- | -- |\n| ResNet110 | 1c | **68.01%** | 65.79% | 65.80% | 0.20 | 8.5 (7.8) | 1.1 | 0.37 |\n| ResNet110 | 3c | **72.10%** | 70.22% | 67.22% | 0.59 | 2.9 | 0.37 | 0.125 |\n| ResNet110 | 6c | **73.17%** | 72.21% | 68.11% | 1.18 | 1.5 | 0.19 | 0.065 |\n| ResNet110 | 9c | **73.71%** | 72.67% | 68.39% | 1.76 | 0.98 | 0.123 | 0.04 |\n| ResNet110 | -- | -- | 72.11% | -- | 1.73 | 1.0 | -- | -- |\n\n`*` `SRN` ShiftResNet and `RN` ResNet accuracy using convolutional layers (by reducing the number of channels in the intermediate representation of each ResNet block) and using depth-wise convolutional layers (again reducing number of channels in intermediate representation)\n\n`**` This parameter `r` is used for the `--reduction` flag when replicating results for depth-wise convolutional blocks AND for mobilenet blocks.\n\n`***` This parameter `r` is used for the `--reduction` flag with shuffle 
blocks.\n\n### CIFAR-10 Accuracy\n\nAll CIFAR-10 pretrained models can be found [here](https://github.com/alvinwan/shiftresnet-cifar/issues/12) (as above, the checkpoints are encoded the Python 2 way, which differs from Python 3's encoding).\n\n| Model | `e` | ShiftResNet Acc | ResNet Acc | Params* | Reduction** |\n|-------|-----|-----|-----------|---------|-------------|\n| ResNet20 | c | 85.78% | 84.77% | 0.03 | 7.8 (7.2) |\n| ResNet20 | 3c | 89.56% | 88.81% | 0.10 | 2.9 (2.8) |\n| ResNet20 | 6c | 91.07% | 91.30% | 0.19 | 1.5  |\n| ResNet20 | 9c | 91.79% | 91.96% | 0.28 | .98 |\n| ResNet20 | original | - | 91.35% | 0.27 | 1.0 |\n| ResNet56 | c | 89.69% | 88.32% | 0.10 | 8.4 (7.6) |\n| ResNet56 | 3c | 92.48% | 91.20% | 0.29 | 2.9 |\n| ResNet56 | 6c | 93.49% | 93.01% | 0.58 | 1.5 |\n| ResNet56 | 9c | 93.17% | 93.74% | 0.87 | 0.98 |\n| ResNet56 | original | - | 92.01% | 0.86 | 1.0 |\n| ResNet110 | c | 90.67% | 89.79% | 0.20 | 8.5 (7.8) |\n| ResNet110 | 3c | 92.42% | 93.18% | 0.59 | 2.9 |\n| ResNet110 | 6c | 93.03% | 93.40% | 1.18 | 1.5 |\n| ResNet110 | 9c | 93.36% | 94.09% | 1.76 | 0.98 (0.95) |\n| ResNet110 | original | - | 92.46% | 1.73 | 1.0 |\n\n`*` parameters are in the millions\n\n`**` The number in parentheses is the reduction in parameters we used for ResNet, if we could not obtain the exact reduction in parameters used for shift.\n\n`***` If using `--reduction-mode=block`, pass the `reduction` to `main.py` for the `--reduction` flag, to reproduce the provided accuracies. This represents the amount to reduce each resnet block's number of \"internal convolutional channels\" by. In contrast, the column to the left of it is the total neural network's reduction in parameters.\n"
  },
  {
    "path": "count.py",
    "content": "from models import ResNet20\nfrom models import ShiftResNet20\nfrom models import ResNet56\nfrom models import ShiftResNet56\nfrom models import ResNet110\nfrom models import ShiftResNet110\nimport torch\nfrom torch.autograd import Variable\nimport numpy as np\nimport argparse\n\nall_models = {\n    'resnet20': ResNet20,\n    'shiftresnet20': ShiftResNet20,\n    'resnet56': ResNet56,\n    'shiftresnet56': ShiftResNet56,\n    'resnet110': ResNet110,\n    'shiftresnet110': ShiftResNet110,\n}\n\nparser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')\nparser.add_argument('--arch', choices=all_models.keys(),\n                    help='Architecture to count parameters for', default='shiftresnet110')\nparser.add_argument('--expansion', type=int, default=1, help='expansion for shift layers')\nparser.add_argument('--reduction', type=float, default=1, help='reduction for resnet')\nparser.add_argument('--reduction-mode', choices=('block', 'net', 'depthwise', 'shuffle', 'mobile'), help='\"block\" reduces inner representation for BasicBlock, \"net\" reduces for all layers', default='net')\nargs = parser.parse_args()\n\ndef count_params(net):\n     return sum([np.prod(param.size()) for name, param in net.named_parameters()])\n\ndef count_flops(net):\n     \"\"\"Approximately count number of FLOPs\"\"\"\n     dummy = Variable(torch.randn(1, 3, 32, 32)).cuda()  # size is specific to cifar10, cifar100!\n     net.cuda().forward(dummy)\n     return net.flops()\n\noriginal = all_models[args.arch.replace('shift', '')]()\noriginal_count = count_params(original)\noriginal_flops = count_flops(original)\n\ncls = all_models[args.arch]\n\nassert 'shift' not in args.arch or args.reduction == 1, \\\n    'Only default resnet supports reductions'\nif args.reduction != 1:\n    print('==> %s with reduction %.2f' % (args.arch, args.reduction))\n    net = cls(reduction=args.reduction, reduction_mode=args.reduction_mode)\nelse:\n    net = cls() if 'shift' not in 
args.arch else cls(expansion=args.expansion)\nnew_count = count_params(net)\nnew_flops = count_flops(net)\n\nprint('Parameters: (new) %d (original) %d (reduction) %.2f' % (\n      new_count, original_count, float(original_count) / new_count))\nprint('FLOPs: (new) %d (original) %d (reduction) %.2f' % (\n      new_flops, original_flops, float(original_flops) / new_flops))\n"
  },
  {
    "path": "eval.py",
    "content": "'''Test CIFAR10 with PyTorch.'''\nfrom __future__ import print_function\n\nimport glob\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport torch.nn.functional as F\nimport torch.backends.cudnn as cudnn\n\nimport torchvision\nimport torchvision.transforms as transforms\n\nimport os\nimport argparse\n\nfrom models import *\nfrom utils import progress_bar\nfrom torch.autograd import Variable\n\n\nparser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')\nparser.add_argument('--model', action='append', help='Specify model to test')\nparser.add_argument('--suppress-errors', action='store_true')\nparser.add_argument('--dataset', choices=('cifar10', 'cifar100'), help='Dataset to train and validate on.', default='cifar10')\nargs = parser.parse_args()\n\nuse_cuda = torch.cuda.is_available()\nbest_acc = 0  # best test accuracy\nstart_epoch = 0  # start from epoch 0 or last checkpoint epoch\n\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\nif args.dataset == 'cifar10':\n    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)\nelif args.dataset == 'cifar100':\n    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)\n\ntestloader = torch.utils.data.DataLoader(testset, batch_size=512, shuffle=False, num_workers=4)\n\ncriterion = nn.CrossEntropyLoss()\n\n# Load checkpoint.\nprint('==> Resuming from checkpoint..')\nassert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'\n\ndef test(epoch):\n    global best_acc\n    net.eval()\n    test_loss = 0\n    correct = 0\n    total = 0\n    for batch_idx, (inputs, targets) in enumerate(testloader):\n        with torch.no_grad():\n            if use_cuda:\n                inputs, targets = inputs.cuda(), targets.cuda()\n            inputs, targets = 
Variable(inputs), Variable(targets)\n            outputs = net(inputs)\n            loss = criterion(outputs, targets)\n\n            test_loss += loss.item() * targets.size(0)\n            _, predicted = torch.max(outputs.data, 1)\n            total += targets.size(0)\n            correct += predicted.eq(targets.data).cpu().sum()\n\n        progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\n            % (test_loss/total, 100.*correct/total, correct, total))\n    return ' '\n\nfor pattern in args.model:\n  for model in sorted(glob.iglob(pattern), reverse=True):\n    print('Reading from model', model)\n    checkpoint = torch.load(model)\n    net = checkpoint['net']\n    best_acc = checkpoint.get('acc', 0)\n    start_epoch = checkpoint.get('epoch', 0)\n    if use_cuda:\n        net.cuda()\n        net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))\n        cudnn.benchmark = True\n    if args.suppress_errors:\n      try:\n        print(test(0))\n      except AssertionError as e:\n        print('The model may be malformed.')\n        print(e)\n    else:\n      print(test(0))\n"
  },
  {
    "path": "main.py",
    "content": "'''Train CIFAR10 with PyTorch.'''\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nimport torch.nn.functional as F\nimport torch.backends.cudnn as cudnn\n\nimport torchvision\nimport torchvision.transforms as transforms\n\nimport os\nimport argparse\n\nfrom models import ResNet20\nfrom models import ResNet56\nfrom models import ResNet110\nfrom models import ShiftResNet20\nfrom models import ShiftResNet56\nfrom models import ShiftResNet110\nfrom models import DepthwiseResNet20\nfrom models import DepthwiseResNet56\nfrom models import DepthwiseResNet110\nfrom utils import progress_bar\nfrom torch.autograd import Variable\n\n\nall_models = {\n    'resnet20': ResNet20,\n    'shiftresnet20': ShiftResNet20,\n    'depthwiseresnet20': DepthwiseResNet20,\n    'resnet56': ResNet56,\n    'shiftresnet56': ShiftResNet56,\n    'depthwiseresnet56': DepthwiseResNet56,\n    'resnet110': ResNet110,\n    'shiftresnet110': ShiftResNet110,\n    'depthwiseresnet110': DepthwiseResNet110\n}\n\nparser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')\nparser.add_argument('--lr', default=0.1, type=float, help='learning rate')\nparser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')\nparser.add_argument('--batch_size', '-b', default=128, type=int, help='batch size')\nparser.add_argument('--arch', '-a', choices=all_models.keys(), default='shiftresnet110', help='neural network architecture')\nparser.add_argument('--expansion', '-e', help='Expansion for shift resnet.', default=1, type=float)\nparser.add_argument('--reduction', help='Amount to reduce raw resnet model by', default=1.0, type=float)\nparser.add_argument('--reduction-mode', choices=('block', 'net', 'depthwise'), help='\"block\" reduces inner representation for BasicBlock, \"net\" reduces for all layers', default='net')\nparser.add_argument('--dataset', choices=('cifar10', 'cifar100', 'imagenet'), help='Dataset to train and validate on.', 
default='cifar10')\nparser.add_argument('--datadir', help='Folder containing data', default='./data/')\nargs = parser.parse_args()\n\nuse_cuda = torch.cuda.is_available()\nbest_acc = 0.0  # best test accuracy\nstart_epoch = 0  # start from epoch 0 or last checkpoint epoch\n\n# Data\nprint('==> Preparing data..')\ntransform_train = transforms.Compose([\n    transforms.RandomCrop(32, padding=4),\n    transforms.RandomHorizontalFlip(),\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\ntransform_test = transforms.Compose([\n    transforms.ToTensor(),\n    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n])\n\nif args.dataset == 'cifar10':\n    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)\n    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)\n    num_classes=10\nelif args.dataset == 'cifar100':\n    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)\n    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)\n    num_classes = 100\nelif args.dataset == 'imagenet':\n    raise NotImplementedError()\n    transform_train = transforms.Compose([\n        transforms.RandomSizedCrop(224),\n        transforms.RandomHorizontalFlip(),\n        transforms.ToTensor(),\n        transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],\n                             std = [ 0.229, 0.224, 0.225 ]),\n    ])\n\n    transform_test = transforms.Compose([\n        transforms.ToTensor(),\n        transforms.Normalize(mean = [ 0.485, 0.456, 0.406 ],\n                             std = [ 0.229, 0.224, 0.225 ]),\n    ])\n\n    traindir = os.path.join(args.datadir, 'train')\n    valdir = os.path.join(args.datadir, 'val')\n    trainset = torchvision.datasets.ImageFolder(traindir, 
transform_train)\n    testset = torchvision.datasets.ImageFolder(valdir, transform_test)\n    num_classes = 1000\n\ntrainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=2)\ntestloader = torch.utils.data.DataLoader(testset, batch_size=1000, shuffle=False, num_workers=2)\n\n\nif 'shift' in args.arch:\n    suffix = '_%s' % args.expansion\nelif args.reduction != 1:\n    suffix = '_%s_%s' % (args.reduction, args.reduction_mode)\nelse:\n    suffix = ''\n\nif args.dataset == 'cifar100':\n    suffix += '_cifar100'\n\nif args.dataset == 'imagenet':\n    suffix += '_imagenet'\n\npath = './checkpoint/%s%s.t7' % (args.arch, suffix)\nprint('Using path: %s' % path)\n\n# Model\nif args.resume:\n    # Load checkpoint.\n    print('==> Resuming from checkpoint.. %s' % path)\n    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'\n    checkpoint = torch.load(path)\n    net = checkpoint['net']\n    best_acc = float(checkpoint['acc'])\n    start_epoch = checkpoint['epoch']\nelse:\n    print('==> Building model..')\n    cls = all_models[args.arch]\n    assert 'shift' not in args.arch or args.reduction == 1, \\\n        'Only default resnet and depthwise resnet support reductions'\n    if args.reduction != 1:\n        print('==> %s with reduction %.2f' % (args.arch, args.reduction))\n        net = cls(reduction=args.reduction, reduction_mode=args.reduction_mode, num_classes=num_classes)\n    else:\n        net = cls(args.expansion, num_classes=num_classes) if 'shift' in args.arch else cls(num_classes=num_classes)\n\nif use_cuda:\n    net.cuda()\n    net = torch.nn.DataParallel(\n        net, device_ids=range(torch.cuda.device_count()))\n    cudnn.benchmark = True\n\ncriterion = nn.CrossEntropyLoss()\n\ndef adjust_learning_rate(epoch, lr):\n    if epoch <= 81:  # 32k iterations\n      return lr\n    elif epoch <= 122:  # 48k iterations\n      return lr/10\n    else:\n      return lr/100\n\n# Training\ndef 
train(epoch):\n    lr = adjust_learning_rate(epoch, args.lr)\n    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)\n    print('\\nEpoch: %d' % epoch)\n    net.train()\n    train_loss = 0\n    correct = 0\n    total = 0\n    for batch_idx, (inputs, targets) in enumerate(trainloader):\n        if use_cuda:\n            inputs, targets = inputs.cuda(), targets.cuda()\n        optimizer.zero_grad()\n        inputs, targets = Variable(inputs), Variable(targets)\n        outputs = net(inputs)\n        loss = criterion(outputs, targets)\n        loss.backward()\n        optimizer.step()\n\n        train_loss += loss.item() * targets.size(0)\n        _, predicted = torch.max(outputs.data, 1)\n        total += targets.size(0)\n        correct += predicted.eq(targets.data).cpu().sum()\n\n        progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\n            % (train_loss/total, 100.*float(correct)/float(total), correct, total))\n\ndef test(epoch):\n    global best_acc\n    net.eval()\n    test_loss = 0\n    correct = 0\n    total = 0\n    for batch_idx, (inputs, targets) in enumerate(testloader):\n        with torch.no_grad():\n            if use_cuda:\n                inputs, targets = inputs.cuda(), targets.cuda()\n            inputs, targets = Variable(inputs), Variable(targets)\n            outputs = net(inputs)\n            loss = criterion(outputs, targets)\n\n            test_loss += loss.item() * targets.size(0)\n            _, predicted = torch.max(outputs.data, 1)\n            total += targets.size(0)\n            correct += predicted.eq(targets.data).cpu().sum()\n\n        progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'\n            % (test_loss/total, 100.*float(correct)/float(total), correct, total))\n\n    # Save checkpoint.\n    acc = 100.*float(correct)/float(total)\n    if acc > best_acc:\n        print('Saving..')\n        state = {\n            'net': net.module if 
use_cuda else net,\n            'acc': acc,\n            'epoch': epoch,\n        }\n        if not os.path.isdir('checkpoint'):\n            os.mkdir('checkpoint')\n        torch.save(state, path)\n        print('* Saved checkpoint to %s' % path)\n        best_acc = acc\n\n\nfor epoch in range(start_epoch, 164):\n    train(epoch)\n    test(epoch)\n"
  },
  {
    "path": "models/__init__.py",
    "content": "from .resnet import *\nfrom .shiftresnet import *\nfrom .depthwiseresnet import *\n"
  },
  {
    "path": "models/depthwiseresnet.py",
    "content": "\"\"\"PyTorch implementation of DepthwiseResNet\n\nShiftResNet modifications written by Bichen Wu and Alvin Wan.\n\nReference:\n[1] Bichen Wu, Alvin Wan, Xiangyu Yue, Peter Jin, Sicheng Zhao, Noah Golmant,\n    Amir Gholaminejad, Joseph Gonzalez, Kurt Keutzer\n    Shift: A Zero FLOP, Zero Parameter Alternative to Spatial Convolutions.\n    arXiv:1711.08141\n\"\"\"\n\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom .resnet import ResNet\n\n\nclass DepthWiseWithSkipBlock(nn.Module):\n\n    def __init__(self, in_planes, out_planes, stride=1, reduction=1):\n        super(DepthWiseWithSkipBlock, self).__init__()\n        self.expansion = 1 / float(reduction)\n        self.in_planes = in_planes\n        self.mid_planes = mid_planes = int(self.expansion * out_planes)\n        self.out_planes = out_planes\n\n        self.conv1 = nn.Conv2d(\n            in_planes, mid_planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(mid_planes)\n        self.depth = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, padding=1,\n                               stride=1, bias=False, groups=mid_planes)\n        self.bn2 = nn.BatchNorm2d(mid_planes)\n        self.conv3 = nn.Conv2d(\n            mid_planes, out_planes, kernel_size=1, bias=False, stride=stride)\n        self.bn3 = nn.BatchNorm2d(out_planes)\n\n        self.shortcut = nn.Sequential()\n        if stride != 1 or in_planes != out_planes:\n            self.shortcut = nn.Sequential(\n                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,\n                          bias=False),\n                nn.BatchNorm2d(out_planes)\n            )\n\n    def flops(self):\n        if not hasattr(self, 'int_nchw'):\n            raise UserWarning('Must run forward at least once')\n        (_, _, int_h, int_w), (\n        _, _, out_h, out_w) = self.int_nchw, self.out_nchw\n        flops = int_h * int_w * self.mid_planes * self.in_planes + out_h * out_w * self.mid_planes * 
self.out_planes\n        flops += out_h * out_w * self.mid_planes * 9  # depth-wise convolution\n        if len(self.shortcut) > 0:\n            flops += self.in_planes * self.out_planes * out_h * out_w\n        return flops\n\n    def forward(self, x):\n        out = F.relu(self.bn1(self.conv1(x)))\n        self.int_nchw = out.size()\n        out = self.bn2(self.depth(out))\n        out = self.bn3(self.conv3(out))\n        self.out_nchw = out.size()\n        out += self.shortcut(x)\n        out = F.relu(out)\n        return out\n\n\ndef DepthwiseResNet20(reduction=1, num_classes=10):\n    block = lambda in_planes, planes, stride: \\\n        DepthWiseWithSkipBlock(in_planes, planes, stride, reduction=reduction)\n    return ResNet(block, [3, 3, 3], num_classes=num_classes)\n\n\ndef DepthwiseResNet56(reduction=1, num_classes=10):\n    block = lambda in_planes, planes, stride: \\\n        DepthWiseWithSkipBlock(in_planes, planes, stride, reduction=reduction)\n    return ResNet(block, [9, 9, 9], num_classes=num_classes)\n\n\ndef DepthwiseResNet110(reduction=1, num_classes=10):\n    block = lambda in_planes, planes, stride: \\\n        DepthWiseWithSkipBlock(in_planes, planes, stride, reduction=reduction)\n    return ResNet(block, [18, 18, 18], num_classes=num_classes)\n"
  },
  {
    "path": "models/resnet.py",
    "content": "\"\"\"PyTorch implementation of ResNet\n\nResNet modifications written by Bichen Wu and Alvin Wan, based\noff of ResNet implementation by Kuang Liu.\n\nReference:\n[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun\n    Deep Residual Learning for Image Recognition. arXiv:1512.03385\n\"\"\"\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass BasicBlock(nn.Module):\n\n    def __init__(self, in_planes, planes, stride=1, reduction=1):\n        super(BasicBlock, self).__init__()\n        self.expansion = 1 / float(reduction)\n        self.in_planes = in_planes\n        self.mid_planes = mid_planes = int(self.expansion * planes)\n        self.out_planes = planes\n\n        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=3, stride=stride, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(mid_planes)\n        self.conv2 = nn.Conv2d(mid_planes, planes, kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn2 = nn.BatchNorm2d(planes)\n\n        self.shortcut = nn.Sequential()\n        if stride != 1 or in_planes != planes:\n            self.shortcut = nn.Sequential(\n                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),\n                nn.BatchNorm2d(planes)\n            )\n\n    def flops(self):\n        if not hasattr(self, 'int_nchw'):\n            raise UserWarning('Must run forward at least once')\n        (_, _, int_h, int_w), (_, _, out_h, out_w) = self.int_nchw, self.out_nchw\n        flops = int_h*int_w*9*self.mid_planes*self.in_planes + out_h*out_w*9*self.mid_planes*self.out_planes\n        if len(self.shortcut) > 0:\n            flops += self.in_planes*self.out_planes*out_h*out_w\n        return flops\n\n    def forward(self, x):\n        out = F.relu(self.bn1(self.conv1(x)))\n        self.int_nchw = out.size()\n        out = self.bn2(self.conv2(out))\n        self.out_nchw = out.size()\n        out += self.shortcut(x)\n        out = F.relu(out)\n        return 
out\n\n\nclass ResNet(nn.Module):\n    def __init__(self, block, num_blocks, reduction=1, num_classes=10):\n        super(ResNet, self).__init__()\n        self.reduction = float(reduction) ** 0.5\n        self.num_classes = num_classes\n        self.in_planes = int(16 / self.reduction)\n\n        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=3, stride=1, padding=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(self.in_planes)\n        self.layer1 = self._make_layer(block, self.in_planes, num_blocks[0], stride=1)\n        self.layer2 = self._make_layer(block, int(32 / self.reduction), num_blocks[1], stride=2)\n        self.layer3 = self._make_layer(block, int(64 / self.reduction), num_blocks[2], stride=2)\n        self.linear = nn.Linear(int(64 / self.reduction), num_classes)\n\n    def _make_layer(self, block, planes, num_blocks, stride):\n        strides = [stride] + [1]*(num_blocks-1)\n        layers = []\n        planes = int(planes)\n        for stride in strides:\n            layers.append(block(self.in_planes, planes, stride))\n            self.in_planes = planes\n        return nn.Sequential(*layers)\n\n    def flops(self):\n        if not hasattr(self, 'int_nchw'):\n            raise UserWarning('Must run forward at least once')\n        (_, _, int_h, int_w), (out_h, out_w) = self.int_nchw, self.out_hw\n        flops = 0\n        for mod in (self.layer1, self.layer2, self.layer3):\n            for layer in mod:\n                flops += layer.flops()\n        return int_h*int_w*9*self.in_planes*3 + out_w*self.num_classes + flops\n\n    def forward(self, x):\n        out = F.relu(self.bn1(self.conv1(x)))\n        self.int_nchw = out.size()\n        out = self.layer1(out)\n        out = self.layer2(out)\n        out = self.layer3(out)\n        out = F.avg_pool2d(out, 8)\n        out = out.view(out.size(0), -1)\n        self.out_hw = out.size()\n        out = self.linear(out)\n        return out\n\n\ndef ResNetWrapper(num_blocks, reduction=1, 
reduction_mode='net', num_classes=10):\n    if reduction_mode == 'block':\n        block = lambda in_planes, planes, stride: \\\n            BasicBlock(in_planes, planes, stride, reduction=reduction)\n        return ResNet(block, num_blocks, num_classes=num_classes)\n    return ResNet(BasicBlock, num_blocks, num_classes=num_classes, reduction=reduction)\n\n\ndef ResNet20(reduction=1, reduction_mode='net', num_classes=10):\n    return ResNetWrapper([3, 3, 3], reduction, reduction_mode, num_classes)\n\n\ndef ResNet56(reduction=1, reduction_mode='net', num_classes=10):\n    return ResNetWrapper([9, 9, 9], reduction, reduction_mode, num_classes)\n\n\ndef ResNet110(reduction=1, reduction_mode='net', num_classes=10):\n    return ResNetWrapper([18, 18, 18], reduction, reduction_mode, num_classes)\n"
  },
  {
    "path": "models/shiftresnet.py",
    "content": "\"\"\"PyTorch implementation of ShiftResNet\n\nShiftResNet modifications written by Bichen Wu and Alvin Wan. Efficient CUDA\nimplementation of shift written by Peter Jin.\n\nReference:\n[1] Bichen Wu, Alvin Wan, Xiangyu Yue, Peter Jin, Sicheng Zhao, Noah Golmant,\n    Amir Gholaminejad, Joseph Gonzalez, Kurt Keutzer\n    Shift: A Zero FLOP, Zero Parameter Alternative to Spatial Convolutions.\n    arXiv:1711.08141\n\"\"\"\n\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom .resnet import ResNet\nfrom models.shiftnet_cuda_v2.nn import GenericShift_cuda\n\n\nclass ShiftConv(nn.Module):\n\n    def __init__(self, in_planes, out_planes, stride=1, expansion=1):\n        super(ShiftConv, self).__init__()\n        self.expansion = expansion\n        self.in_planes = in_planes\n        self.out_planes = out_planes\n        self.mid_planes = mid_planes = int(out_planes * self.expansion)\n\n        self.conv1 = nn.Conv2d(\n            in_planes, mid_planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(mid_planes)\n\n        self.shift2 = GenericShift_cuda(kernel_size=3, dilate_factor=1)\n        self.conv2 = nn.Conv2d(\n            mid_planes, out_planes, kernel_size=1, bias=False, stride=stride)\n        self.bn2 = nn.BatchNorm2d(out_planes)\n\n        self.shortcut = nn.Sequential()\n        if stride != 1 or in_planes != out_planes:\n            self.shortcut = nn.Sequential(\n                nn.Conv2d(\n                  in_planes, out_planes, kernel_size=1, stride=stride,\n                  bias=False),\n                nn.BatchNorm2d(out_planes)\n            )\n\n    def flops(self):\n        if not hasattr(self, 'int_nchw'):\n            raise UserWarning('Must run forward at least once')\n        (_, _, int_h, int_w), (_, _, out_h, out_w) = self.int_nchw, self.out_nchw\n        flops = int_h * int_w * self.in_planes * self.mid_planes + \\\n                out_h * out_w * self.mid_planes * self.out_planes\n        if 
len(self.shortcut) > 0:\n            flops += self.in_planes * self.out_planes * out_h * out_w\n        return flops\n\n    def forward(self, x):\n        shortcut = self.shortcut(x)\n        x = F.relu(self.bn1(self.conv1(x)))\n        self.int_nchw = x.size()\n        x = F.relu(self.bn2(self.conv2(self.shift2(x))))\n        self.out_nchw = x.size()\n        x += shortcut\n        return x\n\n\ndef ShiftResNet20(expansion=1, num_classes=10):\n    block = lambda in_planes, out_planes, stride: \\\n        ShiftConv(in_planes, out_planes, stride, expansion=expansion)\n    return ResNet(block, [3, 3, 3], num_classes=num_classes)\n\n\ndef ShiftResNet56(expansion=1, num_classes=10):\n    block = lambda in_planes, out_planes, stride: \\\n        ShiftConv(in_planes, out_planes, stride, expansion=expansion)\n    return ResNet(block, [9, 9, 9], num_classes=num_classes)\n\n\ndef ShiftResNet110(expansion=1, num_classes=10):\n    block = lambda in_planes, out_planes, stride: \\\n        ShiftConv(in_planes, out_planes, stride, expansion=expansion)\n    return ResNet(block, [18, 18, 18], num_classes=num_classes)\n"
  },
  {
    "path": "requirements.txt",
    "content": "cffi==1.11.2\nnumpy==1.13.3\n"
  },
  {
    "path": "utils.py",
    "content": "'''Some helper functions for PyTorch, including:\n    - get_mean_and_std: calculate the mean and std value of dataset.\n    - msr_init: net parameter initialization.\n    - progress_bar: progress bar mimic xlua.progress.\n'''\nimport os\nimport sys\nimport time\nimport math\n\nimport torch.nn as nn\nimport torch.nn.init as init\n\n\ndef get_mean_and_std(dataset):\n    '''Compute the mean and std value of dataset.'''\n    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)\n    mean = torch.zeros(3)\n    std = torch.zeros(3)\n    print('==> Computing mean and std..')\n    for inputs, targets in dataloader:\n        for i in range(3):\n            mean[i] += inputs[:,i,:,:].mean()\n            std[i] += inputs[:,i,:,:].std()\n    mean.div_(len(dataset))\n    std.div_(len(dataset))\n    return mean, std\n\ndef init_params(net):\n    '''Init layer parameters.'''\n    for m in net.modules():\n        if isinstance(m, nn.Conv2d):\n            init.kaiming_normal(m.weight, mode='fan_out')\n            if m.bias:\n                init.constant(m.bias, 0)\n        elif isinstance(m, nn.BatchNorm2d):\n            init.constant(m.weight, 1)\n            init.constant(m.bias, 0)\n        elif isinstance(m, nn.Linear):\n            init.normal(m.weight, std=1e-3)\n            if m.bias:\n                init.constant(m.bias, 0)\n\n\n_, term_width = os.popen('stty size', 'r').read().split()\nterm_width = int(term_width)\n\nTOTAL_BAR_LENGTH = 65.\nlast_time = time.time()\nbegin_time = last_time\ndef progress_bar(current, total, msg=None):\n    global last_time, begin_time\n    if current == 0:\n        begin_time = time.time()  # Reset for new bar.\n\n    cur_len = int(TOTAL_BAR_LENGTH*current/total)\n    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1\n\n    sys.stdout.write(' [')\n    for i in range(cur_len):\n        sys.stdout.write('=')\n    sys.stdout.write('>')\n    for i in range(rest_len):\n        
sys.stdout.write('.')\n    sys.stdout.write(']')\n\n    cur_time = time.time()\n    step_time = cur_time - last_time\n    last_time = cur_time\n    tot_time = cur_time - begin_time\n\n    L = []\n    L.append('  Step: %s' % format_time(step_time))\n    L.append(' | Tot: %s' % format_time(tot_time))\n    if msg:\n        L.append(' | ' + msg)\n\n    msg = ''.join(L)\n    sys.stdout.write(msg)\n    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):\n        sys.stdout.write(' ')\n\n    # Go back to the center of the bar.\n    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):\n        sys.stdout.write('\\b')\n    sys.stdout.write(' %d/%d ' % (current+1, total))\n\n    if current < total-1:\n        sys.stdout.write('\\r')\n    else:\n        sys.stdout.write('\\n')\n    sys.stdout.flush()\n\ndef format_time(seconds):\n    days = int(seconds / 3600/24)\n    seconds = seconds - days*3600*24\n    hours = int(seconds / 3600)\n    seconds = seconds - hours*3600\n    minutes = int(seconds / 60)\n    seconds = seconds - minutes*60\n    secondsf = int(seconds)\n    seconds = seconds - secondsf\n    millis = int(seconds*1000)\n\n    f = ''\n    i = 1\n    if days > 0:\n        f += str(days) + 'D'\n        i += 1\n    if hours > 0 and i <= 2:\n        f += str(hours) + 'h'\n        i += 1\n    if minutes > 0 and i <= 2:\n        f += str(minutes) + 'm'\n        i += 1\n    if secondsf > 0 and i <= 2:\n        f += str(secondsf) + 's'\n        i += 1\n    if millis > 0 and i <= 2:\n        f += str(millis) + 'ms'\n        i += 1\n    if f == '':\n        f = '0ms'\n    return f\n"
  }
]