Repository: NVlabs/condensa
Branch: master
Commit: e81e4f2e9738
Files: 61
Total size: 173.7 KB

Directory structure:
gitextract_kv0mug3a/

├── .gitignore
├── .style.yapf
├── LICENSE
├── README.md
├── condensa/
│   ├── __init__.py
│   ├── cfg.py
│   ├── compressor.py
│   ├── data.py
│   ├── delta.py
│   ├── dtypes.py
│   ├── finetune.py
│   ├── functional.py
│   ├── lr.py
│   ├── opt/
│   │   ├── __init__.py
│   │   ├── direct/
│   │   │   ├── __init__.py
│   │   │   └── dc.py
│   │   └── lc/
│   │       ├── __init__.py
│   │       ├── adam.py
│   │       ├── lc.py
│   │       └── sgd.py
│   ├── pi.py
│   ├── schemes.py
│   ├── tensor.py
│   ├── type_enums.py
│   └── util.py
├── docs/
│   ├── Makefile
│   ├── make.bat
│   └── source/
│       ├── _static/
│       │   └── ga_tracker.js
│       ├── conf.py
│       ├── guide/
│       │   ├── install.rst
│       │   └── usage.rst
│       ├── index.rst
│       └── modules/
│           ├── compressor.rst
│           ├── finetuner.rst
│           ├── functional.rst
│           ├── lc.rst
│           ├── opt.rst
│           ├── pi.rst
│           ├── schemes.rst
│           ├── tensor.rst
│           └── util.rst
├── examples/
│   └── cifar/
│       ├── compress.py
│       ├── compress_alexnet.sh
│       ├── finetune.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── alexnet.py
│       │   ├── resnet.py
│       │   └── vgg.py
│       └── util.py
├── notebooks/
│   ├── AlexNet.ipynb
│   ├── AlexNet.pth
│   └── util.py
├── run_all_tests.sh
├── setup.cfg
├── setup.py
└── test/
    ├── schemes/
    │   ├── test_prune.py
    │   └── test_qz.py
    ├── tensor/
    │   ├── test_mask_apply.py
    │   ├── test_maskgen.py
    │   └── test_util.py
    └── test_lr.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/

# Distribution / packaging
.Python
*.egg-info/
*.egg
.eggs/
dist/
sdist/

# Jupyter Notebook
.ipynb_checkpoints

# Condensa-specific
compressed/
trained/
results/
data/
build/
# version.py is auto-generated by setup.py
version.py


================================================
FILE: .style.yapf
================================================
[style]
based_on_style = pep8
blank_lines_around_top_level_definition = 1
column_limit = 79
indent_width = 4


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2019 NVIDIA Corporation

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# A Programming System for Neural Network Compression

**Note:** the original version of Condensa (contained in this branch) is no longer actively maintained. Please check out the [lite branch](https://github.com/NVlabs/condensa/tree/lite) for the most up-to-date version.

Condensa is a framework for _programmable model compression_ in Python.
It comes with a set of built-in compression operators which may be used to
compose complex compression schemes targeting specific combinations of DNN architecture,
hardware platform, and optimization objective.
To recover any accuracy lost during compression, Condensa uses a constrained
optimization formulation of model compression and employs an Augmented Lagrangian-based
algorithm as the optimizer.

**Status**: Condensa is under active development, and bug reports, pull requests, and other feedback are all highly appreciated. See the contributions section below for more details on how to contribute.

## Supported Operators and Schemes

Condensa provides the following set of pre-built compression schemes:

* [Unstructured Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#unstructured-pruning)
* [Filter and Neuron Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#neuron-pruning)
* [Block Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#block-pruning)
* [Quantization](https://nvlabs.github.io/condensa/modules/schemes.html#quantization)
* [Scheme Composition](https://nvlabs.github.io/condensa/modules/schemes.html#composition)

The schemes above are built using one or more [compression operators](https://nvlabs.github.io/condensa/modules/pi.html), which may be combined in various ways to define your own custom schemes.

Please refer to the [documentation](https://nvlabs.github.io/condensa/index.html) for a detailed description of available operators and schemes.

## Prerequisites

Condensa requires:

* A working Linux installation (we use Ubuntu 18.04)
* NVIDIA drivers and CUDA 10+ for GPU support
* Python 3.5 or newer
* PyTorch 1.0 or newer

## Installation

The most straightforward way of installing Condensa is via `pip`:

```bash
pip install condensa
```

### Installation from Source

Retrieve the latest source code from the Condensa repository:

```bash
git clone https://github.com/NVlabs/condensa.git
```

Navigate to the source code directory and run the following:

```bash
pip install -e .
```

### Test out the Installation

To check the installation, run the unit test suite:

```bash
bash run_all_tests.sh -v
```

## Getting Started

The [AlexNet Notebook](https://github.com/NVlabs/condensa/blob/master/notebooks/AlexNet.ipynb) contains a simple step-by-step walkthrough of compressing a pre-trained model using Condensa.
Check out the [`examples` folder](https://github.com/NVlabs/condensa/tree/master/examples/cifar) for additional, more complex examples of using Condensa (**note**: some examples require the `torchvision` package to be installed).

## Documentation

Documentation is available [here](https://nvlabs.github.io/condensa/). Please also check out the [Condensa paper](https://arxiv.org/abs/1911.02497) for a detailed
description of Condensa's motivation, features, and performance results.

## Contributing

We appreciate all contributions, including bug fixes, new features, documentation, and additional tutorials. You can initiate
contributions via GitHub pull requests. When making code contributions, please follow the `PEP 8` Python coding standard and provide
unit tests for the new features. Finally, make sure to sign off your commits, either by using the `-s` flag or by adding
`Signed-off-by: Name <Email>` to the commit message.

## Citing Condensa

If you use Condensa for research, please consider citing the following paper:

```
@article{condensa2020,
  title={A Programmable Approach to Neural Network Compression}, 
  author={V. {Joseph} and G. L. {Gopalakrishnan} and S. {Muralidharan} and M. {Garland} and A. {Garg}},
  journal={IEEE Micro}, 
  year={2020},
  volume={40},
  number={5},
  pages={17-25},
  doi={10.1109/MM.2020.3012391}
}
```

## Disclaimer

Condensa is a research prototype and not an official NVIDIA product. Many features are still experimental and yet to be properly documented.


================================================
FILE: condensa/__init__.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name = "condensa"

from .version import __version__
from . import opt

from .dtypes import *
from .compressor import *
from .finetune import *
from .pi import *
from .delta import *
from .util import *

from . import schemes
from . import data


================================================
FILE: condensa/cfg.py
================================================
# Copyright 2019 NVIDIA Corporation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Package-wide configuration flags.
# NOTE(review): the code that reads these flags is not part of this module;
# the descriptions below are inferred from the flag names only -- confirm
# against the consumers before relying on them.

# Presumably switches Condensa into a "record" mode when True (off here).
__CONDENSA_RECORD_MODE__ = False
# Presumably enables a pre-check step for the `pi` operators (on here).
__CONDENSA_PI_PRECHECK__ = True


================================================
FILE: condensa/compressor.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch.nn

class Compressor(object):
    """
    Front-end that ties together an optimizer, a compression scheme, a
    model, and the data/criterion needed to compress that model.
    """
    def __init__(self,
                 opt,
                 scheme,
                 model,
                 trainloader,
                 testloader,
                 valloader,
                 criterion):
        """
        Builds a `Compressor`; no compression work happens until `run()`.

        :param opt: Optimizer; must expose a `compress(...)` method.
        :type opt: `condensa.Optimizer`
        :param scheme: Compression scheme providing `pi` and `delta`.
        :param model: PyTorch model to compress.
        :type model: `torch.nn.Module`
        :param trainloader: Training dataloader.
        :param testloader: Test dataloader.
        :param valloader: Validation dataloader.
        :param criterion: Loss criterion.
        """
        assert isinstance(model, torch.nn.Module)

        # The scheme is only needed for its two components, so it is
        # unpacked here rather than stored.
        self.opt = opt
        self.pi, self.delta = scheme.pi, scheme.delta

        self.model = model
        self.criterion = criterion

        self.trainloader = trainloader
        self.testloader = testloader
        self.valloader = valloader

        # Filled in by `run()`.
        self._statistics = None

    @property
    def statistics(self):
        """
        Statistics reported by the optimizer during the last `run()`.

        :return: Model statistics (`None` until `run()` has been called).
        :rtype: `dict`
        """
        return self._statistics

    def run(self):
        """
        Runs the optimizer on the stored model and records its statistics.

        :return: Compressed model.
        :rtype: `torch.nn.Module`
        """
        result = self.opt.compress(self.model,
                                   self.pi,
                                   self.delta,
                                   self.trainloader,
                                   self.testloader,
                                   self.valloader,
                                   self.criterion)
        compressed, self._statistics = result
        return compressed


================================================
FILE: condensa/data.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import numpy as np
import torch
import torch.utils.data as data
import PIL

def fast_collate(batch):
    """Collates `(image, label)` samples into a pair of tensors.

       Images are packed into a uint8 tensor of shape
       `(len(batch), 3, h, w)`, where `w, h` come from the first image's
       `size`; labels become an int64 tensor. Images with fewer than three
       dimensions get a trailing channel axis, which is broadcast over the
       three output channels. Based on version from
       NVIDIA Apex: https://github.com/NVIDIA/apex."""
    images = [sample[0] for sample in batch]
    labels = [sample[1] for sample in batch]
    targets = torch.tensor(labels, dtype=torch.int64)

    width, height = images[0].size[0], images[0].size[1]
    collated = torch.zeros((len(images), 3, height, width),
                           dtype=torch.uint8)
    for idx, image in enumerate(images):
        pixels = np.asarray(image, dtype=np.uint8)
        if pixels.ndim < 3:
            # Add a channel axis at the end: (h, w) -> (h, w, 1).
            pixels = pixels[..., np.newaxis]
        # Channels-last -> channels-first: (h, w, c) -> (c, h, w).
        channels_first = np.moveaxis(pixels, 2, 0)
        collated[idx] += torch.from_numpy(channels_first)
    return collated, targets

class GPUDataLoader(object):
    """Custom data loader with support for prefetching and fast collation.

       Wraps a `torch.utils.data.DataLoader` (using `fast_collate`) and
       copies the next batch to the GPU on a separate CUDA stream while the
       current batch is being consumed. Optionally normalizes inputs with a
       per-channel mean/std.
       Based on version from NVIDIA Apex: https://github.com/NVIDIA/apex."""

    # Normalization constants. They default to None ("do not normalize") so
    # that `preload` can always test them, including when `meanstd` is not
    # passed to the constructor.
    mean = None
    std = None

    def __init__(self,
                 dataset,
                 batch_size,
                 shuffle,
                 num_workers,
                 sampler=None,
                 meanstd=None):
        """
        Creates the loader and prefetches the first batch.

        :param dataset: Dataset whose samples are `(PIL image, target)`.
        :param batch_size: Batch size forwarded to the `DataLoader`.
        :param shuffle: Shuffle flag forwarded to the `DataLoader`.
        :param num_workers: Worker count forwarded to the `DataLoader`.
        :param sampler: Optional sampler forwarded to the `DataLoader`.
        :param meanstd: Optional `(mean, std)` pair of per-channel values;
            each value is multiplied by 255 before use because inputs are
            normalized while still in the uint8 value range.
        :raises RuntimeError: If the dataset does not yield PIL images or
            CUDA is unavailable.
        :raises NotImplementedError: If images do not have 3 channels.
        """
        # `import PIL` alone does not guarantee that the `PIL.Image`
        # submodule is loaded, so import it explicitly before using it.
        import PIL.Image

        first_image = dataset[0][0]
        if not isinstance(first_image, PIL.Image.Image):
            raise RuntimeError(
                '[Condensa] GPUDataLoader only supports PIL image datasets')
        nc = len(first_image.getbands())

        if not torch.cuda.is_available():
            raise RuntimeError(
                '[Condensa] GPUDataLoader requires PyTorch CUDA support')

        if nc != 3:
            raise NotImplementedError(
                '[Condensa] GPUDataLoader currently only supports 3-channel images'
            )

        loader = data.DataLoader(dataset,
                                 batch_size=batch_size,
                                 shuffle=shuffle,
                                 pin_memory=True,
                                 sampler=sampler,
                                 collate_fn=fast_collate,
                                 num_workers=num_workers)

        self.base_loader = loader
        self.loader = iter(loader)
        # Side stream on which host-to-device copies are issued.
        self.stream = torch.cuda.Stream()

        # Always define both attributes; previously they were left unset
        # when `meanstd` was None, which made `preload` raise
        # AttributeError.
        self.mean = None
        self.std = None
        if meanstd is not None:
            mean, std = meanstd
            self.mean = torch.tensor([x * 255
                                      for x in mean]).cuda().view(1, nc, 1, 1)
            self.std = torch.tensor([x * 255
                                     for x in std]).cuda().view(1, nc, 1, 1)
        self.preload()

    def __len__(self):
        """Returns the number of batches in the underlying loader."""
        return len(self.base_loader)

    def __iter__(self):
        """Restarts iteration over the underlying loader and prefetches."""
        self.loader = iter(self.base_loader)
        self.preload()
        return self

    def __next__(self):
        """
        Returns the prefetched `(input, target)` batch and starts loading
        the following one.

        :raises StopIteration: When the underlying loader is exhausted.
        """
        # Make the consumer's stream wait for the prefetch copies.
        torch.cuda.current_stream().wait_stream(self.stream)
        batch_input = self.next_input
        batch_target = self.next_target
        if batch_input is None and batch_target is None:
            raise StopIteration
        # The tensors were allocated on the side stream; tell the caching
        # allocator that they are now in use on the current stream.
        batch_input.record_stream(torch.cuda.current_stream())
        batch_target.record_stream(torch.cuda.current_stream())
        self.preload()
        return batch_input, batch_target

    def preload(self):
        """
        Fetches the next batch and moves it to the GPU on the side stream.

        Sets `next_input`/`next_target` to None once the underlying loader
        is exhausted.
        """
        try:
            self.next_input, self.next_target = next(self.loader)
        except StopIteration:
            self.next_input = None
            self.next_target = None
            return
        with torch.cuda.stream(self.stream):
            self.next_input = self.next_input.cuda(non_blocking=True)
            self.next_target = self.next_target.cuda(non_blocking=True)

            self.next_input = self.next_input.float()
            if self.mean is not None and self.std is not None:
                self.next_input = self.next_input.sub_(self.mean).div_(
                    self.std)


================================================
FILE: condensa/delta.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from condensa import dtypes

def dequantize(module, dtype):
    """
    Casts a module to the given floating-point data type (inplace).

    Only 32-bit and 64-bit floating-point targets are handled.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param dtype: Target data type.
    :raises TypeError: If `dtype` is neither float32 nor float64.
    """
    target = dtype.as_dtype_enum
    if target == dtypes.DT_FLOAT32:
        module.float()
        return
    if target == dtypes.DT_FLOAT64:
        module.double()
        return
    raise TypeError('Unknown data type specified for de-quantization')


================================================
FILE: condensa/dtypes.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from .type_enums import *

class DType(object):
    """Wrapper around a data type enum value, used for quantization."""
    def __init__(self, dtype):
        """
        :param dtype: Data type enum value to wrap.
        """
        self._dtype = dtype

    def __int__(self):
        """Returns the wrapped enum value."""
        return self._dtype

    def __str__(self):
        """Returns a printable form such as `<dtype: 'float32'>`."""
        label = self.name
        return "<dtype: %r>" % label

    @property
    def as_dtype_enum(self):
        """The wrapped enum value."""
        return self._dtype

    @property
    def as_numpy_dtype(self):
        """The NumPy type registered for this data type."""
        enum_value = self._dtype
        return _TO_NP[enum_value]

    @property
    def name(self):
        """The string name registered for this data type."""
        enum_value = self._dtype
        return _DTYPE_TO_STRING[enum_value]

# Ready-made `DType` instances, one per data type enum value.
float16 = DType(DT_FLOAT16)
float32 = DType(DT_FLOAT32)
float64 = DType(DT_FLOAT64)
int8 = DType(DT_INT8)
uint8 = DType(DT_UINT8)
int16 = DType(DT_INT16)
uint16 = DType(DT_UINT16)

# Enum value -> human-readable name; looked up by `DType.name`.
_DTYPE_TO_STRING = {
    DT_FLOAT16: "float16",
    DT_FLOAT32: "float32",
    DT_FLOAT64: "float64",
    DT_INT8: "int8",
    DT_UINT8: "uint8",
    DT_INT16: "int16",
    DT_UINT16: "uint16"
}

# Enum value -> NumPy scalar type; looked up by `DType.as_numpy_dtype`.
_TO_NP = {
    DT_FLOAT16: np.float16,
    DT_FLOAT32: np.float32,
    DT_FLOAT64: np.float64,
    DT_INT8: np.int8,
    DT_UINT8: np.uint8,
    DT_INT16: np.int16,
    DT_UINT16: np.uint16
}


================================================
FILE: condensa/finetune.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import logging
import numpy as np
from copy import deepcopy

import torch
import torch.backends.cudnn as cudnn
from tqdm import tqdm

import condensa.tensor as T
import condensa.util as util

logger = logging.getLogger(__name__)

class FineTuner(object):
    """Condensa model fine-tuner. Can be used for retraining compressed
       models while keeping all zero-valued parameters clipped to zero.

       On construction, a boolean mask of the non-zero entries is attached
       to each eligible module (`mask_w` for weights, `mask_b` for biases).
       During `run`, the masks are re-applied after every optimizer step."""
    def __init__(self, w, layer_types=None, biases=True):
        """
        Constructs a `FineTuner` and records the zero-pattern of `w`.

        :param w: Compressed PyTorch model to fine-tune.
        :type w: `torch.nn.Module`
        :param layer_types: Module types whose parameters are masked
            (matched on exact type). If `None`, every module type is
            eligible.
        :type layer_types: `List/Tuple`
        :param biases: Whether biases are masked in addition to weights.
        :type biases: `bool`
        """
        self.w = w
        self.layer_types = layer_types
        self.biases = biases
        self._compute_mask_inplace()

    def _is_eligible(self, m):
        """Returns True if masks should be computed for module `m`."""
        if hasattr(m, 'condensa_nocompress'):
            return False
        # `None` means "no type restriction"; previously this default
        # raised TypeError on the membership test.
        return self.layer_types is None or type(m) in self.layer_types

    def _compute_mask_inplace(self):
        """Attaches non-zero masks to every eligible module of the model."""
        with torch.no_grad():
            for m in self.w.modules():
                if not self._is_eligible(m):
                    continue
                # Modules may expose `weight`/`bias` attributes set to None.
                weight = getattr(m, 'weight', None)
                if weight is not None:
                    m.mask_w = torch.gt(weight.data.abs(), 0.)
                if self.biases:
                    bias = getattr(m, 'bias', None)
                    if bias is not None:
                        m.mask_b = torch.gt(bias.data.abs(), 0.)

    def _apply_mask(self):
        """Re-applies the recorded masks to the current parameter values."""
        with torch.no_grad():
            for m in self.w.modules():
                if hasattr(m, 'mask_w'):
                    # Masks are plain attributes, so they do not follow the
                    # model when it is moved to another device; align them.
                    m.mask_w = m.mask_w.to(m.weight.device)
                    T.apply_mask_inplace(m.weight.data, m.mask_w)
                if hasattr(m, 'mask_b'):
                    m.mask_b = m.mask_b.to(m.bias.device)
                    T.apply_mask_inplace(m.bias.data, m.mask_b)

    @staticmethod
    def _format_stats(stats):
        """Formats a statistics dictionary as 'key:value, key:value'."""
        return ', '.join(['{}:{}'.format(k, v) for k, v in stats.items()])

    def run(self,
            epochs,
            lr,
            lr_end,
            momentum,
            weight_decay,
            criterion,
            trainloader,
            testloader,
            valloader,
            debugging_flags={}):
        """
        Fine-tunes a compressed model. Currently only supports SGD.

        The learning rate is decayed geometrically from `lr` towards
        `lr_end` over `epochs` epochs. The returned model is the one with
        the lowest validation loss (or test loss if no validation loader is
        given, or the most recent model if neither is given).

        :param epochs: Number of epochs
        :type epochs: `int`
        :param lr: Learning rate
        :type lr: `float`
        :param lr_end: End learning rate
        :type lr_end: `float`
        :param momentum: Momentum
        :type momentum: float
        :param weight_decay: Weight decay
        :type weight_decay: float
        :param criterion: Loss criterion
        :param trainloader: Training dataloader
        :param testloader: Test dataloader (may be `None`)
        :param valloader: Validation dataloader (may be `None`)
        :param debugging_flags: Debugging flags; the key
            'custom_model_statistics' may supply a statistics function.
            The dictionary is only read, never modified.
        :type debugging_flags: dict
        :return: Best model found during fine-tuning.
        """
        use_cuda = torch.cuda.is_available()

        validate = (valloader is not None)
        test = (testloader is not None)

        if use_cuda:
            cudnn.benchmark = True
            self.w = self.w.cuda()

        flags = debugging_flags or {}
        _model_stat_fn = flags.get('custom_model_statistics',
                                   util.empty_stat_fn)

        if validate:
            val_loss, val_stats = _model_stat_fn(self.w, criterion, valloader)
            logger.info(
                '[Condensa:FineTuner] Original model val_loss: {:.2f}, {}'
                .format(val_loss, self._format_stats(val_stats)))
        if test:
            test_loss, test_stats = _model_stat_fn(
                self.w, criterion, testloader)
            logger.info(
                '[Condensa:FineTuner] Original model test_loss: {:.2f}, {} '
                .format(test_loss, self._format_stats(test_stats)))

        # Per-epoch multiplier taking the LR from `lr` to `lr_end`.
        l_alpha = np.exp((np.log(lr_end) - np.log(lr)) / float(epochs))
        optimizer = torch.optim.SGD(self.w.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    weight_decay=weight_decay,
                                    nesterov=False)
        with torch.no_grad():
            best_model = deepcopy(self.w)
        best_loss = sys.float_info.max
        for epoch in range(epochs):
            # Switch to training mode
            self.w.train()
            nbatches = len(trainloader)
            # Decide once per epoch so the bar is created, updated and
            # closed consistently.
            show_progress = logger.isEnabledFor(logging.INFO)
            pbar = tqdm(total=nbatches, ascii=True) if show_progress else None
            for inputs, targets in trainloader:
                if use_cuda:
                    if not inputs.is_cuda: inputs = inputs.cuda()
                    if not targets.is_cuda: targets = targets.cuda()
                output = self.w(inputs)
                loss = criterion(output, targets)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Re-clip pruned entries that the update may have revived
                self._apply_mask()
                if pbar is not None:
                    pbar.update()
            if pbar is not None:
                pbar.close()

            # Switch to eval mode
            self.w.eval()

            if validate:
                val_loss, val_stats = _model_stat_fn(
                    self.w, criterion, valloader)
                logger.info(
                    '[Condensa:FineTuner] Epoch [{}], VAL loss: {:.2f}, {}'
                    .format(epoch, val_loss, self._format_stats(val_stats)))
            if test:
                test_loss, test_stats = _model_stat_fn(
                    self.w, criterion, testloader)
                logger.info(
                    '[Condensa:FineTuner] Epoch [{}], TEST loss: {:.2f}, {}'
                    .format(epoch, test_loss, self._format_stats(test_stats)))

            if validate:
                if val_loss < best_loss:
                    logger.info(
                        '[Condensa:FineTuner] SAVING MODEL based on VAL')
                    best_loss = val_loss
                    best_model = deepcopy(self.w)
            elif test:
                if test_loss < best_loss:
                    logger.info(
                        '[Condensa:FineTuner] SAVING MODEL based on TEST')
                    best_loss = test_loss
                    best_model = deepcopy(self.w)
            else:
                logger.info(
                    '[Condensa:FineTuner] SAVING MODEL based on most recent')
                best_model = deepcopy(self.w)

            lr *= l_alpha
            for g in optimizer.param_groups:
                g['lr'] = lr

        return best_model


================================================
FILE: condensa/functional.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

def l2norm(tensor, dim, keepdim):
    """
    Reduces the input tensor along `dim` using the l2-norm.

    :param tensor: PyTorch tensor.
    :type tensor: `torch.Tensor`
    :param dim: Reduction dimension.
    :type dim: `int`
    :param keepdim: Whether the output has `dim` retained.
    :type keepdim: `bool`
    :return: l2-norm of input tensor.
    """
    order = 2
    return torch.norm(tensor, p=order, dim=dim, keepdim=keepdim)

def max(tensor, dim, keepdim):
    """
    Reduces the input tensor along `dim` by taking the maximum.

    :param tensor: PyTorch tensor.
    :type tensor: `torch.Tensor`
    :param dim: Reduction dimension.
    :type dim: `int`
    :param keepdim: Whether the output has `dim` retained.
    :type keepdim: `bool`
    :return: Max of input tensor.
    """
    # torch.max along a dimension yields (values, indices); keep the values.
    values, _indices = torch.max(tensor, dim, keepdim)
    return values

def min(tensor, dim, keepdim):
    """
    Reduces the input tensor along `dim` by taking the minimum.

    :param tensor: PyTorch tensor.
    :type tensor: `torch.Tensor`
    :param dim: Reduction dimension.
    :type dim: `int`
    :param keepdim: Whether the output has `dim` retained.
    :type keepdim: `bool`
    :return: Min of input tensor.
    """
    # torch.min along a dimension yields (values, indices); keep the values.
    values, _indices = torch.min(tensor, dim, keepdim)
    return values

def mean(tensor, dim, keepdim):
    """
    Reduces the input tensor along `dim` by averaging.

    :param tensor: PyTorch tensor.
    :type tensor: `torch.Tensor`
    :param dim: Reduction dimension.
    :type dim: `int`
    :param keepdim: Whether the output has `dim` retained.
    :type keepdim: `bool`
    :return: Mean value of input tensor.
    """
    averaged = torch.mean(tensor, dim, keepdim)
    return averaged

def sum(tensor, dim, keepdim):
    """
    Reduces the input tensor along `dim` by summation.

    :param tensor: PyTorch tensor.
    :type tensor: `torch.Tensor`
    :param dim: Reduction dimension.
    :type dim: `int`
    :param keepdim: Whether the output has `dim` retained.
    :type keepdim: `bool`
    :return: Sum of input tensor.
    """
    total = torch.sum(tensor, dim, keepdim)
    return total


================================================
FILE: condensa/lr.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

class IntervalLR(object):
    """Geometric learning-rate decay from a start value to an end value."""
    def __init__(self, begin, end, n):
        """
        Construct an instance of `IntervalLR`.

        :param begin: Starting learning rate (LR).
        :type begin: `float`
        :param end: Ending LR.
        :type end: `float`
        :param n: Number of iterations.
        :type n: `int`
        """
        # Per-step multiplier chosen so that n steps take `begin` to `end`.
        log_gap = np.log(end) - np.log(begin)
        self.alpha = np.exp(log_gap / n)
        self.lr = begin

    def step(self):
        """Signal end of iteration."""
        self.lr *= self.alpha

    @property
    def learning_rate(self):
        """Returns current learning rate."""
        current = self.lr
        return current

class DecayedLR(object):
    """Multiplies the learning rate by a fixed factor at chosen iterations."""
    def __init__(self, begin, schedule, gamma=0.1):
        """
        Construct an instance of `DecayedLR`.

        :param begin: Starting LR.
        :type begin: `float`
        :param schedule: List of iterations when LR must be adjusted.
        :type schedule: `List/Tuple`
        :param gamma: LR multiplier.
        :type gamma: `float`
        """
        self.counter = 0
        self.schedule = schedule
        self.lr = begin
        self.gamma = gamma

    def step(self):
        """Signal end of iteration."""
        # Decide on the pre-increment counter value, then advance it.
        decay_now = self.counter in self.schedule
        if decay_now:
            self.lr *= self.gamma
        self.counter += 1

    @property
    def learning_rate(self):
        """Returns current learning rate."""
        current = self.lr
        return current

class ExpDecayedLR(object):
    """Exponential learning-rate decay driven by a step counter."""
    def __init__(self, begin, gamma):
        """
        Construct an instance of `ExpDecayedLR`.

        :param begin: Starting LR.
        :type begin: `float`
        :param gamma: LR multiplier.
        :type gamma: `float`
        """
        self.counter = 0
        self.lr = begin
        self.gamma = gamma

    def step(self):
        """Signal end of iteration."""
        self.counter += 1

    @property
    def learning_rate(self):
        """Returns current learning rate."""
        # The base LR is never modified; decay is recomputed from the counter.
        decay = self.gamma**self.counter
        return self.lr * decay


================================================
FILE: condensa/opt/__init__.py
================================================
# Copyright 2019 NVIDIA Corporation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .direct.dc import DC
from .lc.lc import LC

from . import lc


================================================
FILE: condensa/opt/direct/__init__.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: condensa/opt/direct/dc.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from copy import deepcopy
import torch

from condensa.util import EventTimer

class DC(object):
    """Condensa direct compression optimizer."""
    def compress(self,
                 w,
                 pi,
                 delta,
                 trainloader,
                 testloader,
                 valloader,
                 criterion):
        """
        Compresses a copy of the model by applying `pi` to it once.

        The input model `w` is left untouched. The decompression function,
        the dataloaders and the criterion are accepted but not used here.

        :param w: PyTorch model.
        :type w: `torch.nn.Module`
        :param pi: Compression function.
        :param delta: Decompression function.
        :param trainloader: Training dataloader.
        :param testloader: Test dataloader.
        :param valloader: Validation dataloader.
        :param criterion: Loss criterion.
        :return: Tuple of (compressed model, statistics dictionary).
        """
        stopwatch = EventTimer()
        with torch.no_grad():
            compressed = deepcopy(w)
        pi(compressed)

        statistics = {'total_elapsed': stopwatch.elapsed_seconds}
        return compressed, statistics


================================================
FILE: condensa/opt/lc/__init__.py
================================================
# Copyright 2019 NVIDIA Corporation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .sgd import SGD
from .adam import Adam


================================================
FILE: condensa/opt/lc/adam.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from collections import defaultdict

import torch

class Adam(object):
    """Custom Adam implementation for L-C optimizer.

    In addition to the regular Adam update, parameters listed in a
    module's `condense` attribute receive an extra gradient term
    `mu * (w - theta) - lm` before the moment estimates are updated.
    """
    def __init__(self,
                 w,
                 lr=1e-3,
                 betas=(0.9, 0.999),
                 eps=1e-8,
                 weight_decay=0,
                 amsgrad=False):
        """
        Constructs an `Adam` instance.

        :param w: PyTorch model; if it exposes a `module` attribute (as a
            data-parallel wrapper does), the wrapped module is used.
        :type w: `torch.nn.Module`
        :param lr: Learning rate.
        :type lr: float
        :param betas: Coefficients for the running averages of the
            gradient and its square.
        :type betas: Tuple
        :param eps: Term added to the denominator.
        :type eps: float
        :param weight_decay: Weight decay (L2 penalty).
        :type weight_decay: float
        :param amsgrad: Whether to use the AMSGrad variant.
        :type amsgrad: bool
        :raises ValueError: If `lr`, `eps` or `betas` are out of range.
        """
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(
                betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(
                betas[1]))

        try:
            self.w = w.module
        except AttributeError:
            self.w = w

        self.lr = lr
        # `step` overrides this on every call; initialised here so `_step`
        # never reads an undefined attribute.
        self.learning_rate = lr
        self.betas = betas
        self.eps = eps
        self.weight_decay = weight_decay
        self.amsgrad = amsgrad

        self.state = defaultdict(dict)

    def zero_grad(self):
        """Zeroes out all gradients."""
        for p in self.w.parameters():
            if p.grad is not None:
                p.grad.detach_()
                p.grad.zero_()

    def reset_state(self):
        """Resets optimizer state (step count and moment estimates)."""
        for p in self.w.parameters():
            state = self.state.get(p)
            if not state:
                # Nothing recorded yet for this parameter.
                continue
            # The step counter must be reset too, otherwise the bias
            # correction in `_step` is computed from a stale count.
            if 'step' in state:
                state['step'] = 0
            if 'exp_avg' in state:
                state['exp_avg'] = torch.zeros_like(p.data)
            if 'exp_avg_sq' in state:
                state['exp_avg_sq'] = torch.zeros_like(p.data)
            if self.amsgrad and 'max_exp_avg_sq' in state:
                state['max_exp_avg_sq'] = torch.zeros_like(p.data)

    def _step(self, p, condense=False, mu=None, p_theta=None, p_lm=None):
        """
        Applies one Adam update to parameter `p` in place.

        :param p: Parameter to update; skipped if it has no gradient.
        :param condense: Whether to add the L-C penalty term to the gradient.
        :param mu: Penalty coefficient (required if `condense` is True).
        :param p_theta: Matching parameter of the compressed model
            (required if `condense` is True).
        :param p_lm: Matching Lagrange multiplier parameter (required if
            `condense` is True).
        :raises RuntimeError: If the gradient is sparse.
        """
        if p.grad is None:
            return

        # NOTE: the gradient tensor is modified in place below.
        grad = p.grad.data
        if grad.is_sparse:
            raise RuntimeError('Adam does not support sparse gradients.')

        state = self.state[p]
        # State initialization
        if len(state) == 0:
            state['step'] = 0
            # Exponential moving average of gradient values
            state['exp_avg'] = torch.zeros_like(p.data)
            # Exponential moving average of squared gradient values
            state['exp_avg_sq'] = torch.zeros_like(p.data)
            if self.amsgrad:
                # Maintains max of all exp. moving avg. of sq. grad. values
                state['max_exp_avg_sq'] = torch.zeros_like(p.data)

        exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
        if self.amsgrad:
            max_exp_avg_sq = state['max_exp_avg_sq']
        beta1, beta2 = self.betas

        state['step'] += 1

        if self.weight_decay != 0:
            grad.add_(p.data, alpha=self.weight_decay)

        if condense is True:
            assert (mu is not None
                    and p_theta is not None
                    and p_lm is not None)
            grad.add_(mu * (p.data - p_theta.data) - p_lm.data)

        # Decay the first and second moment running average coefficient
        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        if self.amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
            # Use the max. for normalizing running avg. of gradient
            denom = max_exp_avg_sq.sqrt().add_(self.eps)
        else:
            denom = exp_avg_sq.sqrt().add_(self.eps)

        bias_correction1 = 1 - beta1**state['step']
        bias_correction2 = 1 - beta2**state['step']
        step_size = self.learning_rate * math.sqrt(
            bias_correction2) / bias_correction1

        p.data.addcdiv_(exp_avg, denom, value=-step_size)

    def step(self, lr, mu, theta, lm, closure=None):
        """
        Performs a single optimization step over all model parameters.

        :param lr: Learning rate to use for this step.
        :type lr: float
        :param mu: Penalty coefficient for condensed parameters.
        :type mu: float
        :param theta: Compressed model; its modules are walked in lockstep
            with those of the optimized model.
        :type theta: `torch.nn.Module`
        :param lm: Lagrange multipliers, stored as a model of the same
            structure.
        :type lm: `torch.nn.Module`
        :param closure: Optional callable that re-evaluates the loss.
        :return: Value returned by `closure`, or `None`.
        """
        loss = None
        if closure is not None:
            loss = closure()

        self.learning_rate = lr
        for w_m, theta_m, lm_m in zip(self.w.modules(), theta.modules(),
                                      lm.modules()):
            if hasattr(theta_m, 'condense'):
                condensed = set(theta_m.condense)
                for pname in condensed:
                    self._step(getattr(w_m, pname), True, mu,
                               getattr(theta_m, pname), getattr(lm_m, pname))
                # Only this module's own parameters: child modules are
                # visited separately by the enclosing loop.
                own = {
                    name
                    for name, _ in theta_m.named_parameters(recurse=False)
                }
                for pname in own - condensed:
                    self._step(getattr(w_m, pname))
            else:
                for w_p in w_m.parameters(recurse=False):
                    self._step(w_p)
        return loss


================================================
FILE: condensa/opt/lc/lc.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from copy import deepcopy
import logging
from collections import defaultdict

import numpy as np
import torch
import torch.utils.data as data
import torch.backends.cudnn as cudnn
from tqdm import tqdm

from condensa.util import EventTimer
from condensa import cfg
from condensa import util
from .sgd import SGD
from condensa.lr import *

logger = logging.getLogger(__name__)

class record_mode(object):
    """Context manager that switches the global Condensa record-mode flag
       on for the duration of the `with` block and off afterwards."""
    def __enter__(self):
        cfg.__CONDENSA_RECORD_MODE__ = True

    def __exit__(self, exc_type, exc_value, traceback):
        cfg.__CONDENSA_RECORD_MODE__ = False
        # A false return value lets any in-flight exception propagate.
        suppress_exception = False
        return suppress_exception

class LC(object):
    """Condensa L-C compression engine."""
    def __init__(self,
                 steps=30,
                 l_optimizer=None,
                 l_optimizer_params={},
                 lr=None,
                 lr_end=None,
                 lr_decay=None,
                 lr_schedule=None,
                 lr_multiplier=None,
                 mb_iterations_per_l=0,
                 mb_iterations_first_l=0,
                 mu_init=0.,
                 mu_multiplier=1.,
                 mu_cap=10000,
                 distributed=False,
                 debugging_flags={}):
        """
        Constructs an `LC` class instance.

        :param steps: Number of L-C iterations.
        :type steps: int
        :param l_optimizer: L-step optimizer to use (defaults to `SGD`).
        :param l_optimizer_params: L-step optimizer hyper-parameters.
        :type l_optimizer_params: dict
        :param lr: Starting learning rate (required).
        :type lr: float
        :param lr_end: Ending learning rate.
        :type lr_end: float
        :param lr_decay: Exponential learning rate decay factor applied
            across L-C iterations.
        :type lr_decay: float
        :param lr_schedule: Learning rate schedule.
        :type lr_schedule: List
        :param lr_multiplier: Learning rate multiplier (required when
            `lr_schedule` is given).
        :type lr_multiplier: float
        :param mb_iterations_per_l: Number of mini-batch iterations per L-step.
        :type mb_iterations_per_l: int
        :param mb_iterations_first_l: Number of mini-batch iterations for first L-step.
        :type mb_iterations_first_l: int
        :param mu_init: Initial value of `mu`.
        :type mu_init: float
        :param mu_multiplier: Mu multiplier.
        :type mu_multiplier: float
        :param mu_cap: Maximum permitted value for `mu`.
        :type mu_cap: float
        :param distributed: Enable/disable data-parallelism in L-step.
        :type distributed: bool
        :param debugging_flags: Debugging flags
        :type debugging_flags: dict
        :raises ValueError: If `steps`, `lr`, `mb_iterations_per_l` or
            `mb_iterations_first_l` are out of range.
        :raises TypeError: If `lr` is not specified, if a dictionary
            argument is not a dictionary, or if `lr_schedule` is given
            without `lr_multiplier`.
        """
        # Snapshot of the constructor arguments. This must stay the first
        # statement so that `locals()` contains nothing but the arguments.
        self._engine_config = {
            k: v
            for k, v in locals().items() if k != 'self'
        }
        logger.info('[Condensa] LC ENGINE CONFIG [' +
                    ', '.join('{!s}={!r}'.format(k, v)
                              for k, v in self._engine_config.items()) + ']')

        if not 0 <= steps:
            raise ValueError(
                'Invalid steps specified: {}'.format(steps))
        if not isinstance(l_optimizer_params, dict):
            raise TypeError('l_optimizer_params must be a dictionary')
        # The default of None cannot be compared with a float; report the
        # missing argument explicitly instead of an opaque comparison error.
        if lr is None:
            raise TypeError('Learning rate (lr) must be specified')
        if not 0. <= lr:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if lr_schedule is not None and lr_multiplier is None:
            raise TypeError(
                'Please specify multiplier when using fixed LR schedule')
        if not 0 < mb_iterations_per_l:
            raise ValueError(
                'Invalid mb_iterations_per_l specified: {}'.format(mb_iterations_per_l))
        if not 0 < mb_iterations_first_l:
            raise ValueError(
                'Invalid mb_iterations_first_l specified: {}'.format(mb_iterations_first_l))
        if not isinstance(debugging_flags, dict):
            raise TypeError('debugging_flags must be a dictionary')

        self.use_cuda = torch.cuda.is_available()
        self.steps = steps
        self.l_optimizer = l_optimizer if l_optimizer else SGD
        self.l_optimizer_params = l_optimizer_params
        self.lr = lr
        self.lr_end = lr_end
        self.lr_decay = lr_decay
        self.lr_schedule = lr_schedule
        self.lr_multiplier = lr_multiplier
        self.mb_iterations_per_l = mb_iterations_per_l
        self.mb_iterations_first_l = mb_iterations_first_l
        self.mu_init = mu_init
        self.mu_multiplier = mu_multiplier
        self.mu_cap = mu_cap
        self.distributed = distributed
        self.debugging_flags = debugging_flags

    def zero_(self, model):
        """
        Sets every parameter of the given model to zero, in place.

        :param model: PyTorch model.
        :type model: torch.nn.Module
        """
        nn_utils = torch.nn.utils
        with torch.no_grad():
            # Flatten all parameters, zero the flat vector, then write it
            # back into the model's parameters.
            flat = nn_utils.parameters_to_vector(model.parameters())
            flat = flat.fill_(0.)
            nn_utils.vector_to_parameters(flat, model.parameters())

    def compress(self, w, pi, delta, trainloader, testloader, valloader,
                 loss_fn):
        """
        Main L-C compression method.
    
        :param w: Input model.
        :type w: torch.nn.Module
        :param pi: Compression function.
        :param delta: Decompression function.
        :param trainloader: Training dataloader.
        :param testloader: Test dataloader.
        :param valloader: Validation dataloader.
        :param loss_fn: Loss criterion.
        """
        statistics = {}
        # Save engine configuration
        statistics.update(self._engine_config)

        _model_stat_fn = self.debugging_flags['custom_model_statistics']\
                   if 'custom_model_statistics' in self.debugging_flags\
                   else util.empty_stat_fn
        _disable_train_stats = self.debugging_flags['disable_train_stats']\
                     if 'disable_train_stats' in self.debugging_flags\
                     else False
        timer_lc = EventTimer()

        if self.use_cuda: cudnn.benchmark = True
        logger.debug("[Condensa] cuDNN VERSION: {}".format(cudnn.version()))

        validate = (valloader is not None)
        test = (testloader is not None)

        # Copy model to GPU0 memory
        if self.use_cuda: w = w.cuda(0)

        # Mark all compressible modules in w
        with record_mode():
            pi(w)

        with torch.no_grad():
            theta = deepcopy(w)
        self.zero_(theta)

        with torch.no_grad():
            lm = deepcopy(w)
        self.zero_(lm)

        with torch.no_grad():
            best_model = deepcopy(w)

        # Enable data-parallelism in  L step
        if self.use_cuda and self.distributed:
            ngpus = torch.cuda.device_count()
            logger.info('[Condensa] {} GPUs enabled for L-step'.format(ngpus))
            w = torch.nn.DataParallel(w)

        mu = 0.
        learning_rate = self.lr

        optimizer = self.l_optimizer(w,
                                     lr=learning_rate,
                                     **self.l_optimizer_params)
        optimizer.reset_state()

        if not _disable_train_stats:
            w_train_loss, w_train_stats = _model_stat_fn(w, loss_fn, trainloader)
            logger.info('[Condensa] w TRAIN\tloss={:.5f}, {}'
                .format(w_train_loss,
                ', '.join(['{}:{}'.format(k, v) for k,v in w_train_stats.items()])))
        if validate:
            w_val_loss, w_val_stats = _model_stat_fn(w, loss_fn, valloader)
            logger.info('[Condensa] w VAL\tloss={:.5f}, {}'
                .format(w_val_loss,
                ', '.join(['{}:{}'.format(k, v) for k,v in w_val_stats.items()])))
        if test:
            w_test_loss, w_test_stats = _model_stat_fn(w, loss_fn, testloader)
            logger.info('[Condensa] w TEST\tloss={:.5f}, {}'
                .format(w_test_loss,
                ', '.join(['{}:{}'.format(k, v) for k,v in w_test_stats.items()])))

        best_loss = sys.float_info.max
        train_losses = []
        if validate: val_losses = []
        if test: test_losses = []
        outer_lr_scheduler = None
        if self.lr_decay is not None:
            outer_lr_scheduler = ExpDecayedLR(self.lr, self.lr_decay)
        elif self.lr_schedule is not None:
            outer_lr_scheduler = DecayedLR(self.lr, self.lr_schedule,
                                           self.lr_multiplier)
        for j in range(0, self.steps):
            n_sgd_iter = (self.mb_iterations_first_l
                          if j == 1 else self.mb_iterations_per_l)

            # Set up outer learning rate
            learning_rate = self.lr
            if outer_lr_scheduler is not None:
                learning_rate = outer_lr_scheduler.learning_rate

            logger.info(
                '[Condensa] LC Iteration {}:\tmu={:.5f}, lr={:.5f}'.format(
                    j, mu, learning_rate))

            inner_lr_scheduler = None
            if self.lr_end is not None:
                inner_lr_scheduler = IntervalLR(learning_rate, self.lr_end,
                                                n_sgd_iter)

            # L step
            # Switch to training mode
            i = 0
            w.train()
            iterator = iter(trainloader)
            if logger.isEnabledFor(logging.INFO) and j>0:
                pbar = tqdm(total=n_sgd_iter, ascii=True)
            while True:
                if j == 0:
                    logger.info('[Condensa] Skipping first L-step')
                    break
                if j == 1 and i >= self.mb_iterations_first_l:
                    break
                if j > 1 and i >= self.mb_iterations_per_l:
                    break

                try:
                    inputs, targets = next(iterator)
                except StopIteration:
                    iterator = iter(trainloader)
                    inputs, targets = next(iterator)

                if self.use_cuda:
                    if not inputs.is_cuda: inputs = inputs.cuda()
                    if not targets.is_cuda:
                        targets = targets.cuda(non_blocking=True)
                outputs = w(inputs)
                loss = loss_fn(outputs, targets)

                optimizer.zero_grad()

                loss.backward()
                optimizer.step(learning_rate, mu, theta, lm)

                if inner_lr_scheduler is not None:
                    inner_lr_scheduler.step()
                    learning_rate = inner_lr_scheduler.learning_rate

                if logger.isEnabledFor(logging.INFO):
                    pbar.update()
                i += 1

            if logger.isEnabledFor(logging.INFO) and j>0:
                pbar.close()
            logger.info('')

            if self.use_cuda: torch.cuda.synchronize()

            w.eval()
            # C step and theta update
            try:
                theta.load_state_dict(w.module.state_dict())
            except AttributeError:
                theta.load_state_dict(w.state_dict())
            if mu > 0:
                try:
                    wmodules = w.module.modules()
                except AttributeError:
                    wmodules = w.modules()
                with record_mode():
                    pi(theta)
                with torch.no_grad():
                    for w_m, theta_m, lm_m in zip(wmodules, theta.modules(),
                                                  lm.modules()):
                        if hasattr(theta_m, 'condense'):
                            for pname in theta_m.condense:
                                getattr(theta_m, pname).data = (
                                    getattr(w_m, pname).detach() -
                                    getattr(lm_m, pname).data / mu)

            pi(theta)

            if not _disable_train_stats:
                nested_train_loss, nested_train_stats = _model_stat_fn(theta, loss_fn, trainloader)
                train_losses.append(nested_train_loss)
                logger.info(
                    '[Condensa] Nested (theta) TRAIN\tloss={:.5f}, {}'
                    .format(nested_train_loss,
                    ', '.join(['{}:{}'.format(k, v) for k,v in nested_train_stats.items()])))
            if validate:
                nested_val_loss, nested_val_stats = _model_stat_fn(theta, loss_fn, valloader)
                val_losses.append(nested_val_loss)
                logger.info(
                    '[Condensa] Nested (theta) VAL\tloss={:.5f}, {}'
                    .format(nested_val_loss,
                    ', '.join(['{}:{}'.format(k, v) for k,v in nested_val_stats.items()])))
            if test:
                nested_test_loss, nested_test_stats = _model_stat_fn(theta, loss_fn, testloader)
                test_losses.append(nested_test_loss)
                logger.info(
                    '[Condensa] Nested (theta) TEST\tloss={:.5f}, {}'
                    .format(nested_test_loss,
                    ', '.join(['{}:{}'.format(k, v) for k,v in nested_test_stats.items()])))

            if validate:
                if nested_val_loss < best_loss:
                    logger.info('[Condensa] Saving model based on VAL')
                    best_loss = nested_val_loss
                    # Deep-copy required here to preserve dtypes
                    best_model = deepcopy(theta)
            elif test:
                if nested_test_loss < best_loss:
                    logger.info('[Condensa] Saving model based on TEST')
                    best_loss = nested_test_loss
                    # Deep-copy required here to preserve dtypes
                    best_model = deepcopy(theta)
            else:
                logger.info('[Condensa] Saving model based on most recent')
                best_model = deepcopy(theta)

            # theta <- delta(theta)
            delta(theta)

            # LM update
            if mu > 0:
                try:
                    wmodules = w.module.modules()
                except AttributeError:
                    wmodules = w.modules()
                for w_m, theta_m, lm_m in zip(wmodules, theta.modules(),
                                              lm.modules()):
                    if hasattr(theta_m, 'condense'):
                        for pname in theta_m.condense:
                            getattr(
                                lm_m,
                                pname).data = (getattr(lm_m, pname).data - mu *
                                               (getattr(w_m, pname).detach() -
                                                getattr(theta_m, pname).data))

            optimizer.reset_state()
            # Update mu
            mu = self._update_mu(mu, self.mu_init, self.mu_multiplier,
                                 self.mu_cap)
            # Update LR schedule
            if outer_lr_scheduler is not None: outer_lr_scheduler.step()

        statistics['elapsed_lc'] = timer_lc.elapsed_seconds
        statistics['train_losses'] = train_losses
        if test: statistics['test_losses'] = test_losses
        if validate: statistics['val_losses'] = val_losses
        return best_model, statistics

    def _update_mu(self, mu, mu_init, mu_multiplier, mu_cap):
        if mu > mu_cap:
            return mu
        if mu != 0:
            return mu * mu_multiplier
        else:
            return mu_init


================================================
FILE: condensa/opt/lc/sgd.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict
import torch

class SGD(object):
    """Custom SGD implementation for L-C optimizer."""
    def __init__(self, w, lr=None, momentum=None, weight_decay=0):
        """
        Creates instance of `SGD`.

        :param w: PyTorch model.
        :type w: torch.nn.Module
        :param lr: Learning rate.
        :type lr: float
        :param momentum: SGD momentum.
        :type momentum: float
        :param weight_decay: Weight decay amount (L2 regularization).
        :type weight_decay: float
        :raises ValueError: If `lr` or `momentum` is missing, or if any
            hyper-parameter is negative.
        """
        if lr is None or momentum is None:
            raise ValueError('Learning rate and momentum are required')
        if lr < 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if momentum < 0.0:
            raise ValueError('Invalid momentum value: {}'.format(momentum))
        if weight_decay < 0.0:
            raise ValueError(
                'Invalid weight decay value: {}'.format(weight_decay))

        # Unwrap containers that expose the underlying model as `.module`.
        try:
            self.w = w.module
        except AttributeError:
            self.w = w

        # Stored for reference; `step` uses the rate passed to it instead.
        self.lr = lr
        self.momentum = momentum
        self.weight_decay = weight_decay

        # Per-parameter optimizer state (currently only 'velocity').
        self.state = defaultdict(dict)

    def zero_grad(self):
        """Zeroes out all gradients."""
        for p in self.w.parameters():
            if p.grad is not None:
                p.grad.detach_()
                p.grad.zero_()

    def reset_state(self):
        """Resets optimizer state."""
        for p in self.w.parameters():
            if 'velocity' in self.state[p]:
                self.state[p]['velocity'] = torch.zeros_like(p.data)

    def _step(self, p, condense=False, mu=None, p_theta=None, p_lm=None):
        """
        Updates a single parameter in place.

        :param p: Parameter to update; skipped if it has no gradient.
        :param condense: Whether to add the L-C penalty term to the gradient.
        :type condense: bool
        :param mu: L-C mu value (required when `condense` is True).
        :param p_theta: Matching parameter of the compressed model
            (required when `condense` is True).
        :param p_lm: Matching Lagrange multiplier parameter
            (required when `condense` is True).
        """
        if p.grad is None:
            return
        lr = self.learning_rate
        # NOTE: `d_p` aliases the stored gradient, so the in-place additions
        # below also modify `p.grad`.
        d_p = p.grad.data
        if self.weight_decay != 0:
            # Keyword `alpha` form; the positional (scalar, tensor) overload
            # is deprecated in PyTorch.
            d_p.add_(p.data, alpha=self.weight_decay)
        if condense is True:
            assert (mu is not None
                    and p_theta is not None
                    and p_lm is not None)
            # Gradient of the penalty term: mu * (w - theta) - lambda.
            d_p.add_(mu * (p.data - p_theta.data) - p_lm.data)
        # Plain gradient-descent target for this parameter.
        update = p.data - lr * (d_p)
        if 'velocity' not in self.state[p]:
            velocity = torch.zeros_like(p.data)
        else:
            velocity = self.state[p]['velocity']
        # New velocity: decayed previous velocity plus the current
        # displacement (update - p).
        x = self.momentum * velocity + update - p.data
        self.state[p]['velocity'] = x
        # Momentum is applied once more on top of the descent target.
        p.data = self.momentum * x + update

    def step(self, lr, mu, theta, lm, closure=None):
        """
        Takes one optimizer step.

        :param lr: Current learning rate.
        :type lr: float
        :param mu: L-C mu hyper-parameter value.
        :type mu: float
        :param theta: Compressed model.
        :type theta: torch.nn.Module
        :param lm: Lagrange multiplier.
        :type lm: torch.nn.Module
        :param closure: Loss closure.
        :return: Value returned by `closure`, or None if no closure is given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        self.learning_rate = lr
        # The three models are walked in lock-step; this relies on them
        # having the same module structure.
        for w_m, theta_m, lm_m in zip(self.w.modules(), theta.modules(),
                                      lm.modules()):
            if hasattr(theta_m, 'condense'):
                # Parameters marked for compression get the penalty term.
                for pname in theta_m.condense:
                    self._step(getattr(w_m, pname), True, mu,
                               getattr(theta_m, pname), getattr(lm_m, pname))
                # Remaining parameters of the module get a plain update.
                params = set([name for name, _ in theta_m.named_parameters()])
                rparams = params - theta_m.condense
                for pname in rparams:
                    self._step(getattr(w_m, pname))
            else:
                for w_p in w_m.parameters(recurse=False):
                    self._step(w_p)
        return loss


================================================
FILE: condensa/pi.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import torch.nn

from condensa import dtypes
from condensa import cfg
import condensa.tensor as T

def __precheck(module):
    """
    Verifies that a module is eligible for compression.

    The check is a no-op when `cfg.__CONDENSA_PI_PRECHECK__` is falsy.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :raises RuntimeError: If the module has child modules.
    :raises NotImplementedError: If the module has a parameter other than
        'weight' or 'bias'.
    """
    if cfg.__CONDENSA_PI_PRECHECK__:
        # Only leaf modules are accepted.
        if any(True for _ in module.children()):
            raise RuntimeError('Only leaf modules may be compressed')
        known = ('weight', 'bias')
        for pname, _ in module.named_parameters():
            if pname not in known:
                raise NotImplementedError(
                    'Unknown parameter {} detected'.format(pname))

def quantize(module, dtype):
    """
    Quantizes module to given data type (inplace).

    The module's 'weight' is always registered in `module.condense`; the
    actual type conversion is skipped while record mode is active.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param dtype: Target data type.
    :raises TypeError: If `dtype` is neither float16 nor float32.
    """
    __precheck(module)

    # Only the weight is registered as a compressed parameter.
    if not hasattr(module, 'condense'):
        module.condense = set()
    module.condense.update(['weight'])

    if cfg.__CONDENSA_RECORD_MODE__:
        return

    target = dtype.as_dtype_enum
    if target == dtypes.DT_FLOAT16:
        module.half()
    elif target == dtypes.DT_FLOAT32:
        module.float()
    else:
        raise TypeError('Unknown data type specified for quantization')

def prune(module, threshold, parameter='weight'):
    """
    Prunes module parameters based on magnitude (inplace).

    The parameter is always registered in `module.condense`; the actual
    masking is skipped while record mode is active.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param threshold: Magnitude threshold for pruning.
    :type threshold: `float`
    :param parameter: Module parameter to prune (default: 'weight')
    :type parameter: str
    :raises ValueError: If the module has no attribute named `parameter`.
    """
    __precheck(module)

    if not hasattr(module, parameter):
        raise ValueError(
            'Could not find parameter \'{}\' in module'.format(parameter))

    if hasattr(module, 'condense'): module.condense.add(parameter)
    else: module.condense = set([parameter])

    if not cfg.__CONDENSA_RECORD_MODE__:
        p = getattr(module, parameter)
        # Flat view sharing storage with the parameter data.
        pdata = p.data.view(-1)
        # Mask is cast to the parameter's tensor type before being applied.
        mask = T.simple_mask(pdata, threshold).type(pdata.type())
        T.apply_mask_inplace(pdata, mask)
        p.data = pdata.view_as(p).data
        #if cfg.__CONDENSA_SAVE_MASK__: module.mask = mask.view_as(p).data

def blockprune(module,
               threshold,
               block_size,
               criteria,
               align=None,
               parameter='weight'):
    """
    Prunes blocks of module parameters based on magnitude (inplace).

    The parameter is always registered in `module.condense`; the actual
    masking is skipped while record mode is active.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param threshold: Magnitude threshold for pruning.
    :type threshold: `float`
    :param block_size: Block size for pruning.
    :type block_size: `Tuple`
    :param criteria: Aggregation function for thresholding.
    :type criteria: `condensa.functional`
    :param align: Alignment of compressed parameters.
    :type align: `int`
    :param parameter: Module parameter to prune (default: 'weight')
    :type parameter: str
    :return: The mask that was applied, or None in record mode.
    :raises ValueError: If the module has no attribute named `parameter`.
    :raises RuntimeError: If `block_size` does not have one entry per
        parameter dimension.
    """
    __precheck(module)

    if not hasattr(module, parameter):
        raise ValueError(
            'Could not find parameter \'{}\' in module'.format(parameter))

    p = getattr(module, parameter)
    ndim = p.dim()
    bdim = len(block_size)
    if ndim != bdim:
        raise RuntimeError(
            'Block must have same dimensions as parameter \'{}\''.format(
                parameter))

    if hasattr(module, 'condense'): module.condense.add(parameter)
    else: module.condense = set([parameter])

    if not cfg.__CONDENSA_RECORD_MODE__:
        mask = T.block_mask(p.data, threshold, block_size, criteria, align)
        T.apply_mask_inplace(p.data, mask)
        return mask
    return None

def neuron_prune(module, threshold, criteria, align=None, prune_bias=True):
    """
    Prunes neurons based on magnitude (inplace).

    Each neuron is one full row of the 2D 'weight' parameter. The weight is
    always registered in `module.condense`; the actual masking is skipped
    while record mode is active.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param threshold: Magnitude threshold for pruning.
    :type threshold: `float`
    :param criteria: Aggregation function for thresholding.
    :type criteria: `condensa.functional`
    :param align: Alignment of compressed parameters.
    :type align: `int`
    :param prune_bias: Whether to prune corresponding biases.
    :type prune_bias: `bool`
    :raises ValueError: If the module has no 'weight' attribute.
    :raises NotImplementedError: If the weight is not 2D.
    """
    __precheck(module)

    parameter = 'weight'
    if not hasattr(module, parameter):
        raise ValueError(
            'Could not find parameter \'{}\' in module'.format(parameter))

    shape = getattr(module, parameter).data.shape
    if len(shape) != 2:
        raise NotImplementedError(
            'Row pruning currently only supported for 2D parameters')

    if hasattr(module, 'condense'): module.condense.add(parameter)
    else: module.condense = set([parameter])

    if not cfg.__CONDENSA_RECORD_MODE__:
        # One block per row of the weight matrix.
        block_size = (1, shape[1])
        mask = blockprune(module, threshold, block_size, criteria, align,
                          parameter)
        # Prune corresponding bias tensor
        if module.bias is not None and prune_bias is True:
            assert mask.ndimension() == 2
            # First mask column is used as the per-row keep/drop flag.
            T.apply_mask_inplace(module.bias.data, mask[:, 0])

def filter_prune(module, threshold, criteria, align=None, prune_bias=True):
    """
    Prunes 3D blocks (filters) of module parameters based on magnitude (inplace).

    Each filter is one slice along the first dimension of the 4D 'weight'
    parameter. The weight is always registered in `module.condense`; the
    actual masking is skipped while record mode is active.

    :param module: PyTorch module.
    :type module: `torch.nn.Module`
    :param threshold: Magnitude threshold for pruning.
    :type threshold: `float`
    :param criteria: Aggregation function for thresholding.
    :type criteria: `condensa.functional`
    :param align: Alignment of compressed parameters.
    :type align: `int`
    :param prune_bias: Whether to prune corresponding biases.
    :type prune_bias: `bool`
    :raises ValueError: If the module has no 'weight' attribute.
    :raises RuntimeError: If the weight is not 4D.
    """
    __precheck(module)

    parameter = 'weight'
    if not hasattr(module, parameter):
        raise ValueError(
            'Could not find parameter \'{}\' in module'.format(parameter))

    p = getattr(module, parameter)
    ndim = p.dim()
    if ndim != 4:
        raise RuntimeError('Filter pruning requires a 4D parameter')

    if hasattr(module, 'condense'): module.condense.add(parameter)
    else: module.condense = set([parameter])

    if not cfg.__CONDENSA_RECORD_MODE__:
        # One block per index along the first weight dimension.
        block_size = (1, *p.data.shape[1:])
        mask = T.block_mask(p.data, threshold, block_size, criteria, align)
        T.apply_mask_inplace(p.data, mask)
        # Prune corresponding bias tensor
        if module.bias is not None and prune_bias is True:
            assert mask.ndimension() == 4
            # First element of each filter's mask is used as its keep/drop
            # flag.
            T.apply_mask_inplace(module.bias.data, mask[:, 0, 0, 0])


================================================
FILE: condensa/schemes.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

import condensa
import condensa.tensor as T
import condensa.functional as F

class Compose(object):
    """Chains two or more schemes into a single scheme."""
    def __init__(self, schemes):
        """
        Creates a `Compose` instance.

        :param schemes: List of schemes to compose.
        :type schemes: `list`
        :raises TypeError: If `schemes` is not a list.
        """
        if isinstance(schemes, list):
            self.schemes = schemes
        else:
            raise TypeError('Please specify schemes to compose as a list')

    def pi(self, module):
        """
        Applies every scheme's compression step, in list order.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        for scheme in self.schemes:
            scheme.pi(module)

    def delta(self, module):
        """
        Applies every scheme's de-compression step, in reverse list order.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        for scheme in self.schemes[::-1]:
            scheme.delta(module)

    def __repr__(self):
        template = '<Compose: {}>'
        return template.format(self.schemes)

class Prune(object):
    """Magnitude-based pruning of a network to a target density."""
    def __init__(self, density):
        """
        Creates a `Prune` instance.

        :param density: Target density.
        :type density: `float`
        """
        # Exact layer classes this scheme operates on.
        self.layer_types = [torch.nn.Linear, torch.nn.Conv2d]
        self.density = density

    def _targets(self, module):
        """Yields sub-modules of a supported type not marked 'condensa_nocompress'."""
        for sub in module.modules():
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if type(sub) in self.layer_types:
                yield sub

    def threshold(self, module):
        """
        Computes a single magnitude threshold over all eligible weights.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        flat_weights = [
            sub.weight.data.view(-1) for sub in self._targets(module)
        ]
        return T.threshold(torch.cat(flat_weights), self.density)

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        # The threshold is computed once, before any layer is pruned.
        cutoff = self.threshold(module)
        for sub in self._targets(module):
            condensa.prune(sub, cutoff)

    def delta(self, module):
        """
        Applies de-compression scheme to module (no-op for pruning).

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        return None

    def __repr__(self):
        template = '<Prune: density:{}>'
        return template.format(self.density)

class Quantize(object):
    """Quantization of a network to a given data type."""
    def __init__(self, dtype=condensa.float16):
        """
        Creates `Quantize` class instance.

        :param dtype: Target data type (default: float16).
        """
        # Exact layer classes this scheme operates on.
        self.layer_types = [torch.nn.Linear, torch.nn.Conv2d]
        self.dtype = dtype

    def _targets(self, module):
        """Yields sub-modules of a supported type not marked 'condensa_nocompress'."""
        for sub in module.modules():
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if type(sub) in self.layer_types:
                yield sub

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        for sub in self._targets(module):
            condensa.quantize(sub, self.dtype)

    def delta(self, module):
        """
        Applies de-compression scheme to module.

        Every eligible layer is de-quantized to float32.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        for sub in self._targets(module):
            condensa.dequantize(sub, condensa.float32)

    def __repr__(self):
        template = '<Quantize: dtype:{}>'
        return template.format(self.dtype)

class NeuronPrune(object):
    """Neuron-level pruning of fully-connected layers."""
    def __init__(self, density, align=None, criteria=F.l2norm,
                 prune_bias=True):
        """
        Creates an instance of `NeuronPrune`.

        :param density: Target density.
        :type density: `float`
        :param align: Tensor alignment in compressed model.
        :type align: `int`
        :param criteria: Neuron aggregation criteria (default: l2norm).
        :type criteria: `condensa.functional`
        :param prune_bias: Whether to prune corresponding biases (default: True).
        :type prune_bias: `bool`
        """
        self.prune_bias = prune_bias
        self.criteria = criteria
        self.align = align
        self.density = density

    def _targets(self, module):
        """Yields `torch.nn.Linear` sub-modules not marked 'condensa_nocompress'."""
        for sub in module.modules():
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if isinstance(sub, torch.nn.Linear):
                yield sub

    def threshold(self, module):
        """
        Computes a single magnitude threshold over all eligible layers.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        scores = []
        for sub in self._targets(module):
            per_neuron = T.aggregate_neurons(sub.weight.data, self.criteria)
            scores.append(per_neuron.view(-1))
        return T.threshold(torch.cat(scores), self.density)

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        # The threshold is computed once, before any layer is pruned.
        cutoff = self.threshold(module)
        options = dict(align=self.align,
                       criteria=self.criteria,
                       prune_bias=self.prune_bias)
        for sub in self._targets(module):
            condensa.neuron_prune(sub, cutoff, **options)

    def delta(self, module):
        """
        Applies de-compression scheme to module (no-op for pruning).

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        return None

    def __repr__(self):
        template = '<NeuronPrune: density:{}, align:{}, criteria:{}, prune_bias:{}>'
        return template.format(self.density, self.align, self.criteria,
                               self.prune_bias)

class FilterPrune(object):
    """Filter-level pruning of convolutional layers."""
    def __init__(self, density, align=None, criteria=F.l2norm,
                 prune_bias=True):
        """
        Creates an instance of `FilterPrune`.

        :param density: Target density.
        :type density: `float`
        :param align: Tensor alignment in compressed model.
        :type align: `int`
        :param criteria: Filter aggregation criteria (default: l2norm).
        :type criteria: `condensa.functional`
        :param prune_bias: Whether to prune corresponding biases (default: True).
        :type prune_bias: `bool`
        """
        self.prune_bias = prune_bias
        self.criteria = criteria
        self.align = align
        self.density = density

    def _targets(self, module):
        """Yields `torch.nn.Conv2d` sub-modules not marked 'condensa_nocompress'."""
        for sub in module.modules():
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if isinstance(sub, torch.nn.Conv2d):
                yield sub

    def threshold(self, module):
        """
        Computes a single magnitude threshold over all eligible layers.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        scores = []
        for sub in self._targets(module):
            per_filter = T.aggregate_filters(sub.weight.data, self.criteria)
            scores.append(per_filter.view(-1))
        return T.threshold(torch.cat(scores), self.density)

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        # The threshold is computed once, before any layer is pruned.
        cutoff = self.threshold(module)
        options = dict(align=self.align,
                       criteria=self.criteria,
                       prune_bias=self.prune_bias)
        for sub in self._targets(module):
            condensa.filter_prune(sub, cutoff, **options)

    def delta(self, module):
        """
        Applies de-compression scheme to module (no-op for pruning).

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        return None

    def __repr__(self):
        template = '<FilterPrune: density:{}, align:{}, criteria:{}, prune_bias:{}>'
        return template.format(self.density, self.align, self.criteria,
                               self.prune_bias)

class StructurePrune(object):
    """Joint neuron and filter pruning driven by one shared threshold."""
    def __init__(self, density, align=None, criteria=F.l2norm,
                 prune_bias=True):
        """
        Creates an instance of `StructurePrune`.

        :param density: Target density.
        :type density: `float`
        :param align: Tensor alignment in compressed model.
        :type align: `int`
        :param criteria: Structure aggregation criteria (default: l2norm).
        :type criteria: `condensa.functional`
        :param prune_bias: Whether to prune corresponding biases (default: True).
        :type prune_bias: `bool`
        """
        self.prune_bias = prune_bias
        self.criteria = criteria
        self.align = align
        self.density = density

    def threshold(self, module):
        """
        Computes a single magnitude threshold over neurons and filters.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        scores = []
        for sub in module.modules():
            # Layers explicitly excluded from compression are ignored.
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if isinstance(sub, torch.nn.Linear):
                per_neuron = T.aggregate_neurons(sub.weight.data,
                                                 self.criteria)
                scores.append(per_neuron.view(-1))
            if isinstance(sub, torch.nn.Conv2d):
                per_filter = T.aggregate_filters(sub.weight.data,
                                                 self.criteria)
                scores.append(per_filter.view(-1))
        return T.threshold(torch.cat(scores), self.density)

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        # The threshold is computed once, before any layer is pruned.
        cutoff = self.threshold(module)
        options = dict(align=self.align,
                       criteria=self.criteria,
                       prune_bias=self.prune_bias)
        for sub in module.modules():
            if isinstance(sub, torch.nn.Linear) and not hasattr(
                    sub, 'condensa_nocompress'):
                condensa.neuron_prune(sub, cutoff, **options)
            if isinstance(sub, torch.nn.Conv2d) and not hasattr(
                    sub, 'condensa_nocompress'):
                condensa.filter_prune(sub, cutoff, **options)

    def delta(self, module):
        """
        Applies de-compression scheme to module (no-op for pruning).

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        return None

    def __repr__(self):
        template = '<StructurePrune: density:{}, align:{}, criteria:{}, prune_bias:{}>'
        return template.format(self.density, self.align, self.criteria,
                               self.prune_bias)

class BlockPrune(object):
    """Block-level pruning of Linear layers."""
    def __init__(self, density, block_size, criteria=F.l2norm):
        """
        Creates an instance of `BlockPrune`.

        :param density: Target density.
        :type density: `float`
        :param block_size: Target block size.
        :type block_size: `Tuple`
        :param criteria: Structure aggregation criteria (default: l2norm).
        :type criteria: `condensa.functional`
        """
        # Exact layer classes this scheme operates on.
        self.layer_types = [torch.nn.Linear]
        self.criteria = criteria
        self.block_size = block_size
        self.density = density

    def _targets(self, module):
        """Yields sub-modules of a supported type not marked 'condensa_nocompress'."""
        for sub in module.modules():
            if hasattr(sub, 'condensa_nocompress'):
                continue
            if type(sub) in self.layer_types:
                yield sub

    def threshold(self, module):
        """
        Computes a single magnitude threshold over all eligible layers.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        scores = []
        for sub in self._targets(module):
            per_block = T.aggregate(sub.weight.data, self.block_size,
                                    self.criteria)
            scores.append(per_block.view(-1))
        return T.threshold(torch.cat(scores), self.density)

    def pi(self, module):
        """
        Applies compression scheme to module.

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        # The threshold is computed once, before any layer is pruned.
        cutoff = self.threshold(module)
        for sub in self._targets(module):
            condensa.blockprune(sub,
                                cutoff,
                                block_size=self.block_size,
                                criteria=self.criteria)

    def delta(self, module):
        """
        Applies de-compression scheme to module (no-op for pruning).

        :param module: PyTorch module.
        :type module: `torch.nn.Module`
        """
        return None

    def __repr__(self):
        template = '<BlockPrune: density:{}, block_size:{}>'
        return template.format(self.density, self.block_size)


================================================
FILE: condensa/tensor.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import torch

def density(tensor):
    """
    Returns the fraction of tensor elements that are nonzero.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :return: Ratio of nonzeros to total elements
    :rtype: `float`
    """
    flat = tensor.view(-1)
    total = flat.numel()
    # For a 1-D input, `nonzero()` yields one index row per nonzero element.
    nonzeros = flat.nonzero().numel()
    return float(nonzeros) / float(total)

def sparsity(tensor):
    """
    Computes the ratio of zeros to total elements in a tensor.

    This is the complement of :func:`density`.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :return: Ratio of zeros to total elements
    :rtype: `float`
    """
    nonzero_fraction = density(tensor)
    return 1. - nonzero_fraction

def threshold(tensor, density):
    """
    Computes a magnitude-based threshold for given tensor.

    The threshold is the smallest magnitude among the
    ``int(density * tensor.numel())`` largest-magnitude elements.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param density: Desired ratio of nonzeros to total elements
    :type density: `float`
    :return: Magnitude threshold (zero-dimensional tensor)
    :rtype: `torch.Tensor`
    :raises RuntimeError: If `density` selects zero elements
    """
    # reshape (rather than view) also handles non-contiguous tensors.
    tf = tensor.abs().reshape(-1)
    numel = int(density * tf.numel())
    if numel == 0:
        raise RuntimeError('Provided density value causes model to be zero.')

    # Magnitudes are already non-negative; sorted output puts the smallest
    # of the selected values last.
    topk, _ = torch.topk(tf, numel, sorted=True)
    return topk.data[-1]

def aggregate(tensor, blocksize, criteria):
    """
    Aggregates tensor dimensions according to criteria.

    The tensor's magnitudes are split into non-overlapping blocks of shape
    `blocksize`; each block is flattened and reduced by `criteria`.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param blocksize: Size of blocks to aggregate
    :type blocksize: `Tuple(int)`
    :param criteria: Aggregation criteria, called as
        ``criteria(blocks, dim=1, keepdim=True)``
    :type criteria: `condensa.functional`
    :return: Aggregated tensor with one row per block
    :rtype: `torch.Tensor`
    :raises RuntimeError: If tensor and block ranks differ
    :raises TypeError: If a tensor dimension is not a multiple of the
        corresponding block dimension
    """
    if tensor.dim() != len(blocksize):
        raise RuntimeError('Tensor and block dimensions do not match')

    block = np.array(blocksize, dtype=int)
    shape = np.array(tensor.shape, dtype=int)
    if not np.all(shape % block == 0):
        raise TypeError('Tensor size must be divisible by block size')
    # Number of blocks along each dimension.
    repeats = shape // block
    blocksize_flat = int(np.prod(block))

    # Interleave (repeat, block) per dimension, then move all repeat axes in
    # front of all block axes so each block's elements are adjacent.
    tmpshape = np.column_stack([repeats, block]).ravel()
    order = np.arange(len(tmpshape))
    order = np.concatenate([order[::2], order[1::2]])
    blocks = tensor.abs().reshape(tuple(tmpshape.tolist()))
    blocks = blocks.permute(tuple(order.tolist()))
    return criteria(blocks.reshape(-1, blocksize_flat), dim=1, keepdim=True)

def aggregate_neurons(tensor, criteria):
    """
    Aggregates neurons (rows) in given weight matrix.

    Each block spans one full row of the matrix.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param criteria: Aggregation criteria
    :type criteria: `condensa.functional`
    :return: Neuron-aggregated tensor
    :rtype: `torch.Tensor`
    """
    row_block = (1, tensor.shape[1])
    return aggregate(tensor, row_block, criteria)

def aggregate_filters(tensor, criteria):
    """
    Aggregates 3D filters in given weight tensor.

    Each block covers one index of the first dimension and the full extent
    of every remaining dimension.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param criteria: Aggregation criteria
    :type criteria: `condensa.functional`
    :return: Filter-aggregated tensor
    :rtype: `torch.Tensor`
    """
    filter_block = (1, ) + tuple(tensor.shape[1:])
    return aggregate(tensor, filter_block, criteria)

def simple_mask(tensor, threshold, align=None):
    """
    Computes a simple binary mask for given magnitude threshold.

    Without `align`, the mask is the element-wise comparison
    ``|tensor| >= threshold``. With `align`, the number of elements meeting
    the threshold is rounded down to a multiple of `align` and that many
    largest-magnitude elements are kept; the mask then has the dtype of
    `tensor`.

    :param tensor: One-dimensional PyTorch tensor
    :type tensor: `torch.Tensor`
    :param threshold: magnitude threshold for pruning
    :type threshold: `float`
    :param align: Optional alignment for the number of retained elements
    :type align: `int`
    :return: Mask
    :rtype: `torch.Tensor`
    :raises RuntimeError: If the tensor has fewer than `align` elements
    """
    assert tensor.dim() == 1
    if align is None:
        return torch.ge(tensor.abs(), threshold)

    if tensor.size(0) < align:
        raise RuntimeError('Tensor too small for given alignment')
    magnitudes = tensor.abs()
    above = torch.ge(magnitudes, threshold).nonzero().size(0)
    # Round the retained count down to a multiple of `align`.
    keep = int(above / align) * align
    _, positions = torch.topk(magnitudes, keep)
    ones = torch.ones(keep,
                      dtype=tensor.dtype,
                      layout=tensor.layout,
                      device=tensor.device)
    return torch.zeros_like(tensor).scatter_(0, positions, ones)

def block_mask(tensor, threshold, blocksize, criteria, align=None):
    """
    Computes an n-D binary mask for given magnitude threshold.

    The tensor's magnitudes are split into non-overlapping blocks of shape
    `blocksize`, each block is reduced by `criteria`, the reduced values are
    thresholded with :func:`simple_mask`, and every block's decision is
    broadcast back to all of its elements.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param threshold: magnitude threshold for pruning
    :type threshold: `float`
    :param blocksize: desired block size (Tuple)
    :type blocksize: `Tuple`
    :param criteria: aggregation function for thresholding, called as
        ``criteria(blocks, dim=1, keepdim=True)``
    :type criteria: `condensa.functional`
    :param align: Optional alignment forwarded to :func:`simple_mask`
    :type align: `int`
    :return: Mask with the same shape as `tensor`
    :rtype: `torch.Tensor`
    :raises RuntimeError: If tensor and block ranks differ
    :raises TypeError: If a tensor dimension is not a multiple of the
        corresponding block dimension
    """
    # Original implementation at: https://stackoverflow.com/questions/42297115
    # /numpy-split-cube-into-cubes/42298440#42298440
    if tensor.dim() != len(blocksize):
        raise RuntimeError('Tensor and block dimensions do not match')

    block = np.array(blocksize, dtype=int)
    shape = np.array(tensor.shape, dtype=int)
    if not np.all(shape % block == 0):
        raise TypeError('Tensor size must be divisible by block size')
    # Number of blocks along each dimension.
    repeats = shape // block
    blocksize_flat = int(np.prod(block))

    # Split: interleave (repeat, block) per dimension, then move all repeat
    # axes in front of all block axes so that each block is contiguous.
    tmpshape = np.column_stack([repeats, block]).ravel()
    order = np.arange(len(tmpshape))
    order = np.concatenate([order[::2], order[1::2]])
    blocks = tensor.abs().reshape(tuple(tmpshape.tolist()))
    blocks = blocks.permute(tuple(order.tolist())).reshape(-1, *blocksize)
    agg = criteria(blocks.reshape(-1, blocksize_flat), dim=1, keepdim=True)

    # One keep/drop decision per block, expanded to every block element.
    mask = simple_mask(agg.view(-1), threshold, align)
    mask = mask.view(agg.shape).expand(-1,
                                       blocksize_flat).reshape(blocks.shape)

    # Merge: undo the split by re-interleaving repeat and block axes.
    tmpshape = np.concatenate([repeats, block])
    order = np.arange(len(tmpshape)).reshape(2, -1).ravel(order='F')
    merged = mask.reshape(tuple(tmpshape.tolist()))
    merged = merged.permute(tuple(order.tolist()))
    return merged.reshape(tuple(shape.tolist()))

def apply_mask(tensor, mask):
    """
    Computes masked version of tensor.

    The mask is cast to the tensor's type and multiplied element-wise; the
    input tensor itself is not modified.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param mask: Binary mask
    :type mask: `torch.Tensor`
    :return: Masked version of `tensor`
    :rtype: `torch.Tensor`
    """
    typed_mask = mask.type(tensor.type())
    return tensor * typed_mask

def apply_mask_inplace(tensor, mask):
    """
    Applies binary mask in-place.

    The mask is cast to the tensor's type and multiplied into `tensor`;
    nothing is returned.

    :param tensor: PyTorch tensor
    :type tensor: `torch.Tensor`
    :param mask: Binary mask
    :type mask: `torch.Tensor`
    """
    typed_mask = mask.type(tensor.type())
    tensor.mul_(typed_mask)
    return None


================================================
FILE: condensa/type_enums.py
================================================
# Copyright 2019 NVIDIA Corporation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Integer identifiers for the supported data types.

# Floating-point types.
DT_FLOAT16, DT_FLOAT32, DT_FLOAT64 = 1, 2, 3
# 8-bit integer types (signed, unsigned).
DT_INT8, DT_UINT8 = 4, 5
# 16-bit integer types (signed, unsigned).
DT_INT16, DT_UINT16 = 6, 7


================================================
FILE: condensa/util.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
import sys
import numpy as np
import logging
from tqdm import tqdm

import torch.nn.utils
import torch.utils.data as data
from torch.autograd import Variable

import condensa.tensor as T

# Module-level logger; pretrain() consults its INFO level to decide whether
# progress bars are displayed.
logger = logging.getLogger(__name__)

class AverageMeter(object):
    """
    Tracks the most recent value and the running weighted average.

    Attributes: ``val`` (last value), ``sum`` (weighted sum), ``count``
    (total weight) and ``avg`` (``sum / count``).
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Resets all tracked statistics to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """
        Records a new value.

        :param val: Value to record
        :param n: Weight of the value (e.g. number of samples it covers)
        """
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count

def to_python_float(t):
    """
    Extracts a plain scalar from `t`.

    :param t: Object exposing ``item()``, or an indexable container
    :return: ``t.item()`` when available, otherwise ``t[0]``
    """
    return t.item() if hasattr(t, 'item') else t[0]

def is_leaf_node(module):
    """
    Checks if given module is a leaf module.

    A module is a leaf when it has no child modules.

    :param module: PyTorch module
    :type module: `torch.nn.Module`
    :return: Boolean value representing whether module is a leaf.
    :rtype: `bool`
    """
    children = list(module.children())
    return len(children) == 0

def magnitude_threshold(module, density):
    """
    Computes a magnitude-based threshold for given module.

    All parameters of the module are flattened into a single vector before
    the threshold is computed.

    :param module: PyTorch module
    :type module: `torch.nn.Module`
    :param density: Desired ratio of nonzeros to total elements
    :type density: `float`
    :return: Magnitude threshold
    :rtype: `float`
    """
    return T.threshold(
        torch.nn.utils.parameters_to_vector(module.parameters()), density)

def empty_stat_fn(model, criterion, dataloader):
    """
    Empty model statistics function: returns loss.

    :param model: PyTorch model
    :type model: `torch.nn.Module`
    :param criterion: Loss function
    :param dataloader: Data loader to use
    :return: Tuple of loss, (empty) dictionary of statistics
    :rtype: `Tuple(float, dict)`
    """
    mean_loss = loss(model, criterion, dataloader)
    return (mean_loss, {})

def accuracy(output, target, topk=(1, )):
    """
    Computes the precision@k for the specified values of k

    :param output: Predicted output batch
    :type output: `torch.Tensor`
    :param target: Actual output batch
    :type target: `torch.Tensor`
    :param topk: Top-k value
    :type topk: `Tuple`
    :return: One single-element tensor per entry of `topk`, holding the
        percentage of samples whose target is among the top-k predictions
    :rtype: `List(torch.Tensor)`
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # pred: (maxk, batch) after the transpose; row i holds the (i+1)-th best
    # prediction of every sample.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: `correct` may inherit the transposed
        # (non-contiguous) layout of `pred`, on which view(-1) raises.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

def loss(model, criterion, dataloader):
    """
    Computes loss on given dataset.

    The model is switched to evaluation mode and the per-batch losses are
    averaged, weighted by batch size.

    :param model: PyTorch model
    :type model: `torch.nn.Module`
    :param criterion: Loss function
    :param dataloader: Data loader to use
    :return: Loss
    :rtype: `float`
    :raises NotImplementedError: If the first model parameter is neither
        FP16 nor FP32
    """
    losses = AverageMeter()
    model.eval()
    # The first parameter determines the model's dtype and device.
    pzero = list(model.parameters())[0]
    if pzero.dtype not in (torch.float32, torch.float16):
        raise NotImplementedError('Only FP16 and FP32 weights are supported')
    cast2fp16 = pzero.dtype == torch.float16
    # Send batches to wherever the model lives; unconditionally using CUDA
    # breaks CPU-resident models on machines that have a GPU.
    device = pzero.device
    with torch.no_grad():
        for inputs, target in dataloader:
            inputs = inputs.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            if cast2fp16:
                inputs = inputs.half()
            output = model(inputs)
            batch_loss = criterion(output, target)
            losses.update(to_python_float(batch_loss), inputs.size(0))
    return losses.avg

def cnn_statistics(model, criterion, dataloader):
    """
    Computes loss and accuracy of given CNN model.

    The model is switched to evaluation mode; loss and accuracies are
    averaged over the dataset, weighted by batch size.

    :param model: PyTorch model
    :type model: `torch.nn.Module`
    :param criterion: Loss function
    :param dataloader: Data loader to use
    :return: Tuple of loss and a dictionary with keys ``'top1'`` and
        ``'top5'`` holding the Top-1 and Top-5 accuracies
    :rtype: `Tuple(float, dict)`
    :raises NotImplementedError: If the first model parameter is neither
        FP16 nor FP32
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    # The first parameter determines the model's dtype and device.
    pzero = list(model.parameters())[0]
    if pzero.dtype not in (torch.float32, torch.float16):
        raise NotImplementedError('Only FP16 and FP32 weights are supported')
    cast2fp16 = pzero.dtype == torch.float16
    # Send batches to wherever the model lives; unconditionally using CUDA
    # breaks CPU-resident models on machines that have a GPU.
    device = pzero.device
    with torch.no_grad():
        for inputs, target in dataloader:
            inputs = inputs.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            if cast2fp16:
                inputs = inputs.half()
            output = model(inputs)
            batch_loss = criterion(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            batch_size = inputs.size(0)
            losses.update(to_python_float(batch_loss), batch_size)
            top1.update(to_python_float(prec1), batch_size)
            top5.update(to_python_float(prec5), batch_size)
    return (losses.avg,
            {'top1': top1.avg, 'top5': top5.avg})

def compressed_model_stats(w, wc):
    """
    Retrieve various statistics for compressed model.

    Counts nonzero parameters for both models as a whole (key
    ``'total_nnz'``) and for every `Linear`/`Conv2d` layer (keyed by module
    name). Layers are paired by walking both models' modules in lockstep.

    :param w: Original model
    :type w: `torch.nn.Module`
    :param wc: Compressed model
    :type wc: `torch.nn.Module`
    :return: Dictionary of compressed model statistics with keys
        ``'num_params'`` and ``'num_params_compressed'``
    :rtype: `dict`
    """
    def _count_nonzeros(module):
        # Number of nonzero entries across all parameters of `module`.
        flat = torch.nn.utils.parameters_to_vector(module.parameters())
        return flat.view(-1).nonzero().numel()

    nparams_w = {'total_nnz': _count_nonzeros(w)}
    nparams_wc = {'total_nnz': _count_nonzeros(wc)}

    tracked_types = (torch.nn.Linear, torch.nn.Conv2d)
    paired_modules = zip(w.named_modules(), wc.named_modules())
    for (name_w, m_w), (name_wc, m_wc) in paired_modules:
        # Exact type match on the original model's layer.
        if type(m_w) not in tracked_types:
            continue
        nparams_w[name_w] = _count_nonzeros(m_w)
        nparams_wc[name_wc] = _count_nonzeros(m_wc)

    return {'num_params': nparams_w, 'num_params_compressed': nparams_wc}

def pretrain(epochs, model, trainloader, criterion, optimizer):
    """
    No-frills pre-training method.

    Runs `epochs` passes over `trainloader`, performing one optimizer step
    per mini-batch. When CUDA is available the model is moved to the GPU
    and wrapped in `torch.nn.DataParallel`. A progress bar is shown per
    epoch when the module logger is enabled for INFO.

    :param epochs: Number of epochs
    :type epochs: `int`
    :param model: PyTorch model
    :type model: `torch.nn.Module`
    :param trainloader: Training dataloader
    :param criterion: Loss criterion
    :param optimizer: Optimizer to use
    """
    _config = {'epochs': epochs}
    logging.info('[Condensa] PRETRAIN CONFIG [' +
                 ', '.join('{!s}={!r}'.format(k, v)
                           for k, v in _config.items()) + ']')

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
        model = torch.nn.DataParallel(model)
    model.train()
    for epoch in range(epochs):
        # One progress bar per epoch; it stays None when INFO logging is
        # disabled, so nothing is referenced that was never created.
        pbar = None
        if logger.isEnabledFor(logging.INFO):
            pbar = tqdm(total=len(trainloader),
                        ascii=True,
                        desc='Epoch {}'.format(epoch))
        try:
            for inputs, target in trainloader:
                if use_cuda:
                    inputs = inputs.cuda(non_blocking=True)
                    target = target.cuda(non_blocking=True)
                optimizer.zero_grad()
                output = model(inputs)
                batch_loss = criterion(output, target)
                batch_loss.backward()
                optimizer.step()
                if pbar is not None:
                    pbar.update()
        finally:
            # Close every epoch's bar, even if training raised.
            if pbar is not None:
                pbar.close()
    logging.info('')

class EventTimer(object):
    """Simple timer class based on ``time.perf_counter``."""
    def __init__(self):
        """Constructor. Begins timing."""
        self.reset()

    def reset(self):
        """Reset timer: timing restarts from now."""
        self.begin = time.perf_counter()

    @property
    def elapsed_seconds(self):
        """Returns seconds elapsed since construction or the last reset."""
        now = time.perf_counter()
        return now - self.begin


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = Condensa
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" and "Makefile" name no build outputs, so they are declared phony.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name (e.g. "html").
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/make.bat
================================================
@ECHO OFF

REM Work from the directory that contains this script.
pushd %~dp0

REM Command file for Sphinx documentation

REM Use sphinx-build unless SPHINXBUILD is already set in the environment.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=Condensa

REM Without a target argument, show the Sphinx help.
if "%1" == "" goto help

REM Probe that the Sphinx executable can be launched; errorlevel 9009 is
REM treated as "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
REM Restore the caller's working directory.
popd


================================================
FILE: docs/source/_static/ga_tracker.js
================================================
// Analytics bootstrap for the documentation pages: reuse an existing
// dataLayer queue if one is present, otherwise start a new one.
window.dataLayer = window.dataLayer || [];

// Queues a call by pushing its arguments object onto dataLayer.
function gtag() {
  dataLayer.push(arguments);
}

gtag('js', new Date());
gtag('config', 'UA-146596996-1');


================================================
FILE: docs/source/conf.py
================================================
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file contains only a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../..'))
sys.setrecursionlimit(1500)

import condensa

# -- Project information -----------------------------------------------------

project = u'Condensa'
copyright = u'2019, NVIDIA Corporation'
author = u'Saurav Muralidharan'

# The short X.Y version
version = '0.5'
# The full version, including alpha/beta/rc tags
release = '0.5-beta'


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.napoleon',
]

napoleon_use_ivar = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
if os.environ.get('READTHEDOCS') != 'True':
  try:
    import sphinx_rtd_theme
  except ImportError:
    pass  # assume we have sphinx >= 1.3
  else:
    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
  html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'Condensadoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'Condensa.tex', u'Condensa Documentation',
     u'Saurav Muralidharan', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'condensa', u'Condensa Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'Condensa', u'Condensa Documentation',
     author, 'Condensa', 'Programmable Model Compression',
     'Miscellaneous'),
]


# -- Extension configuration -------------------------------------------------

#autoclass_content = 'both'

def setup(app):
    """
    Insert Google Analytics tracker.
    Based on this Stackoverflow suggestion: https://stackoverflow.com/a/41885884
    """
    app.add_javascript("https://www.googletagmanager.com/gtag/js?id=UA-146596996-1")
    app.add_javascript("ga_tracker.js")


================================================
FILE: docs/source/guide/install.rst
================================================
Installation
============

Prerequisites
-------------

Condensa requires:

* A working Linux installation (we use Ubuntu 18.04)
* NVIDIA drivers and CUDA 10+ for GPU support
* Python 3.5 or newer
* PyTorch 1.0 or newer

Installation from Source
------------------------

Retrieve the latest source code from the Condensa repository:

.. code-block:: bash

   git clone https://github.com/NVlabs/condensa.git

Navigate to the source code directory and run the following:

.. code-block:: bash

   pip install -r requirements.txt

To check the installation, run the unit test suite:

.. code-block:: bash

   bash run_all_tests.sh -v


================================================
FILE: docs/source/guide/usage.rst
================================================
Usage
=====

The `notebooks`_ folder contains Jupyter notebooks with step-by-step walkthroughs
for various usage scenarios. In particular, check out the `AlexNet Compression`_ notebook
for a simple getting started guide.

The `examples`_ folder contains additional, more complex examples of using Condensa.

.. _notebooks: https://github.com/NVlabs/condensa/blob/master/notebooks
.. _AlexNet Compression: https://github.com/NVlabs/condensa/blob/master/notebooks/AlexNet.ipynb
.. _examples: https://github.com/NVlabs/condensa/blob/master/examples


================================================
FILE: docs/source/index.rst
================================================
.. Condensa documentation master file, created by
   sphinx-quickstart on Tue Sep  4 15:17:30 2018.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Condensa Documentation
======================

Condensa is a framework for **programmable model compression** in Python.
It comes with a set of built-in compression operators which may be used to
compose complex compression schemes targeting specific combinations of DNN,
hardware platform, and optimization objective.
Common programming abstractions such as conditionals, iteration, and
recursion are all natively supported.
To recover any accuracy lost during compression, Condensa uses a constrained
optimization formulation of model compression and employs an Augmented Lagrangian-based
algorithm as the optimizer.

Condensa is under active development, and bug reports, pull requests, and other feedback are all highly appreciated.


.. toctree::
   :maxdepth: 2
   :caption: Getting Started

   guide/install
   guide/usage

.. toctree::
   :maxdepth: 2
   :caption: Module API Reference

   modules/schemes
   modules/pi
   modules/compressor
   modules/opt
   modules/finetuner
   modules/tensor
   modules/functional
   modules/util

.. toctree::
   :maxdepth: 2
   :caption: Notes

   modules/lc

Indices and Tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


================================================
FILE: docs/source/modules/compressor.rst
================================================
Model Compressor
================
.. autoclass:: condensa.Compressor
   :members:

   .. automethod:: __init__


================================================
FILE: docs/source/modules/finetuner.rst
================================================
Model Fine-Tuner
================
.. autoclass:: condensa.finetune.FineTuner
   :members:

   .. automethod:: __init__


================================================
FILE: docs/source/modules/functional.rst
================================================
Aggregation Functions
=====================
.. automodule:: condensa.functional
   :members:


================================================
FILE: docs/source/modules/lc.rst
================================================
L-C Optimizer Usage
===================


================================================
FILE: docs/source/modules/opt.rst
================================================
Optimizers
==========
.. automodule:: condensa.opt

Direct Compression Optimizer
----------------------------
.. autoclass:: condensa.opt.DC
   :members:

   .. automethod:: __init__

L-C Optimizer
-------------
.. autoclass:: condensa.opt.LC
   :members:

   .. automethod:: __init__


================================================
FILE: docs/source/modules/pi.rst
================================================
Compression Operators
=====================
.. automodule:: condensa.pi
   :members:

.. automodule:: condensa.delta
   :members:


================================================
FILE: docs/source/modules/schemes.rst
================================================
Compression Schemes
===================
.. automodule:: condensa.schemes

Composition
-----------
.. autoclass:: condensa.schemes.Compose
   :members:

   .. automethod:: __init__

Unstructured Pruning
--------------------
.. autoclass:: condensa.schemes.Prune
   :members:

   .. automethod:: __init__

Quantization
------------
.. autoclass:: condensa.schemes.Quantize
   :members:

   .. automethod:: __init__

Neuron Pruning
--------------
.. autoclass:: condensa.schemes.NeuronPrune
   :members:

   .. automethod:: __init__

Filter Pruning
--------------
.. autoclass:: condensa.schemes.FilterPrune
   :members:

   .. automethod:: __init__

Structured Pruning
------------------
.. autoclass:: condensa.schemes.StructurePrune
   :members:

   .. automethod:: __init__

Block Pruning
-------------
.. autoclass:: condensa.schemes.BlockPrune
   :members:

   .. automethod:: __init__


================================================
FILE: docs/source/modules/tensor.rst
================================================
Tensor Operators
================
.. automodule:: condensa.tensor
   :members:


================================================
FILE: docs/source/modules/util.rst
================================================
Utilities
=========
.. automodule:: condensa.util
   :members:


================================================
FILE: examples/cifar/compress.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import argparse
import logging
import csv

import gzip
import pickle

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.utils
import torchvision.datasets as datasets
import torch.utils.data as data
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms

import condensa
from condensa import schemes

import util
import models

if __name__ == '__main__':
    # Command-line driver: compress a pretrained CIFAR model with Condensa's
    # LC optimizer, save the compressed weights and optionally dump the
    # compressor statistics to a CSV file.

    # Every public callable exported by the `models` package is a valid
    # architecture name.
    model_names = sorted(
        name for name in models.__dict__
        if not name.startswith("__") and callable(models.__dict__[name]))

    valid_schemes = ['PRUNE', 'PQ', 'FILTER']
    parser = argparse.ArgumentParser(description='CIFAR LC Compression Script')
    # NOTE: argparse does not validate defaults against `choices`, so the
    # default must be spelled exactly like an exported factory name.
    parser.add_argument('--arch',
                        default='alexnet',
                        choices=model_names,
                        help='Model architecture: ' + ' | '.join(model_names) +
                        ' (default: alexnet)')
    parser.add_argument('--dataset', default='cifar10', type=str)
    parser.add_argument('--model', help='Pretrained model filename')
    parser.add_argument('--steps', type=int, help='Number of LC iterations')
    parser.add_argument('--scheme',
                        choices=valid_schemes,
                        required=True,
                        help='Compression scheme')
    parser.add_argument('--density',
                        required=True,
                        type=float,
                        help='Density for pruning')
    parser.add_argument('--align',
                        type=int,
                        default=None,
                        help='Alignment for structured pruning')
    parser.add_argument('--l_batch_size',
                        type=int,
                        default=128,
                        help='Batch size for L step')
    parser.add_argument('--val_batch_size',
                        type=int,
                        default=100,
                        help='Validation batch size')
    parser.add_argument('--lr',
                        type=float,
                        default=0.02,
                        help='Initial learning rate')
    parser.add_argument('--lr_end',
                        type=float,
                        default=None,
                        help='Ending learning rate')
    parser.add_argument('--lr_decay',
                        type=float,
                        default=None,
                        help='Learning rate decay')
    parser.add_argument('--lr_schedule',
                        type=int,
                        nargs='+',
                        default=None,
                        help='Decrease learning rate at these epochs.')
    parser.add_argument('--lr_multiplier',
                        type=float,
                        default=None,
                        help='Learning rate multiplier')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.95,
                        help='SGD momentum')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=0,
                        help='SGD weight decay')
    parser.add_argument('--mb_iterations_per_l',
                        type=int,
                        default=2000,
                        help='Minibatch iterations per L step')
    parser.add_argument('--mb_iterations_first_l',
                        type=int,
                        default=10000,
                        help='Minibatch iterations for first L step')
    parser.add_argument('--mu_init',
                        type=float,
                        default=0.001,
                        help='Initial value of mu')
    parser.add_argument('--mu_multiplier', type=float, help='mu multiplier')
    parser.add_argument('--mu_cap', type=float, default=10000, help='mu cap')
    parser.add_argument('--out',
                        default='compressed_model.pth',
                        help='Compressed output model filename')
    parser.add_argument('--csv',
                        default=None,
                        help='compression statistics CSV file')
    parser.add_argument('-v',
                        '--verbose',
                        help='verbose logging output',
                        action='store_true')

    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        format='%(message)s')

    if args.dataset == 'cifar10':
        dataset = datasets.CIFAR10
        num_classes = 10
    elif args.dataset == 'cifar100':
        dataset = datasets.CIFAR100
        num_classes = 100
    else:
        raise RuntimeError('Invalid dataset: must be CIFAR-10 or CIFAR-100')

    # Load model architecture and restore the pretrained weights
    model = models.__dict__[args.arch](num_classes=num_classes)
    model.load_state_dict(torch.load(args.model))

    # Select the compression scheme requested on the command line
    if args.scheme == 'PRUNE':
        scheme = schemes.Prune(args.density)
    elif args.scheme == 'PQ':
        scheme = schemes.Compose(
            [schemes.Prune(args.density),
             schemes.Quantize()])
    elif args.scheme == 'FILTER':
        scheme = schemes.FilterPrune(args.density)
    else:
        raise RuntimeError('Unknown scheme: {}'.format(args.scheme))

    print('SCHEME: {}'.format(scheme))

    trainloader, valloader = \
        util.cifar_train_val_loader(dataset,
                                    args.l_batch_size,
                                    args.val_batch_size)
    testloader = util.cifar_test_loader(dataset, args.val_batch_size)

    # Instantiate LC optimizer
    sgd_params = {'momentum': args.momentum, 'weight_decay': args.weight_decay}
    lc = condensa.opt.LC(steps=args.steps,
                         l_optimizer=condensa.opt.lc.SGD,
                         l_optimizer_params=sgd_params,
                         lr=args.lr,
                         lr_end=args.lr_end,
                         lr_decay=args.lr_decay,
                         lr_schedule=args.lr_schedule,
                         lr_multiplier=args.lr_multiplier,
                         mb_iterations_per_l=args.mb_iterations_per_l,
                         mb_iterations_first_l=args.mb_iterations_first_l,
                         mu_init=args.mu_init,
                         mu_multiplier=args.mu_multiplier,
                         mu_cap=args.mu_cap,
                         debugging_flags={'custom_model_statistics':
                                           condensa.util.cnn_statistics})

    criterion = nn.CrossEntropyLoss().cuda()
    # Compress model using Condensa
    compressor = condensa.Compressor(lc, scheme, model, trainloader,
                                     testloader, valloader, criterion)

    w = compressor.run()

    if args.out is not None:
        torch.save(w.state_dict(), args.out)
        logging.info('[Condensa] Compressed model written to disk')

    print('\n==== Profiling Results ====')
    for k, v in compressor.statistics.items():
        print('  ' + k + ':', v)
    print('')

    if args.csv is not None:
        # newline='' is what the csv module expects of the file object; the
        # `with` block closes the file, so no explicit close() is needed.
        with open(args.csv, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for k, v in compressor.statistics.items():
                row = [k]
                if isinstance(v, list):
                    row += [str(x) for x in v]
                else:
                    row.append(str(v))
                writer.writerow(row)
        logging.info('[Condensa] Compression stats written to disk')


================================================
FILE: examples/cifar/compress_alexnet.sh
================================================
#!/usr/bin/env bash
# Launch compress.py on the CIFAR-10 AlexNet example model.
#
# Positional arguments:
#   $1  compression scheme (forwarded to --scheme)
#   $2  density            (forwarded to --density)
#   $3  number of LC steps (forwarded to --steps)
#
# The compressed model is written to compressed/<prefix>.pth and the
# statistics to results/<prefix>.csv.

# All three positional arguments are required by compress.py.
if [[ $# -lt 3 ]]; then
  echo "Usage: compress_alexnet.sh [scheme] [density] [#iterations]"
  exit 1
fi

SCHEME=${1}
DENSITY=${2}
STEPS=${3}

# Dots in the density are replaced by underscores so the value can be used
# inside a file name (e.g. 0.05 -> 0_05).
PREFIX=alexnet_${SCHEME}_${DENSITY//[\.]/_}

python compress.py\
       --arch alexnet --dataset cifar10\
       --lr 0.01 --lr_end 1e-4\
       --weight_decay 0\
       --momentum 0.95\
       --mb_iterations_per_l 3000\
       --mb_iterations_first_l 30000\
       --mu_init 1e-3 --mu_multiplier 1.1\
       --l_batch_size 128\
       --model trained/alexnet.pth\
       --scheme "${SCHEME}"\
       --density "${DENSITY}"\
       --out "compressed/${PREFIX}.pth"\
       --csv "results/${PREFIX}.csv"\
       -v --steps "${STEPS}"


================================================
FILE: examples/cifar/finetune.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import argparse
import logging
import csv

import gzip
import pickle

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.utils
import torchvision.datasets as datasets
import torch.utils.data as data
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms

import condensa

import util
import models

if __name__ == '__main__':
    # Command-line driver: fine-tune a (compressed) CIFAR model with
    # Condensa's FineTuner and report the number of non-zero parameters
    # before and after fine-tuning.

    # Every public callable exported by the `models` package is a valid
    # architecture name.
    model_names = sorted(
        name for name in models.__dict__
        if not name.startswith("__") and callable(models.__dict__[name]))

    parser = argparse.ArgumentParser(description='CIFAR fine-tuning script')
    # NOTE: argparse does not validate defaults against `choices`, so the
    # default must be spelled exactly like an exported factory name.
    parser.add_argument('--arch',
                        default='alexnet',
                        choices=model_names,
                        help='Model architecture: ' + ' | '.join(model_names) +
                        ' (default: alexnet)')
    parser.add_argument('--dataset', default='cifar10', type=str)
    parser.add_argument('--model', help='Pretrained model filename')
    parser.add_argument('--epochs',
                        type=int,
                        help='Number of fine-tuning epochs')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='Batch size for training')
    parser.add_argument('--val_batch_size',
                        type=int,
                        default=128,
                        help='Validation batch size')
    parser.add_argument('--lr', type=float, default=0.1, help='Learning rate')
    parser.add_argument('--lr_end',
                        type=float,
                        default=0.01,
                        help='Ending learning rate')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='SGD momentum')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=0,
                        help='SGD weight decay')
    parser.add_argument('--out',
                        default='finetuned.pth',
                        help='Fine-tuned output model filename')
    parser.add_argument('-v',
                        '--verbose',
                        help='verbose logging output',
                        action='store_true')

    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO if args.verbose else logging.WARNING,
        format='%(message)s')

    if args.dataset == 'cifar10':
        dataset = datasets.CIFAR10
        num_classes = 10
    elif args.dataset == 'cifar100':
        dataset = datasets.CIFAR100
        num_classes = 100
    else:
        raise RuntimeError('Invalid dataset: must be cifar10 or cifar100')

    # Load model architecture and restore the stored weights
    model = models.__dict__[args.arch](num_classes=num_classes)
    model.load_state_dict(torch.load(args.model))

    # Compute #nonzeros prior to fine-tuning
    nparams_w = torch.nn.utils.parameters_to_vector(
        model.parameters()).view(-1).nonzero().numel()

    # Only fine-tune fully-connected and convolutional layers
    layer_types = [torch.nn.Linear, torch.nn.Conv2d]

    trainloader, valloader = \
        util.cifar_train_val_loader(dataset,
                                    args.batch_size,
                                    args.val_batch_size)
    testloader = util.cifar_test_loader(dataset, args.val_batch_size)
    criterion = torch.nn.CrossEntropyLoss().cuda()
    ft = condensa.FineTuner(model, layer_types)
    w_ft = ft.run(epochs=args.epochs,
                  lr=args.lr,
                  lr_end=args.lr_end,
                  momentum=args.momentum,
                  weight_decay=args.weight_decay,
                  criterion=criterion,
                  trainloader=trainloader,
                  testloader=testloader,
                  valloader=valloader,
                  debugging_flags={'custom_model_statistics':
                                    condensa.util.cnn_statistics})

    # Same count on the fine-tuned model, for a before/after comparison
    nparams_wft = torch.nn.utils.parameters_to_vector(
        w_ft.parameters()).view(-1).nonzero().numel()
    print('#Nonzero parameters: before [{}], after [{}]'.format(
        nparams_w, nparams_wft))

    if args.out is not None:
        torch.save(w_ft.state_dict(), args.out)
        logging.info('[Condensa] Fine-tuned model written to disk')


================================================
FILE: examples/cifar/models/__init__.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

from .resnet import *
from .vgg import *
from .alexnet import *


================================================
FILE: examples/cifar/models/alexnet.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import torch.nn as nn

__all__ = ['alexnet']

class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    Five convolutions (each followed by an in-place ReLU) interleaved with
    three 2x2 max-pooling stages feed a single linear classifier. The
    classifier takes 256 input features, so the feature extractor must
    flatten to 256 values per sample.

    Args:
        num_classes: number of output logits produced by the classifier.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()

        def halve():
            # 2x2 max-pooling with stride 2.
            return nn.MaxPool2d(kernel_size=2, stride=2)

        # The position of each entry defines its index (and therefore its
        # state_dict key) inside the Sequential container.
        stages = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            halve(),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            halve(),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            halve(),
        ]
        self.features = nn.Sequential(*stages)
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        feats = self.features(x)
        # Collapse everything but the batch dimension.
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

def alexnet(**kwargs):
    """Build an :class:`AlexNet`, forwarding all keyword arguments to it."""
    return AlexNet(**kwargs)


================================================
FILE: examples/cifar/models/resnet.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import torch.nn as nn
import math

__all__ = ['resnet20', 'resnet56', 'resnet110']

def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the input is used unchanged when ``None``.
    """
    # Output channels are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # First conv + BN (carries the stride).
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        # One ReLU module is shared by both activation sites.
        self.relu = nn.ReLU(inplace=True)
        # Second conv + BN.
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply conv-bn-relu-conv-bn, add the shortcut, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions.

    The final 1x1 convolution produces ``planes * 4`` channels.

    Args:
        inplanes: number of input channels.
        planes: number of channels of the two inner convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to produce the
            shortcut branch; the input is used unchanged when ``None``.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        widened = planes * 4

        # 1x1 reduction
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # 3x3 convolution (carries the stride)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # 1x1 expansion
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class ResNet(nn.Module):
    """Three-stage residual network with 16/32/64 base widths.

    Args:
        block: residual block class. It must expose an ``expansion``
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        depth: network depth; must satisfy ``depth = 6n + 2``, which yields
            ``n`` blocks per stage.
        num_classes: number of output logits.
    """

    def __init__(self, block, depth, num_classes=10):
        super(ResNet, self).__init__()
        assert (depth - 2) % 6 == 0, 'depth should be 6n+2'
        blocks_per_stage = (depth - 2) // 6

        # Running channel count, updated by _make_layer as stages are built.
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, blocks_per_stage)
        self.layer2 = self._make_layer(block, 32, blocks_per_stage, stride=2)
        self.layer3 = self._make_layer(block, 64, blocks_per_stage, stride=2)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # Re-initialise convolutions with N(0, sqrt(2 / fan_out)) and reset
        # batch-norm layers to weight 1 / bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                k_h, k_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = k_h * k_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage consisting of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride or
        the channel count changes, a 1x1-conv + batch-norm shortcut.
        """
        out_planes = planes * block.expansion

        shortcut = None
        if stride != 1 or self.inplanes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
            # Tag the shortcut convolution with the condensa_nocompress flag.
            shortcut[0].condensa_nocompress = True

        stage = [block(self.inplanes, planes, stride, shortcut)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        x = self.relu(self.bn1(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)

        x = self.avgpool(x)
        # Collapse everything but the batch dimension.
        x = x.view(x.size(0), -1)
        return self.fc(x)

def resnet20(**kwargs):
    """Build a depth-20 :class:`ResNet` from :class:`BasicBlock` units."""
    model = ResNet(BasicBlock, 20, **kwargs)
    return model

def resnet56(**kwargs):
    """Build a depth-56 :class:`ResNet` from :class:`Bottleneck` units."""
    model = ResNet(Bottleneck, 56, **kwargs)
    return model

def resnet110(**kwargs):
    """Build a depth-110 :class:`ResNet` from :class:`Bottleneck` units."""
    model = ResNet(Bottleneck, 110, **kwargs)
    return model


================================================
FILE: examples/cifar/models/vgg.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import torch.nn as nn
import math

__all__ = [
    'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn',
    'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19'
]

class VGG(nn.Module):
    """VGG-style classifier: a feature extractor plus one linear layer.

    Args:
        features: module producing the feature maps; its flattened output
            must have 512 values per sample to match the classifier.
        num_classes: number of output logits.
    """

    def __init__(self, features, num_classes=10):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Linear(512, num_classes)
        self._initialize_weights()

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        x = self.features(x)
        # Collapse everything but the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise all Conv2d, BatchNorm2d and Linear sub-modules.

        * Conv2d: weights ~ N(0, sqrt(2 / (kh * kw * out_channels))),
          bias (if present) set to zero.
        * BatchNorm2d: weight set to one, bias set to zero.
        * Linear: weights ~ N(0, 0.01), bias set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = (m.kernel_size[0] * m.kernel_size[1] *
                           m.out_channels)
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

def make_layers(cfg, batch_norm=False):
    """Assemble a VGG feature extractor from a layer specification.

    Args:
        cfg: iterable whose entries are either ``'M'`` (a 2x2 max-pooling
            layer with stride 2) or an output-channel count (a 3x3
            convolution with padding 1 followed by an in-place ReLU).
        batch_norm: when true, insert ``nn.BatchNorm2d`` between every
            convolution and its ReLU.

    Returns:
        An ``nn.Sequential`` holding the layers in order; the first
        convolution takes 3 input channels.
    """
    modules = []
    channels = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        modules.append(nn.Conv2d(channels, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = entry
    return nn.Sequential(*modules)

# Layer specifications consumed by make_layers(): an integer is the number of
# output channels of a 3x3 convolution, 'M' is a 2x2 max-pooling layer.
# Keys map to the factories below: 'A' -> vgg11, 'B' -> vgg13, 'D' -> vgg16,
# 'E' -> vgg19 (and their *_bn variants).
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256,
          'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256,
          'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256,
          'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

def vgg11(**kwargs):
    """Build a :class:`VGG` from configuration ``'A'`` without batch norm."""
    return VGG(make_layers(cfg['A']), **kwargs)

def vgg11_bn(**kwargs):
    """Build a :class:`VGG` from configuration ``'A'`` with batch norm."""
    return VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)

def vgg13(**kwargs):
    """Build a :class:`VGG` from configuration ``'B'`` without batch norm."""
    return VGG(make_layers(cfg['B']), **kwargs)

def vgg13_bn(**kwargs):
    """Build a :class:`VGG` from configuration ``'B'`` with batch norm."""
    return VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)

def vgg16(**kwargs):
    """Build a :class:`VGG` from configuration ``'D'`` without batch norm."""
    return VGG(make_layers(cfg['D']), **kwargs)

def vgg16_bn(**kwargs):
    """Build a :class:`VGG` from configuration ``'D'`` with batch norm."""
    return VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)

def vgg19(**kwargs):
    """Build a :class:`VGG` from configuration ``'E'`` without batch norm."""
    return VGG(make_layers(cfg['E']), **kwargs)

def vgg19_bn(**kwargs):
    """Build a :class:`VGG` from configuration ``'E'`` with batch norm."""
    return VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)


================================================
FILE: examples/cifar/util.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np

import torch
import torch.nn as nn
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

import condensa.data

def cifar_train_val_loader(dataset,
                           train_batch_size,
                           val_batch_size,
                           root='./data',
                           random_seed=42,
                           shuffle=True,
                           val_size=5000):
    """
    Splits the CIFAR training set into training and validation
    sets and returns the corresponding data loaders.

    The first ``val_size`` (optionally shuffled) sample indices form the
    validation set and the remaining ones the training set. With the
    default of 5000 this is a 9:1 split for a 50000-sample training set.

    Args:
        dataset: dataset constructor accepting ``root``, ``train``,
            ``download`` and ``transform`` keyword arguments.
        train_batch_size: batch size of the training loader.
        val_batch_size: batch size of the validation loader.
        root: directory the dataset is stored in / downloaded to.
        random_seed: seed passed to ``np.random.seed`` before shuffling.
            NOTE: this reseeds NumPy's global random state.
        shuffle: whether to shuffle the indices before splitting.
        val_size: number of samples held out for validation.

    Returns:
        A ``(trainloader, valloader)`` tuple.
    """
    # Augmentation is applied to the training view only; the validation
    # view of the same underlying data carries no transform.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ])
    trainset = dataset(root=root,
                       train=True,
                       download=True,
                       transform=transform_train)
    valset = dataset(root=root, train=True, download=True, transform=None)
    indices = list(range(len(trainset)))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, val_idx = indices[val_size:], indices[:val_size]
    trainsampler = SubsetRandomSampler(train_idx)
    valsampler = SubsetRandomSampler(val_idx)

    # Per-channel (mean, std) handed to the loaders.
    meanstd = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    trainloader = condensa.data.GPUDataLoader(trainset,
                                              batch_size=train_batch_size,
                                              shuffle=False,
                                              num_workers=8,
                                              sampler=trainsampler,
                                              meanstd=meanstd)
    valloader = condensa.data.GPUDataLoader(valset,
                                            batch_size=val_batch_size,
                                            shuffle=False,
                                            num_workers=8,
                                            sampler=valsampler,
                                            meanstd=meanstd)

    return (trainloader, valloader)

def cifar_test_loader(dataset, batch_size, root='./data'):
    """
    Build a loader over the CIFAR test split.

    `dataset` is called with `train=False` and no transform; the result
    is wrapped in a non-shuffling `condensa.data.GPUDataLoader`.
    """
    channel_stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    heldout = dataset(root=root, train=False, download=True, transform=None)
    return condensa.data.GPUDataLoader(heldout,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=8,
                                       meanstd=channel_stats)


================================================
FILE: notebooks/AlexNet.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tutorial: Compressing AlexNet with Condensa"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this tutorial, we will walk through compressing the [AlexNet neural network](https://en.wikipedia.org/wiki/AlexNet) on the CIFAR-10 dataset using Condensa. We will target two different objectives: reducing total model memory footprint, and reducing the inference latency of the compressed model. \n",
    "\n",
    "We assume that Condensa is already installed and working (check out the [Installation Guide](https://nvlabs.github.io/condensa/guide/install.html) for instructions). If you'd like to follow along by executing the code in this notebook, please also make sure that [Jupyter](https://jupyter.org/) is installed on your local system."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Defining the Network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's start by defining the AlexNet network architecture in PyTorch as shown below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "class AlexNet(nn.Module):\n",
    "    def __init__(self, num_classes=10):\n",
    "        super(AlexNet, self).__init__()\n",
    "        self.features = nn.Sequential(\n",
    "            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2),\n",
    "            nn.Conv2d(64, 192, kernel_size=5, padding=2),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2),\n",
    "            nn.Conv2d(192, 384, kernel_size=3, padding=1),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.Conv2d(384, 256, kernel_size=3, padding=1),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.Conv2d(256, 256, kernel_size=3, padding=1),\n",
    "            nn.ReLU(inplace=True),\n",
    "            nn.MaxPool2d(kernel_size=2, stride=2),\n",
    "        )\n",
    "        self.classifier = nn.Linear(256, num_classes)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.features(x)\n",
    "        x = x.view(x.size(0), -1)\n",
    "        x = self.classifier(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We instantiate this class into `model`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = AlexNet()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Pre-Trained Weights"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that we have defined the network architecture, let us load a pre-trained set of weights into the model from the `AlexNet.pth` file included with this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.load_state_dict(torch.load('AlexNet.pth'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preparing for Compression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's make sure CUDA is enabled in PyTorch."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We now create PyTorch data loaders for the training, test, and validation datasets. To save space, we wrap the data loading code into two utility functions: `cifar_train_val_loader` and `cifar_test_loader` (please refer to `util.py` in the current `notebooks` folder for the full code)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import util\n",
    "import torchvision.datasets as datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = datasets.CIFAR10\n",
    "\n",
    "trainloader,valloader = util.cifar_train_val_loader(dataset, train_batch_size=128, val_batch_size=128)\n",
    "testloader = util.cifar_test_loader(dataset, batch_size=128)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The utilities above split the original training set into training and validation sets (using a 9:1 split) and perform data normalization for all datasets. They also utilize Condensa's `GPUDataLoader` to enable fast data prefetching and collation."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We now define our loss criterion:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "criterion = nn.CrossEntropyLoss().cuda()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, we set our logging level to `INFO` so that Condensa prints out intermediate updates."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "logging.basicConfig(level=logging.INFO, format='%(message)s')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Two Different Compression Strategies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this tutorial, we will explore two different ways of compressing the AlexNet network: one targeted at reducing the total model memory footprint (named `MEM`) and the other at reducing inference runtime latency (named `FLOP`)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MEM Scheme"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `MEM` scheme aims to reduce the total model memory footprint (number of bytes required to store the non-zero elements of the compressed model). To this end, we perform a combination of _pruning_ (clipping model parameters to zero) and _quantization_ (using 16-bit floating point representation to store model weights instead of 32-bit). Expressing this scheme in Condensa is fairly straightforward using the built-in [`Compose`](https://nvlabs.github.io/condensa/modules/schemes.html#composition) scheme as shown below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import condensa\n",
    "from condensa.schemes import Compose, Prune, Quantize\n",
    "\n",
    "MEM = Compose([Prune(0.02), Quantize(condensa.float16)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here, the [`Compose`](https://nvlabs.github.io/condensa/modules/schemes.html#composition) operator successively applies pruning followed by quantization to the model. The pruning density, or the ratio of non-zero parameters in the compressed model to the original one, is specified as 0.02 (2%). Condensa includes a number of other common schemes, including structured and block pruning, among others. For a list of available schemes, please refer to [this page](https://nvlabs.github.io/condensa/modules/schemes.html) in the API documentation. Users may also define their own custom schemes as Python functions that invoke the compression and decompression operators available in Condensa (see [`schemes.py`](https://github.com/NVlabs/condensa/blob/master/condensa/schemes.py) for examples of how to define custom schemes)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### FLOP Scheme"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "While the `MEM` scheme is effective at reducing the number of non-zero elements in a model, this may not directly translate into improvements in actual inference runtime. Most modern CPUs and GPUs are unable to detect individual zero elements and bypass computations on them in hardware. Instead, to realize speedups on such architectures, we perform filter pruning, which removes entire filters (3D blocks) at once from convolutional layers. This enables the weight tensors to be physically reshaped in the compressed model. We call this the `FLOP` scheme in this tutorial, and use the [`FilterPrune`](https://nvlabs.github.io/condensa/modules/schemes.html#filter-pruning) scheme in Condensa to define it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from condensa.schemes import FilterPrune\n",
    "FLOP = condensa.schemes.FilterPrune(0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setting up the Optimizer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To recover any accuracy lost due to compression, Condensa comes with a set of _optimizers_. Each optimizer takes a pre-trained model, applies the compression scheme, and tries to recover the original accuracy either directly or iteratively. In this tutorial, we'll be using Condensa's [L-C optimizer](https://nvlabs.github.io/condensa/modules/opt.html#l-c-optimizer). We instantiate it as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lc = condensa.opt.LC(steps=35,                             # L-C iterations\n",
    "                     l_optimizer=condensa.opt.lc.SGD,      # L-step sub-optimizer\n",
    "                     l_optimizer_params={'momentum':0.95}, # L-step sub-optimizer parameters\n",
    "                     lr=0.01,                              # Initial learning rate\n",
    "                     lr_end=1e-4,                          # Final learning rate\n",
    "                     mb_iterations_per_l=3000,             # Mini-batch iterations per L-step\n",
    "                     mb_iterations_first_l=30000,          # Mini-batch iterations for first L-step\n",
    "                     mu_init=1e-3,                         # Initial value of `mu`\n",
    "                     mu_multiplier=1.1,                    # Multiplier for `mu`\n",
    "                     mu_cap=10000,                         # Maximum value of `mu`\n",
    "                     debugging_flags={'custom_model_statistics':\n",
    "                                      condensa.util.cnn_statistics})\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Each optimizer in Condensa has its own set of hyper-parameters which must be specified manually by the user. A full description of hyper-parameter tuning is beyond the scope of this tutorial, but for additional information on what each hyper-parameter represents and tips on finding its optimal value, we refer you to the [Condensa paper](https://arxiv.org/abs/1911.02497). In this notebook, we run the L-C algorithm for 35 iterations using the hyper-parameter values shown above. L-C hyper-parameter values for a number of common convolutional neural networks are also included in the [`examples`](https://github.com/NVlabs/condensa/blob/master/examples/) folder."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compressing the Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Once the optimizer is instantiated, we can go ahead and perform the actual compression using the [`Compressor`](https://nvlabs.github.io/condensa/modules/compressor.html#model-compressor) class and its [`run`](https://nvlabs.github.io/condensa/modules/compressor.html#condensa.compressor.Compressor.run) method. **Note:** the next two cells may take a while to execute!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compressor_MEM  = condensa.Compressor(lc,\n",
    "                                      MEM,\n",
    "                                      model,\n",
    "                                      trainloader,\n",
    "                                      testloader,\n",
    "                                      valloader,\n",
    "                                      criterion)\n",
    "w_MEM  = compressor_MEM.run()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "compressor_FLOP = condensa.Compressor(lc,\n",
    "                                      FLOP,\n",
    "                                      model,\n",
    "                                      trainloader,\n",
    "                                      testloader,\n",
    "                                      valloader,\n",
    "                                      criterion)\n",
    "\n",
    "w_FLOP = compressor_FLOP.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We specify the optimizer, scheme, input model, training, test, and validation sets, and the loss criterion to create an instance of the [`Compressor`](https://nvlabs.github.io/condensa/modules/compressor.html#model-compressor) class. Since the optimizer is specified as a parameter, we are able to easily experiment with alternative optimizers in Condensa.\n",
    "\n",
    "In the above snippets, `w_MEM` and `w_FLOP` contain the models compressed using the `MEM` and `FLOP` schemes, respectively. We can now save these to disk:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(w_MEM.state_dict(), 'AlexNet_MEM.pth')\n",
    "torch.save(w_FLOP.state_dict(), 'AlexNet_FLOP.pth')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Condensa also records various statistics about the compression process. These can be retrieved using the `statistics` member of the compressor object as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for k,v in compressor_MEM.statistics.items():\n",
    "    print('{}: {}'.format(k, v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for k,v in compressor_FLOP.statistics.items():\n",
    "    print('{}: {}'.format(k, v))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We notice that Condensa achieves top-1 test accuracies of **77.49%** and **76.81%** for the `MEM` and `FLOP` schemes, respectively (compared to the baseline accuracy of **77.07%** for AlexNet). For more complex models, it is possible to further improve accuracies via [model fine-tuning](https://nvlabs.github.io/condensa/modules/finetuner.html)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Memory and Runtime Reductions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using the `MEM` scheme, we reduce the model memory footprint by **97.83x**. Additionally, we achieve a **55.6%** reduction in FLOPs using the `FLOP` scheme."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## More Info"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We provide additional real-world compression examples targeting complex networks such as ResNet50 and VGG-19 in Condensa's [examples folder](https://github.com/NVlabs/condensa/tree/master/examples). Be sure to check them out!\n",
    "\n",
    "For more details on the design and implementation of Condensa, and its performance on real-world networks, please refer to the [Condensa paper](https://arxiv.org/abs/1911.02497)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}


================================================
FILE: notebooks/util.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np

import torch
import torch.nn as nn
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

import condensa.data

def cifar_train_val_loader(dataset,
                           train_batch_size,
                           val_batch_size,
                           root='./data',
                           random_seed=42,
                           shuffle=True):
    """
    Build training and validation loaders from the CIFAR training set.

    The first 5000 entries of the (optionally shuffled) index list are
    held out for validation and the remaining entries are used for
    training; for a 50,000-image training set this is a 9:1 split.

    `dataset` is called as `dataset(root=..., train=True, download=True,
    transform=...)`. Random crop and horizontal flip are applied to the
    training view only. When `shuffle` is true, the global NumPy RNG is
    seeded with `random_seed` before the indices are permuted.

    Returns a `(trainloader, valloader)` tuple.
    """
    # Augmentation is attached to the training view only.
    augment = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ])
    trainset = dataset(root=root, train=True, download=True, transform=augment)
    valset = dataset(root=root, train=True, download=True, transform=None)

    holdout = 5000
    order = list(range(len(trainset)))
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(order)

    trainsampler = SubsetRandomSampler(order[holdout:])
    valsampler = SubsetRandomSampler(order[:holdout])

    # Options shared by both loaders; `meanstd` is forwarded as-is.
    shared = dict(
        shuffle=False,
        num_workers=8,
        meanstd=((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    )
    trainloader = condensa.data.GPUDataLoader(trainset,
                                              batch_size=train_batch_size,
                                              sampler=trainsampler,
                                              **shared)
    valloader = condensa.data.GPUDataLoader(valset,
                                            batch_size=val_batch_size,
                                            sampler=valsampler,
                                            **shared)
    return (trainloader, valloader)

def cifar_test_loader(dataset, batch_size, root='./data'):
    """
    Build a loader over the CIFAR test split.

    `dataset` is called with `train=False` and no transform; the result
    is wrapped in a non-shuffling `condensa.data.GPUDataLoader`.
    """
    channel_stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    heldout = dataset(root=root, train=False, download=True, transform=None)
    return condensa.data.GPUDataLoader(heldout,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=8,
                                       meanstd=channel_stats)


================================================
FILE: run_all_tests.sh
================================================
#!/bin/bash
#
# Runs every Python script found under `test/` with python3.
#
# Usage: ./run_all_tests.sh [-v|--verbose]
#   -v, --verbose   print the path of each test script before running it
#
# Exit status: 0 when every script succeeds, 1 when at least one fails.

VERBOSE=0
if [[ $1 == "-v" ]] || [[ $1 == "--verbose" ]]; then
  VERBOSE=1
fi

FAILED=0
# File names are read NUL-delimited on fd 3 so that paths containing
# whitespace survive intact and the tests keep the caller's stdin.
while IFS= read -r -d '' f <&3; do
  if [[ $VERBOSE -eq 1 ]]; then
    echo "[Condensa Test] $f"
  fi
  # Keep going after a failure, but remember it for the exit status.
  if ! python3 "$f"; then
    echo "[Condensa Test] FAILED: $f" >&2
    FAILED=1
  fi
done 3< <(find test -name '*.py' -print0)

exit $FAILED


================================================
FILE: setup.cfg
================================================
[metadata]
description-file = README.md


================================================
FILE: setup.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from setuptools import setup
from setuptools import find_packages

# Absolute path of the directory containing this setup script.
cwd = os.path.dirname(os.path.abspath(__file__))
version = '0.5.0-beta'


def build_deps():
    """Write `condensa/version.py` so it defines `__version__`."""
    target = os.path.join(cwd, 'condensa', 'version.py')
    contents = "__version__ = '{}'\n".format(version)
    with open(target, 'w') as handle:
        handle.write(contents)


# Generate the version module before `setup()` is invoked below.
build_deps()

# The README doubles as the long description passed to setup().
readme_path = os.path.join(cwd, 'README.md')
with open(readme_path, encoding='utf-8') as f:
    long_description = f.read()

install_requires = [
    'numpy',
    'torch>=1.0.0',
    'tqdm',
]

# Package metadata, collected in one place and handed to setup() below.
metadata = dict(
    name='condensa',
    version=version,
    description='Condensa Programmable Model Compression Framework',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/NVLabs/condensa',
    author='Saurav Muralidharan',
    author_email='sauravm@nvidia.com',
    license='Apache License 2.0',
    keywords=['compression', 'quantization', 'pruning'],
    install_requires=install_requires,
    packages=find_packages(),
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3',
        'Topic :: Software Development :: Libraries',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
    ],
)

setup(**metadata)


================================================
FILE: test/schemes/test_prune.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

import condensa
import condensa.schemes as schemes
import condensa.tensor as T
import condensa.functional as F

def test_prune(device):
    """
    Apply `schemes.Prune(0.5)` to a linear layer on `device` and check
    that every weight left non-zero has magnitude >= the threshold.
    """
    layer = torch.nn.Linear(100, 10, bias=True).to(device)
    pruner = schemes.Prune(0.5)
    # The threshold is queried before `pi` is applied to the layer.
    cutoff = pruner.threshold(layer)
    pruner.pi(layer)

    magnitudes = layer.weight.data.abs().view(-1)
    survivors = magnitudes[magnitudes.nonzero().view(-1)]
    assert (survivors >= cutoff).all()

def test_filter_prune(device):
    """
    Apply `schemes.FilterPrune` (l2norm criteria, bias pruning enabled)
    to a conv layer on `device`. Non-zero aggregated filter scores must
    be >= the threshold, and biases at zero-score positions must be zero.
    """
    criteria = F.l2norm
    layer = torch.nn.Conv2d(3,
                            64,
                            kernel_size=11,
                            stride=4,
                            padding=5,
                            bias=True).to(device)
    pruner = schemes.FilterPrune(0.5, criteria=criteria, prune_bias=True)
    cutoff = pruner.threshold(layer)
    pruner.pi(layer)

    # Aggregated scores (presumably one per filter) vs. the threshold.
    scores = T.aggregate_filters(layer.weight.data, criteria).view(-1)
    kept = scores[scores.nonzero().view(-1)]
    assert (kept >= cutoff).all()

    # Every zeroed filter must have a zeroed bias at the same index.
    pruned_idx = (scores == 0).nonzero().view(-1)
    assert (layer.bias.data[pruned_idx] == 0.).all()

def test_neuron_prune(device):
    """
    Apply `schemes.NeuronPrune` (l2norm criteria, bias pruning enabled)
    to a linear layer on `device`. Non-zero aggregated neuron scores must
    be >= the threshold, and biases at zero-score positions must be zero.
    """
    criteria = F.l2norm
    layer = torch.nn.Linear(100, 10, bias=True).to(device)
    pruner = schemes.NeuronPrune(0.5, criteria=criteria, prune_bias=True)
    cutoff = pruner.threshold(layer)
    pruner.pi(layer)

    # Aggregated scores (presumably one per neuron) vs. the threshold.
    scores = T.aggregate_neurons(layer.weight.data, criteria).view(-1)
    kept = scores[scores.nonzero().view(-1)]
    assert (kept >= cutoff).all()

    # Every zeroed neuron must have a zeroed bias at the same index.
    pruned_idx = (scores == 0).nonzero().view(-1)
    assert (layer.bias.data[pruned_idx] == 0.).all()

def test_block_prune(device, blocksize=(10,10)):
    """
    Apply `schemes.BlockPrune` (l2norm criteria) with `blocksize` blocks
    to a bias-free 100x100 linear layer on `device`, and check that every
    non-zero aggregated block score is >= the threshold.
    """
    criteria = F.l2norm
    layer = torch.nn.Linear(100, 100, bias=False).to(device)
    pruner = schemes.BlockPrune(0.5, criteria=criteria, block_size=blocksize)
    cutoff = pruner.threshold(layer)
    pruner.pi(layer)

    # Aggregate with the same block size and criteria used for pruning.
    scores = T.aggregate(layer.weight.data, blocksize, criteria).view(-1)
    kept = scores[scores.nonzero().view(-1)]
    assert (kept >= cutoff).all()

if __name__ == '__main__':
    # Run the whole suite on the CPU, then again on the first CUDA device
    # when one is available.
    suite = (test_prune, test_filter_prune, test_neuron_prune,
             test_block_prune)
    for case in suite:
        case('cpu')

    if torch.cuda.is_available():
        for case in suite:
            case('cuda:0')


================================================
FILE: test/schemes/test_qz.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

import condensa
from condensa import schemes

def test_float16(device):
    """
    Check that `Quantize(condensa.float16).pi` leaves the layer weight as
    float16 and that `delta` brings it back to float32.
    """
    layer = torch.nn.Linear(100, 10).float().to(device)
    quantizer = schemes.Quantize(condensa.float16)

    quantizer.pi(layer)
    assert layer.weight.dtype == torch.float16

    quantizer.delta(layer)
    assert layer.weight.dtype == torch.float32

if __name__ == '__main__':
    # Always test on the CPU.
    test_float16('cpu')
    if torch.cuda.is_available():
        # Run on the GPU as well. This branch previously passed 'cpu'
        # again, so the CUDA path was never exercised.
        test_float16('cuda:0')


================================================
FILE: test/tensor/test_mask_apply.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import torch

import condensa.tensor as T

def test_apply_mask(device):
    """
    Mask a random 20-element tensor in place using the threshold from
    `T.threshold(a, 0.3)` and check that each element is either zero or
    has magnitude >= that threshold.
    """
    values = torch.randn(20).to(device)
    cutoff = T.threshold(values, 0.3)
    T.apply_mask_inplace(values, T.simple_mask(values, cutoff))

    for entry in values:
        assert entry == 0. or abs(entry) >= cutoff

if __name__ == '__main__':
    # CPU first; the first CUDA device as well when one is available.
    run = test_apply_mask
    run('cpu')
    if torch.cuda.is_available():
        run('cuda:0')


================================================
FILE: test/tensor/test_maskgen.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import torch

import condensa.tensor as T

def test_simple_mask(device):
    """
    Check that `T.simple_mask` yields 1 where the element magnitude is
    >= the threshold and 0 everywhere else.
    """
    values = torch.randn(20).to(device)
    cutoff = T.threshold(values, 0.3)
    mask = T.simple_mask(values, cutoff)

    for i, entry in enumerate(values):
        expected = 1 if abs(entry) >= cutoff else 0
        assert mask[i] == expected

def test_block_mask(device):
    pass

if __name__ == '__main__':
    # CPU first; the first CUDA device as well when one is available.
    suite = (test_simple_mask, test_block_mask)
    for case in suite:
        case('cpu')
    if torch.cuda.is_available():
        for case in suite:
            case('cuda:0')


================================================
FILE: test/tensor/test_util.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import torch

import condensa.tensor as T

def test_density(device):
    """
    `T.density` must be 0 for an all-zero tensor, 1 for an all-one tensor,
    and 0.75 for 10 zeros concatenated with 30 ones.
    """
    empty = torch.zeros(10).to(device)
    full = torch.ones(30).to(device)

    assert T.density(empty) == 0.
    assert T.density(full) == 1.

    mixed = torch.cat((empty, full))
    assert T.density(mixed) == 0.75

def test_sparsity(device):
    """
    `T.sparsity` must be 1 for an all-zero tensor, 0 for an all-one
    tensor, and 0.25 for 10 zeros concatenated with 30 ones.
    """
    empty = torch.zeros(10).to(device)
    full = torch.ones(30).to(device)

    assert T.sparsity(empty) == 1.
    assert T.sparsity(full) == 0.

    mixed = torch.cat((empty, full))
    assert T.sparsity(mixed) == 0.25

def test_threshold(device):
    """Check T.threshold on the integers 0..29 for several fractions.

    For the second argument 0.2, 0.3 and 0.5 the returned threshold is
    expected to be 24, 21 and 15 respectively.
    """
    values = torch.IntTensor(np.arange(0, 30)).to(device)

    expected = {0.2: 24, 0.3: 21, 0.5: 15}
    # Compute every threshold up front, then verify them one by one.
    computed = {fraction: T.threshold(values, fraction) for fraction in expected}

    for fraction, want in expected.items():
        assert computed[fraction].item() == want

if __name__ == '__main__':
    # Always exercise the CPU path.
    test_density('cpu')
    test_sparsity('cpu')
    test_threshold('cpu')

    # Repeat on the first CUDA device when one is available. The tests take
    # the target device as their argument, so it must be 'cuda:0' here;
    # passing 'cpu' again would leave the GPU path untested.
    if torch.cuda.is_available():
        test_density('cuda:0')
        test_sparsity('cuda:0')
        test_threshold('cuda:0')


================================================
FILE: test/test_lr.py
================================================
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import condensa
import condensa.lr as lr

def test_interval_lr():
    """Check that lr.IntervalLR goes from its start to its end value.

    The schedule is built with (1., 1e-6, 100); the learning rate must be
    exactly 1. before any step and close to 1e-6 after 100 steps.
    """
    start_lr, end_lr, num_steps = 1., 1e-6, 100
    schedule = lr.IntervalLR(start_lr, end_lr, num_steps)
    assert schedule.learning_rate == start_lr

    for _ in range(num_steps):
        schedule.step()

    assert np.isclose(schedule.learning_rate, end_lr)

def test_decayed_lr():
    """Check lr.DecayedLR at the loop iterations 10 and 20.

    The schedule is built with an initial rate of 100.0, the list [10, 20]
    and gamma=0.1. After the step taken in loop iteration 10 the rate must
    be 10.0, and after the one in iteration 20 it must be 1.0.
    """
    schedule = lr.DecayedLR(100.0, [10, 20], gamma=0.1)

    # Loop index -> learning rate expected right after that iteration's step.
    expected_after = {10: 10.0, 20: 1.0}

    for iteration in range(30):
        schedule.step()
        if iteration in expected_after:
            assert schedule.learning_rate == expected_after[iteration]

def test_exp_decayed_lr():
    """Check lr.ExpDecayedLR after 100 steps.

    The schedule is built with (1.0, 0.1); after 100 steps the learning rate
    must equal 1.0 * (0.1**100) exactly.
    """
    initial_lr, gamma, num_steps = 1.0, 0.1, 100
    schedule = lr.ExpDecayedLR(initial_lr, gamma)

    for _ in range(num_steps):
        schedule.step()

    assert schedule.learning_rate == initial_lr * (gamma**num_steps)

if __name__ == '__main__':
    # Run every learning-rate schedule test in definition order.
    for run_test in (test_interval_lr, test_decayed_lr, test_exp_decayed_lr):
        run_test()