Repository: NVlabs/condensa Branch: master Commit: e81e4f2e9738 Files: 61 Total size: 173.7 KB Directory structure: gitextract_kv0mug3a/ ├── .gitignore ├── .style.yapf ├── LICENSE ├── README.md ├── condensa/ │ ├── __init__.py │ ├── cfg.py │ ├── compressor.py │ ├── data.py │ ├── delta.py │ ├── dtypes.py │ ├── finetune.py │ ├── functional.py │ ├── lr.py │ ├── opt/ │ │ ├── __init__.py │ │ ├── direct/ │ │ │ ├── __init__.py │ │ │ └── dc.py │ │ └── lc/ │ │ ├── __init__.py │ │ ├── adam.py │ │ ├── lc.py │ │ └── sgd.py │ ├── pi.py │ ├── schemes.py │ ├── tensor.py │ ├── type_enums.py │ └── util.py ├── docs/ │ ├── Makefile │ ├── make.bat │ └── source/ │ ├── _static/ │ │ └── ga_tracker.js │ ├── conf.py │ ├── guide/ │ │ ├── install.rst │ │ └── usage.rst │ ├── index.rst │ └── modules/ │ ├── compressor.rst │ ├── finetuner.rst │ ├── functional.rst │ ├── lc.rst │ ├── opt.rst │ ├── pi.rst │ ├── schemes.rst │ ├── tensor.rst │ └── util.rst ├── examples/ │ └── cifar/ │ ├── compress.py │ ├── compress_alexnet.sh │ ├── finetune.py │ ├── models/ │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── resnet.py │ │ └── vgg.py │ └── util.py ├── notebooks/ │ ├── AlexNet.ipynb │ ├── AlexNet.pth │ └── util.py ├── run_all_tests.sh ├── setup.cfg ├── setup.py └── test/ ├── schemes/ │ ├── test_prune.py │ └── test_qz.py ├── tensor/ │ ├── test_mask_apply.py │ ├── test_maskgen.py │ └── test_util.py └── test_lr.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ # Distribution / packaging .Python *.egg-info/ *.egg .eggs/ dist/ sdist/ # Jupyter Notebook .ipynb_checkpoints # Condensa-specific compressed/ trained/ results/ data/ build/ # version.py is auto-generated by setup.py version.py ================================================ FILE: .style.yapf ================================================ [style] based_on_style = pep8 blank_lines_around_top_level_definition = 1 column_limit = 79 indent_width = 4 ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2019 NVIDIA Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # A Programming System for Neural Network Compression **Note:** the original version of Condensa (contained in this branch) is no longer actively maintained. Please check out the [lite branch](https://github.com/NVlabs/condensa/tree/lite) for the most up-to-date version. Condensa is a framework for _programmable model compression_ in Python. It comes with a set of built-in compression operators which may be used to compose complex compression schemes targeting specific combinations of DNN architecture, hardware platform, and optimization objective. To recover any accuracy lost during compression, Condensa uses a constrained optimization formulation of model compression and employs an Augmented Lagrangian-based algorithm as the optimizer. **Status**: Condensa is under active development, and bug reports, pull requests, and other feedback are all highly appreciated. See the contributions section below for more details on how to contribute. ## Supported Operators and Schemes Condensa provides the following set of pre-built compression schemes: * [Unstructured Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#unstructured-pruning) * [Filter and Neuron Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#neuron-pruning) * [Block Pruning](https://nvlabs.github.io/condensa/modules/schemes.html#block-pruning) * [Quantization](https://nvlabs.github.io/condensa/modules/schemes.html#quantization) * [Scheme Composition](https://nvlabs.github.io/condensa/modules/schemes.html#composition) The schemes above are built using one or more [compression operators](https://nvlabs.github.io/condensa/modules/pi.html), which may be combined in various ways to define your own custom schemes. Please refer to the [documentation](https://nvlabs.github.io/condensa/index.html) for a detailed description of available operators and schemes. ## Prerequisites Condensa requires: * A working Linux installation (we use Ubuntu 18.04) * NVIDIA drivers and CUDA 10+ for GPU support * Python 3.5 or newer * PyTorch 1.0 or newer ## Installation The most straightforward way of installing Condensa is via `pip`: ```bash pip install condensa ``` ### Installation from Source Retrieve the latest source code from the Condensa repository: ```bash git clone https://github.com/NVlabs/condensa.git ``` Navigate to the source code directory and run the following: ```bash pip install -e . ``` ### Test out the Installation To check the installation, run the unit test suite: ```bash bash run_all_tests.sh -v ``` ## Getting Started The [AlexNet Notebook](https://github.com/NVlabs/condensa/blob/master/notebooks/AlexNet.ipynb) contains a simple step-by-step walkthrough of compressing a pre-trained model using Condensa. Check out the [`examples` folder](https://github.com/NVlabs/condensa/tree/master/examples/cifar) for additional, more complex examples of using Condensa (**note**: some examples require the `torchvision` package to be installed). ## Documentation Documentation is available [here](https://nvlabs.github.io/condensa/). Please also check out the [Condensa paper](https://arxiv.org/abs/1911.02497) for a detailed description of Condensa's motivation, features, and performance results. ## Contributing We appreciate all contributions, including bug fixes, new features and documentation, and additional tutorials. You can initiate contributions via Github pull requests. When making code contributions, please follow the `PEP 8` Python coding standard and provide unit tests for the new features. Finally, make sure to sign off your commits using the `-s` flag or adding `Signed-off-By: Name` in the commit message. ## Citing Condensa If you use Condensa for research, please consider citing the following paper: ``` @article{condensa2020, title={A Programmable Approach to Neural Network Compression}, author={V. {Joseph} and G. L. {Gopalakrishnan} and S. {Muralidharan} and M. {Garland} and A. {Garg}}, journal={IEEE Micro}, year={2020}, volume={40}, number={5}, pages={17-25}, doi={10.1109/MM.2020.3012391} } ``` ## Disclaimer Condensa is a research prototype and not an official NVIDIA product. Many features are still experimental and yet to be properly documented. ================================================ FILE: condensa/__init__.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. name = "condensa" from .version import __version__ from . import opt from .dtypes import * from .compressor import * from .finetune import * from .pi import * from .delta import * from .util import * from . import schemes from . import data ================================================ FILE: condensa/cfg.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. __CONDENSA_RECORD_MODE__ = False __CONDENSA_PI_PRECHECK__ = True ================================================ FILE: condensa/compressor.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch.nn class Compressor(object): """Condensa model compressor class.""" def __init__(self, opt, scheme, model, trainloader, testloader, valloader, criterion): """ Creates a `Compressor` instance. :param opt: Optimizer. :type opt: `condensa.Optimizer` :param scheme: Compression scheme (class). :param model: PyTorch model. :type model: `torch.nn.Module` :param trainloader: Training dataloader. :param testloader: Test dataloader. :param valloader: Validation dataloader. :param criterion: Loss criterion. """ assert isinstance(model, torch.nn.Module) self.opt = opt self.pi = scheme.pi self.delta = scheme.delta self.model = model self.trainloader = trainloader self.testloader = testloader self.valloader = valloader self.criterion = criterion self._statistics = None @property def statistics(self): """ Retrieves compressed model statistics. :return: Model statistics. :rtype: `dict` """ return self._statistics def run(self): """ Executes model compressor. :return: Compressed model. :rtype: `torch.nn.Module` """ w, statistics = self.opt.compress(self.model, self.pi, self.delta, self.trainloader, self.testloader, self.valloader, self.criterion) self._statistics = statistics return w ================================================ FILE: condensa/data.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import sys import numpy as np import torch import torch.utils.data as data import PIL def fast_collate(batch): """Fast batch collation. Based on version from NVIDIA Apex: https://github.com/NVIDIA/apex.""" imgs = [img[0] for img in batch] targets = torch.tensor([target[1] for target in batch], dtype=torch.int64) w = imgs[0].size[0] h = imgs[0].size[1] tensor = torch.zeros((len(imgs), 3, h, w), dtype=torch.uint8) for i, img in enumerate(imgs): nump_array = np.asarray(img, dtype=np.uint8) if (nump_array.ndim < 3): nump_array = np.expand_dims(nump_array, axis=-1) nump_array = np.rollaxis(nump_array, 2) tensor[i] += torch.from_numpy(nump_array) return tensor, targets class GPUDataLoader(object): """Custom data loader with support for prefetching and fast collation. Based on version from NVIDIA Apex: https://github.com/NVIDIA/apex.""" def __init__(self, dataset, batch_size, shuffle, num_workers, sampler=None, meanstd=None): if isinstance(dataset[0][0], PIL.Image.Image): nc = len(dataset[0][0].getbands()) else: raise RuntimeError( '[Condensa] GPUDataLoader only supports PIL image datasets') if not torch.cuda.is_available(): raise RuntimeError( '[Condensa] GPUDataLoader requires PyTorch CUDA support') if nc == 3: loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, pin_memory=True, sampler=sampler, collate_fn=fast_collate, num_workers=num_workers) else: raise NotImplementedError( '[Condensa] GPUDataLoader currently only supports 3-channel images' ) self.base_loader = loader self.loader = iter(loader) self.stream = torch.cuda.Stream() if meanstd is not None: mean, std = meanstd self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, nc, 1, 1) self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, nc, 1, 1) self.preload() def __len__(self): return len(self.base_loader) def __iter__(self): self.loader = iter(self.base_loader) self.preload() return self def __next__(self): torch.cuda.current_stream().wait_stream(self.stream) input = self.next_input target = self.next_target if input is None and target is None: raise StopIteration input.record_stream(torch.cuda.current_stream()) target.record_stream(torch.cuda.current_stream()) self.preload() return input, target def preload(self): try: self.next_input, self.next_target = next(self.loader) except StopIteration: self.next_input = None self.next_target = None return with torch.cuda.stream(self.stream): self.next_input = self.next_input.cuda(non_blocking=True) self.next_target = self.next_target.cuda(non_blocking=True) self.next_input = self.next_input.float() if self.mean is not None and self.std is not None: self.next_input = self.next_input.sub_(self.mean).div_( self.std) ================================================ FILE: condensa/delta.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from condensa import dtypes def dequantize(module, dtype): """ De-quantizes module to given data type (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param dtype: Target data type. """ if dtype.as_dtype_enum == dtypes.DT_FLOAT32: module.float() elif dtype.as_dtype_enum == dtypes.DT_FLOAT64: module.double() else: raise TypeError('Unknown data type specified for de-quantization') ================================================ FILE: condensa/dtypes.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np from .type_enums import * class DType(object): """Data type for quantization.""" def __init__(self, dtype): self._dtype = dtype @property def name(self): return _DTYPE_TO_STRING[self._dtype] @property def as_numpy_dtype(self): return _TO_NP[self._dtype] @property def as_dtype_enum(self): return self._dtype def __int__(self): return self._dtype def __str__(self): return "" % self.name float16 = DType(DT_FLOAT16) float32 = DType(DT_FLOAT32) float64 = DType(DT_FLOAT64) int8 = DType(DT_INT8) uint8 = DType(DT_UINT8) int16 = DType(DT_INT16) uint16 = DType(DT_UINT16) _DTYPE_TO_STRING = { DT_FLOAT16: "float16", DT_FLOAT32: "float32", DT_FLOAT64: "float64", DT_INT8: "int8", DT_UINT8: "uint8", DT_INT16: "int16", DT_UINT16: "uint16" } _TO_NP = { DT_FLOAT16: np.float16, DT_FLOAT32: np.float32, DT_FLOAT64: np.float64, DT_INT8: np.int8, DT_UINT8: np.uint8, DT_INT16: np.int16, DT_UINT16: np.uint16 } ================================================ FILE: condensa/finetune.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import sys import logging import numpy as np from copy import deepcopy import torch import torch.backends.cudnn as cudnn from tqdm import tqdm import condensa.tensor as T import condensa.util as util logger = logging.getLogger(__name__) class FineTuner(object): """Condensa model fine-tuner. Can be used for retraining compressed models while keeping all zero-valued parameters clipped to zero.""" def __init__(self, w, layer_types=None, biases=True): self.w = w self.layer_types = layer_types self.biases = biases self._compute_mask_inplace() def _compute_mask_inplace(self): with torch.no_grad(): for m in self.w.modules(): if type(m) in self.layer_types\ and not hasattr(m, 'condensa_nocompress'): if hasattr(m, 'weight'): m.mask_w = torch.gt(m.weight.data.abs(), 0.) if self.biases: if hasattr(m, 'bias') and m.bias is not None: m.mask_b = torch.gt(m.bias.data.abs(), 0.) def _apply_mask(self): with torch.no_grad(): for m in self.w.modules(): if hasattr(m, 'mask_w'): T.apply_mask_inplace(m.weight.data, m.mask_w) if hasattr(m, 'mask_b'): T.apply_mask_inplace(m.bias.data, m.mask_b) def run(self, epochs, lr, lr_end, momentum, weight_decay, criterion, trainloader, testloader, valloader, debugging_flags={}): """ Fine-tunes a compressed model. Currently only supports SGD. :param epochs: Number of epochs :type epochs: `int` :param lr: Learning rate :type lr: `float` :param lr_end: End learning rate :type lr_end: `float` :param momentum: Momentum :type momentum: float :param weight_decay: Weight decay :type weight_decay: float :param criterion: Loss criterion :param trainloader: Training dataloader :param testloader: Test dataloader :param valloader: Validation dataloader :param debugging_flags: Debugging flags :type debugging_flags: dict """ use_cuda = torch.cuda.is_available() validate = (valloader is not None) test = (testloader is not None) if use_cuda: cudnn.benchmark = True self.w = self.w.cuda() _model_stat_fn = debugging_flags['custom_model_statistics']\ if 'custom_model_statistics' in debugging_flags\ else util.empty_stat_fn if validate: val_loss, val_stats = _model_stat_fn(self.w, criterion, valloader) logging.info( '[Condensa:FineTuner] Original model val_loss: {:.2f}, {}' .format(val_loss, ', '.join(['{}:{}'.format(k, v) for k,v in val_stats.items()]))) if test: test_loss, test_stats = _model_stat_fn( self.w, criterion, testloader) logging.info( '[Condensa:FineTuner] Original model test_loss: {:.2f}, {} ' .format(test_loss, ', '.join(['{}:{}'.format(k, v) for k,v in test_stats.items()]))) l_alpha = np.exp((np.log(lr_end) - np.log(lr)) / float(epochs)) optimizer = torch.optim.SGD(self.w.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay, nesterov=False) with torch.no_grad(): best_model = deepcopy(self.w) best_loss = sys.float_info.max for epoch in range(epochs): # Switch to training mode self.w.train() nbatches = len(trainloader) if logger.isEnabledFor(logging.INFO): pbar = tqdm(total=nbatches, ascii=True) for input, target in trainloader: if torch.cuda.is_available(): if not input.is_cuda: input = input.cuda() if not target.is_cuda: target = target.cuda() output = self.w(input) loss = criterion(output, target) optimizer.zero_grad() loss.backward() optimizer.step() # Apply mask self._apply_mask() if logger.isEnabledFor(logging.INFO): pbar.update() if logger.isEnabledFor(logging.INFO): pbar.close() # Switch to eval mode self.w.eval() if validate: val_loss, val_stats = _model_stat_fn( self.w, criterion, valloader) logging.info( '[Condensa:FineTuner] Epoch [{}], VAL loss: {:.2f}, {}' .format(epoch, val_loss, ', '.join(['{}:{}'.format(k, v) for k,v in val_stats.items()]))) if test: test_loss, test_stats = _model_stat_fn( self.w, criterion, testloader) logging.info( '[Condensa:FineTuner] Epoch [{}], TEST loss: {:.2f}, {}' .format(epoch, test_loss, ', '.join(['{}:{}'.format(k, v) for k,v in test_stats.items()]))) if validate: if val_loss < best_loss: logger.info( '[Condensa:FineTuner] SAVING MODEL based on VAL') best_loss = val_loss best_model = deepcopy(self.w) elif test: if test_loss < best_loss: logger.info( '[Condensa:FineTuner] SAVING MODEL based on TEST') best_loss = test_loss best_model = deepcopy(self.w) else: logger.info( '[Condensa:FineTuner] SAVING MODEL based on most recent') best_model = deepcopy(self.w) lr *= l_alpha for g in optimizer.param_groups: g['lr'] = lr return best_model ================================================ FILE: condensa/functional.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch def l2norm(tensor, dim, keepdim): """ Computes the l2-norm of elements in input tensor. :param tensor: PyTorch tensor. :type tensor: `torch.nn.Module` :param dim: Reduction dimension. :type dim: `int` :param keepdim: Whether the output has `dim` retained. :type keepdim: `bool` :return: l2-norm of input tensor. """ return torch.norm(tensor, 2, dim, keepdim) def max(tensor, dim, keepdim): """ Computes the maximum value of elements in input tensor. :param tensor: PyTorch tensor. :type tensor: `torch.nn.Module` :param dim: Reduction dimension. :type dim: `int` :param keepdim: Whether the output has `dim` retained. :type keepdim: `bool` :return: Max of input tensor. """ return torch.max(tensor, dim, keepdim)[0] def min(tensor, dim, keepdim): """ Computes the minimum value of elements in input tensor. :param tensor: PyTorch tensor. :type tensor: `torch.nn.Module` :param dim: Reduction dimension. :type dim: `int` :param keepdim: Whether the output has `dim` retained. :type keepdim: `bool` :return: Min of input tensor. """ return torch.min(tensor, dim, keepdim)[0] def mean(tensor, dim, keepdim): """ Computes the mean value of elements in input tensor. :param tensor: PyTorch tensor. :type tensor: `torch.nn.Module` :param dim: Reduction dimension. :type dim: `int` :param keepdim: Whether the output has `dim` retained. :type keepdim: `bool` :return: Mean value of input tensor. """ return torch.mean(tensor, dim, keepdim) def sum(tensor, dim, keepdim): """ Computes the sum of elements in input tensor. :param tensor: PyTorch tensor. :type tensor: `torch.nn.Module` :param dim: Reduction dimension. :type dim: `int` :param keepdim: Whether the output has `dim` retained. :type keepdim: `bool` :return: Sum of input tensor. """ return torch.sum(tensor, dim, keepdim) ================================================ FILE: condensa/lr.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np class IntervalLR(object): """Decays learning rate between two values.""" def __init__(self, begin, end, n): """ Construct an instance of `IntervalLR`. :param begin: Starting learning rate (LR). :type begin: `float` :param end: Ending LR. :type end: `float` :param n: Number of iterations. :type n: `int` """ self.alpha = np.exp((np.log(end) - np.log(begin)) / n) self.lr = begin def step(self): """Signal end of iteration.""" self.lr *= self.alpha @property def learning_rate(self): """Returns current learning rate.""" return self.lr class DecayedLR(object): """Decays learning rate at fixed intervals.""" def __init__(self, begin, schedule, gamma=0.1): """ Construct an instance of `DecayedLR`. :param begin: Starting LR. :type begin: `float` :param schedule: List of iterations when LR must be adjusted. :type schedule: `List/Tuple` :param gamma: LR multiplier. :type gamma: `float` """ self.gamma = gamma self.lr = begin self.schedule = schedule self.counter = 0 def step(self): """Signal end of iteration.""" if self.counter in self.schedule: self.lr *= self.gamma self.counter += 1 @property def learning_rate(self): """Returns current learning rate.""" return self.lr class ExpDecayedLR(object): """Decays learning rate exponentially.""" def __init__(self, begin, gamma): """ Construct an instance of `ExpDecayedLR`. :param begin: Starting LR. :type begin: `float` :param gamma: LR multiplier. :type gamma: `float` """ self.gamma = gamma self.lr = begin self.counter = 0 def step(self): """Signal end of iteration.""" self.counter += 1 @property def learning_rate(self): """Returns current learning rate.""" return self.lr * (self.gamma**self.counter) ================================================ FILE: condensa/opt/__init__.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from .direct.dc import DC from .lc.lc import LC from . import lc ================================================ FILE: condensa/opt/direct/__init__.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ================================================ FILE: condensa/opt/direct/dc.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from copy import deepcopy import torch from condensa.util import EventTimer class DC(object): """Condensa direct compression optimizer.""" def compress(self, w, pi, delta, trainloader, testloader, valloader, criterion): """ Performs model compression using direct optimization. :param w: PyTorch model. :type w: `torch.nn.Module` :param pi: Compression function. :param delta: Decompression function. :param trainloader: Training dataloader. :param testloader: Test dataloader. :param valloader: Validation dataloader. :param criterion: Loss criterion. """ statistics = dict() timer_dc = EventTimer() with torch.no_grad(): compressed = deepcopy(w) pi(compressed) statistics['total_elapsed'] = timer_dc.elapsed_seconds return compressed, statistics ================================================ FILE: condensa/opt/lc/__init__.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from .sgd import SGD from .adam import Adam ================================================ FILE: condensa/opt/lc/adam.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import math from collections import defaultdict import torch class Adam(object): """Custom Adam implementation for L-C optimizer.""" def __init__(self, w, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False): if not 0.0 <= lr: raise ValueError("Invalid learning rate: {}".format(lr)) if not 0.0 <= eps: raise ValueError("Invalid epsilon value: {}".format(eps)) if not 0.0 <= betas[0] < 1.0: raise ValueError("Invalid beta parameter at index 0: {}".format( betas[0])) if not 0.0 <= betas[1] < 1.0: raise ValueError("Invalid beta parameter at index 1: {}".format( betas[1])) try: self.w = w.module except AttributeError: self.w = w self.lr = lr self.betas = betas self.eps = eps self.weight_decay = weight_decay self.amsgrad = amsgrad self.state = defaultdict(dict) def zero_grad(self): """Zeroes out all gradients.""" for p in self.w.parameters(): if p.grad is not None: p.grad.detach_() p.grad.zero_() def reset_state(self): """Resets optimizer state.""" for p in self.w.parameters(): if 'state' in self.state[p]: self.state[p]['step'] = 0 if 'exp_avg' in self.state[p]: self.state[p]['exp_avg'] = torch.zeros_like(p.data) if 'exp_avg_sq' in self.state[p]: self.state[p]['exp_avg_sq'] = torch.zeros_like(p.data) if self.amsgrad and 'max_exp_avg_sq' in self.state[p]: self.state[p]['max_exp_avg_sq'] = torch.zeros_like(p.data) def _step(self, p, condense=False, mu=None, p_theta=None, p_lm=None): if p.grad is None: return grad = p.grad.data if grad.is_sparse: raise RuntimeError('Adam does not support sparse gradients.') state = self.state[p] # State initialization if len(state) == 0: state['step'] = 0 # Exponential moving average of gradient values state['exp_avg'] = torch.zeros_like(p.data) # Exponential moving average of squared gradient values state['exp_avg_sq'] = torch.zeros_like(p.data) if self.amsgrad: # Maintains max of all exp. moving avg. of sq. grad. values state['max_exp_avg_sq'] = torch.zeros_like(p.data) exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] if self.amsgrad: max_exp_avg_sq = state['max_exp_avg_sq'] beta1, beta2 = self.betas state['step'] += 1 if self.weight_decay != 0: grad.add_(self.weight_decay, p.data) if condense is True: assert (mu is not None and p_theta is not None and p_lm is not None) grad.add_(mu * (p.data - p_theta.data) - p_lm.data) # Decay the first and second moment running average coefficient exp_avg.mul_(beta1).add_(1 - beta1, grad) exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) if self.amsgrad: # Maintains the maximum of all 2nd moment running avg. till now torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) # Use the max. for normalizing running avg. of gradient denom = max_exp_avg_sq.sqrt().add_(self.eps) else: denom = exp_avg_sq.sqrt().add_(self.eps) bias_correction1 = 1 - beta1**state['step'] bias_correction2 = 1 - beta2**state['step'] step_size = self.learning_rate * math.sqrt( bias_correction2) / bias_correction1 p.data.addcdiv_(-step_size, exp_avg, denom) def step(self, lr, mu, theta, lm, closure=None): loss = None if closure is not None: loss = closure() self.learning_rate = lr for w_m, theta_m, lm_m in zip(self.w.modules(), theta.modules(), lm.modules()): if hasattr(theta_m, 'condense'): for pname in theta_m.condense: self._step(getattr(w_m, pname), True, mu, getattr(theta_m, pname), getattr(lm_m, pname)) params = set([name for name, _ in theta_m.named_parameters()]) rparams = params - theta_m.condense for pname in rparams: self._step(getattr(w_m, pname)) else: for w_p in w_m.parameters(recurse=False): self._step(w_p) return loss ================================================ FILE: condensa/opt/lc/lc.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import sys from copy import deepcopy import logging from collections import defaultdict import numpy as np import torch import torch.utils.data as data import torch.backends.cudnn as cudnn from tqdm import tqdm from condensa.util import EventTimer from condensa import cfg from condensa import util from .sgd import SGD from condensa.lr import * logger = logging.getLogger(__name__) class record_mode(object): def __enter__(self): cfg.__CONDENSA_RECORD_MODE__ = True def __exit__(self, *args): cfg.__CONDENSA_RECORD_MODE__ = False return False class LC(object): """Condensa L-C compression engine.""" def __init__(self, steps=30, l_optimizer=None, l_optimizer_params={}, lr=None, lr_end=None, lr_decay=None, lr_schedule=None, lr_multiplier=None, mb_iterations_per_l=0, mb_iterations_first_l=0, mu_init=0., mu_multiplier=1., mu_cap=10000, distributed=False, debugging_flags={}): """ Constructs an `LC` class instance. :param steps: Number of L-C iterations. :type steps: float :param l_optimizer: L-step optimizer to use. :param l_optimizer_params: L-step optimizer hyper-parameters. :type l_optimizer_params: dict :param lr: Starting learning rate. :type lr: float :param lr_end: Ending learning rate. :type lr_end: float :param lr_schedule: Learning rate schedule. :type lr_schedule: List :param lr_multiplier: Learning rate multiplier. :type lr_multiplier: float :param mb_iterations_per_l: Number of mini-batch iterations per L-step. :type mb_iterations_per_l: int :param mb_iterations_first_l: Number of mini-batch iterations for first L-step. :type mb_iterations_first_l: int :param mu_init: Initial value of `mu`. :type mu_init: float :param mu_multiplier: Mu multiplier. :type mu_multiplier: float :param mu_cap: Maximum permitted value for `mu`. :type mu_cap: float :param distributed: Enable/disable data-parallelism in L-step. :type distributed: bool :param debugging_flags: Debugging flags :type debugging_flags: dict """ self._engine_config = { k: v for k, v in locals().items() if k != 'self' } logger.info('[Condensa] LC ENGINE CONFIG [' + ', '.join('{!s}={!r}'.format(k, v) for k, v in self._engine_config.items()) + ']') if not 0 <= steps: raise ValueError( 'Invalid steps specified: {}'.format(steps)) if not isinstance(l_optimizer_params, dict): raise TypeError('l_optimizer_params must be a dictionary') if not 0. <= lr: raise ValueError('Invalid learning rate: {}'.format(lr)) if lr_schedule is not None and lr_multiplier is None: raise TypeError( 'Please specify multiplier when using fixed LR schedule') if not 0 < mb_iterations_per_l: raise ValueError( 'Invalid mb_iterations_per_l specified: {}'.format(mb_iterations_per_l)) if not 0 < mb_iterations_first_l: raise ValueError( 'Invalid mb_iterations_first_l specified: {}'.format(mb_iterations_first_l)) if not isinstance(debugging_flags, dict): raise TypeError('debugging_flags must be a dictionary') self.use_cuda = torch.cuda.is_available() self.steps = steps self.l_optimizer = l_optimizer if l_optimizer else SGD self.l_optimizer_params = l_optimizer_params self.lr = lr self.lr_end = lr_end self.lr_decay = lr_decay self.lr_schedule = lr_schedule self.lr_multiplier = lr_multiplier self.mb_iterations_per_l = mb_iterations_per_l self.mb_iterations_first_l = mb_iterations_first_l self.mu_init = mu_init self.mu_multiplier = mu_multiplier self.mu_cap = mu_cap self.distributed = distributed self.debugging_flags = debugging_flags def zero_(self, model): """ Zeroes out model parameters. :param model: PyTorch model. :type model: torch.nn.Module """ with torch.no_grad(): pflat = torch.nn.utils.parameters_to_vector( model.parameters()).fill_(0.) torch.nn.utils.vector_to_parameters(pflat, model.parameters()) def compress(self, w, pi, delta, trainloader, testloader, valloader, loss_fn): """ Main L-C compression method. :param w: Input model. :type w: torch.nn.Module :param pi: Compression function. :param delta: Decompression function. :param trainloader: Training dataloader. :param testloader: Test dataloader. :param valloader: Validation dataloader. :param loss_fn: Loss criterion. """ statistics = {} # Save engine configuration statistics.update(self._engine_config) _model_stat_fn = self.debugging_flags['custom_model_statistics']\ if 'custom_model_statistics' in self.debugging_flags\ else util.empty_stat_fn _disable_train_stats = self.debugging_flags['disable_train_stats']\ if 'disable_train_stats' in self.debugging_flags\ else False timer_lc = EventTimer() if self.use_cuda: cudnn.benchmark = True logger.debug("[Condensa] cuDNN VERSION: {}".format(cudnn.version())) validate = (valloader is not None) test = (testloader is not None) # Copy model to GPU0 memory if self.use_cuda: w = w.cuda(0) # Mark all compressible modules in w with record_mode(): pi(w) with torch.no_grad(): theta = deepcopy(w) self.zero_(theta) with torch.no_grad(): lm = deepcopy(w) self.zero_(lm) with torch.no_grad(): best_model = deepcopy(w) # Enable data-parallelism in L step if self.use_cuda and self.distributed: ngpus = torch.cuda.device_count() logger.info('[Condensa] {} GPUs enabled for L-step'.format(ngpus)) w = torch.nn.DataParallel(w) mu = 0. learning_rate = self.lr optimizer = self.l_optimizer(w, lr=learning_rate, **self.l_optimizer_params) optimizer.reset_state() if not _disable_train_stats: w_train_loss, w_train_stats = _model_stat_fn(w, loss_fn, trainloader) logger.info('[Condensa] w TRAIN\tloss={:.5f}, {}' .format(w_train_loss, ', '.join(['{}:{}'.format(k, v) for k,v in w_train_stats.items()]))) if validate: w_val_loss, w_val_stats = _model_stat_fn(w, loss_fn, valloader) logger.info('[Condensa] w VAL\tloss={:.5f}, {}' .format(w_val_loss, ', '.join(['{}:{}'.format(k, v) for k,v in w_val_stats.items()]))) if test: w_test_loss, w_test_stats = _model_stat_fn(w, loss_fn, testloader) logger.info('[Condensa] w TEST\tloss={:.5f}, {}' .format(w_test_loss, ', '.join(['{}:{}'.format(k, v) for k,v in w_test_stats.items()]))) best_loss = sys.float_info.max train_losses = [] if validate: val_losses = [] if test: test_losses = [] outer_lr_scheduler = None if self.lr_decay is not None: outer_lr_scheduler = ExpDecayedLR(self.lr, self.lr_decay) elif self.lr_schedule is not None: outer_lr_scheduler = DecayedLR(self.lr, self.lr_schedule, self.lr_multiplier) for j in range(0, self.steps): n_sgd_iter = (self.mb_iterations_first_l if j == 1 else self.mb_iterations_per_l) # Set up outer learning rate learning_rate = self.lr if outer_lr_scheduler is not None: learning_rate = outer_lr_scheduler.learning_rate logger.info( '[Condensa] LC Iteration {}:\tmu={:.5f}, lr={:.5f}'.format( j, mu, learning_rate)) inner_lr_scheduler = None if self.lr_end is not None: inner_lr_scheduler = IntervalLR(learning_rate, self.lr_end, n_sgd_iter) # L step # Switch to training mode i = 0 w.train() iterator = iter(trainloader) if logger.isEnabledFor(logging.INFO) and j>0: pbar = tqdm(total=n_sgd_iter, ascii=True) while True: if j == 0: logger.info('[Condensa] Skipping first L-step') break if j == 1 and i >= self.mb_iterations_first_l: break if j > 1 and i >= self.mb_iterations_per_l: break try: inputs, targets = next(iterator) except StopIteration: iterator = iter(trainloader) inputs, targets = next(iterator) if self.use_cuda: if not inputs.is_cuda: inputs = inputs.cuda() if not targets.is_cuda: targets = targets.cuda(non_blocking=True) outputs = w(inputs) loss = loss_fn(outputs, targets) optimizer.zero_grad() loss.backward() optimizer.step(learning_rate, mu, theta, lm) if inner_lr_scheduler is not None: inner_lr_scheduler.step() learning_rate = inner_lr_scheduler.learning_rate if logger.isEnabledFor(logging.INFO): pbar.update() i += 1 if logger.isEnabledFor(logging.INFO) and j>0: pbar.close() logger.info('') if self.use_cuda: torch.cuda.synchronize() w.eval() # C step and theta update try: theta.load_state_dict(w.module.state_dict()) except AttributeError: theta.load_state_dict(w.state_dict()) if mu > 0: try: wmodules = w.module.modules() except AttributeError: wmodules = w.modules() with record_mode(): pi(theta) with torch.no_grad(): for w_m, theta_m, lm_m in zip(wmodules, theta.modules(), lm.modules()): if hasattr(theta_m, 'condense'): for pname in theta_m.condense: getattr(theta_m, pname).data = ( getattr(w_m, pname).detach() - getattr(lm_m, pname).data / mu) pi(theta) if not _disable_train_stats: nested_train_loss, nested_train_stats = _model_stat_fn(theta, loss_fn, trainloader) train_losses.append(nested_train_loss) logger.info( '[Condensa] Nested (theta) TRAIN\tloss={:.5f}, {}' .format(nested_train_loss, ', '.join(['{}:{}'.format(k, v) for k,v in nested_train_stats.items()]))) if validate: nested_val_loss, nested_val_stats = _model_stat_fn(theta, loss_fn, valloader) val_losses.append(nested_val_loss) logger.info( '[Condensa] Nested (theta) VAL\tloss={:.5f}, {}' .format(nested_val_loss, ', '.join(['{}:{}'.format(k, v) for k,v in nested_val_stats.items()]))) if test: nested_test_loss, nested_test_stats = _model_stat_fn(theta, loss_fn, testloader) test_losses.append(nested_test_loss) logger.info( '[Condensa] Nested (theta) TEST\tloss={:.5f}, {}' .format(nested_test_loss, ', '.join(['{}:{}'.format(k, v) for k,v in nested_test_stats.items()]))) if validate: if nested_val_loss < best_loss: logger.info('[Condensa] Saving model based on VAL') best_loss = nested_val_loss # Deep-copy required here to preserve dtypes best_model = deepcopy(theta) elif test: if nested_test_loss < best_loss: logger.info('[Condensa] Saving model based on TEST') best_loss = nested_test_loss # Deep-copy required here to preserve dtypes best_model = deepcopy(theta) else: logger.info('[Condensa] Saving model based on most recent') best_model = deepcopy(theta) # theta <- delta(theta) delta(theta) # LM update if mu > 0: try: wmodules = w.module.modules() except AttributeError: wmodules = w.modules() for w_m, theta_m, lm_m in zip(wmodules, theta.modules(), lm.modules()): if hasattr(theta_m, 'condense'): for pname in theta_m.condense: getattr( lm_m, pname).data = (getattr(lm_m, pname).data - mu * (getattr(w_m, pname).detach() - getattr(theta_m, pname).data)) optimizer.reset_state() # Update mu mu = self._update_mu(mu, self.mu_init, self.mu_multiplier, self.mu_cap) # Update LR schedule if outer_lr_scheduler is not None: outer_lr_scheduler.step() statistics['elapsed_lc'] = timer_lc.elapsed_seconds statistics['train_losses'] = train_losses if test: statistics['test_losses'] = test_losses if validate: statistics['val_losses'] = val_losses return best_model, statistics def _update_mu(self, mu, mu_init, mu_multiplier, mu_cap): if mu > mu_cap: return mu if mu != 0: return mu * mu_multiplier else: return mu_init ================================================ FILE: condensa/opt/lc/sgd.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from collections import defaultdict import torch class SGD(object): """Custom SGD implementation for L-C optimizer.""" def __init__(self, w, lr=None, momentum=None, weight_decay=0): """ Creates instance of `SGD`. :param w: PyTorch model. :type w: torch.nn.Module :param lr: Learning rate. :type lr: float :param momentum: SGD momentum. :type momentum: float :param weight_decay: Weight decay amount (L2 regularation). :type weight_decay: float """ if lr is None or momentum is None: raise ValueError('Learning rate and momentum are required') if lr < 0.0: raise ValueError('Invalid learning rate: {}'.format(lr)) if momentum < 0.0: raise ValueError('Invalid momentum value: {}'.format(momentum)) if weight_decay < 0.0: raise ValueError( 'Invalid weight decay value: {}'.format(weight_decay)) try: self.w = w.module except AttributeError: self.w = w self.lr = lr self.momentum = momentum self.weight_decay = weight_decay self.state = defaultdict(dict) def zero_grad(self): """Zeroes out all gradients.""" for p in self.w.parameters(): if p.grad is not None: p.grad.detach_() p.grad.zero_() def reset_state(self): """Resets optimizer state.""" for p in self.w.parameters(): if 'velocity' in self.state[p]: self.state[p]['velocity'] = torch.zeros_like(p.data) def _step(self, p, condense=False, mu=None, p_theta=None, p_lm=None): if p.grad is None: return lr = self.learning_rate d_p = p.grad.data if self.weight_decay != 0: d_p.add_(self.weight_decay, p.data) if condense is True: assert (mu is not None and p_theta is not None and p_lm is not None) d_p.add_(mu * (p.data - p_theta.data) - p_lm.data) update = p.data - lr * (d_p) if 'velocity' not in self.state[p]: velocity = torch.zeros_like(p.data) else: velocity = self.state[p]['velocity'] x = self.momentum * velocity + update - p.data self.state[p]['velocity'] = x p.data = self.momentum * x + update def step(self, lr, mu, theta, lm, closure=None): """ Takes one optimizer step. :param lr: Current learning rate. :type lr: float :param mu: L-C mu hyper-parameter value. :type mu: float :param theta: Compressed model. :type theta: torch.nn.Module :param lm: Lagrange multiplier. :type lm: torch.nn.Module :param closure: Loss closure. """ loss = None if closure is not None: loss = closure() self.learning_rate = lr for w_m, theta_m, lm_m in zip(self.w.modules(), theta.modules(), lm.modules()): if hasattr(theta_m, 'condense'): for pname in theta_m.condense: self._step(getattr(w_m, pname), True, mu, getattr(theta_m, pname), getattr(lm_m, pname)) params = set([name for name, _ in theta_m.named_parameters()]) rparams = params - theta_m.condense for pname in rparams: self._step(getattr(w_m, pname)) else: for w_p in w_m.parameters(recurse=False): self._step(w_p) return loss ================================================ FILE: condensa/pi.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch import torch.nn from condensa import dtypes from condensa import cfg import condensa.tensor as T def __precheck(module): if not cfg.__CONDENSA_PI_PRECHECK__: return if len(list(module.children())) > 0: raise RuntimeError('Only leaf modules may be compressed') for name, _ in module.named_parameters(): if name != 'weight' and name != 'bias': raise NotImplementedError( 'Unknown parameter {} detected'.format(name)) def quantize(module, dtype): """ Quantizes module to given data type (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param dtype: Target data type. """ __precheck(module) parameters = ['weight'] #parameters = [name for name, _ in module.named_parameters()] if hasattr(module, 'condense'): module.condense |= set(parameters) else: module.condense = set(parameters) if not cfg.__CONDENSA_RECORD_MODE__: if dtype.as_dtype_enum == dtypes.DT_FLOAT16: module.half() elif dtype.as_dtype_enum == dtypes.DT_FLOAT32: module.float() else: raise TypeError('Unknown data type specified for quantization') def prune(module, threshold, parameter='weight'): """ Prunes module parameters based on magnitude (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param threshold: Magnitude threshold for pruning. :type threshold: `float` :param parameter: Module parameter to prune (default: 'weight') :type parameter: str """ __precheck(module) if not hasattr(module, parameter): raise ValueError('Could not find parameter \'{}\' in module', parameter) if hasattr(module, 'condense'): module.condense.add(parameter) else: module.condense = set([parameter]) if not cfg.__CONDENSA_RECORD_MODE__: p = getattr(module, parameter) pdata = p.data.view(-1) mask = T.simple_mask(pdata, threshold).type(pdata.type()) T.apply_mask_inplace(pdata, mask) p.data = pdata.view_as(p).data #if cfg.__CONDENSA_SAVE_MASK__: module.mask = mask.view_as(p).data def blockprune(module, threshold, block_size, criteria, align=None, parameter='weight'): """ Prunes blocks of module parameters based on magnitude (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param threshold: Magnitude threshold for pruning. :type threshold: `float` :param block_size: Block size for pruning. :type block_size: `Tuple` :param criteria: Aggregation function for thresholding. :type criteria: `condensa.functional` :param align: Alignment of compressed parameters. :type align: `int` :param parameter: Module parameter to prune (default: 'weight') :type parameter: str """ __precheck(module) if not hasattr(module, parameter): raise ValueError('Could not find parameter \'{}\' in module', parameter) p = getattr(module, parameter) ndim = p.dim() bdim = len(block_size) if ndim != bdim: raise RuntimeError( 'Block must have same dimensions as parameter \'{}\''.format( parameter)) if hasattr(module, 'condense'): module.condense.add(parameter) else: module.condense = set([parameter]) if not cfg.__CONDENSA_RECORD_MODE__: mask = T.block_mask(p.data, threshold, block_size, criteria, align) T.apply_mask_inplace(p.data, mask) return mask return None def neuron_prune(module, threshold, criteria, align=None, prune_bias=True): """ Prunes neurons based on magnitude (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param threshold: Magnitude threshold for pruning. :type threshold: `float` :param criteria: Aggregation function for thresholding. :type criteria: `condensa.functional` :param align: Alignment of compressed parameters. :type align: `int` :param prune_bias: Whether to prune corresponding biases. :type prune_bias: `bool` """ __precheck(module) parameter = 'weight' if not hasattr(module, parameter): raise ValueError('Could not find parameter \'{}\' in module', parameter) shape = getattr(module, parameter).data.shape if len(shape) != 2: raise NotImplementedError( 'Row pruning currently only supported for 2D parameters') if hasattr(module, 'condense'): module.condense.add(parameter) else: module.condense = set([parameter]) if not cfg.__CONDENSA_RECORD_MODE__: block_size = (1, shape[1]) mask = blockprune(module, threshold, block_size, criteria, align, parameter) # Prune corresponding bias tensor if module.bias is not None and prune_bias is True: assert mask.ndimension() == 2 T.apply_mask_inplace(module.bias.data, mask[:, 0]) def filter_prune(module, threshold, criteria, align=None, prune_bias=True): """ Prunes 3D blocks (filters) of module parameters based on magnitude (inplace). :param module: PyTorch module. :type module: `torch.nn.Module` :param threshold: Magnitude threshold for pruning. :type threshold: `float` :param criteria: Aggregation function for thresholding. :type criteria: `condensa.functional` :param align: Alignment of compressed parameters. :type align: `int` :param prune_bias: Whether to prune corresponding biases. :type prune_bias: `bool` """ __precheck(module) parameter = 'weight' if not hasattr(module, parameter): raise ValueError('Could not find parameter \'{}\' in module', parameter) p = getattr(module, parameter) ndim = p.dim() if ndim != 4: raise RuntimeError('Filter pruning requires a 4D parameter') if hasattr(module, 'condense'): module.condense.add(parameter) else: module.condense = set([parameter]) if not cfg.__CONDENSA_RECORD_MODE__: block_size = (1, *p.data.shape[1:]) mask = T.block_mask(p.data, threshold, block_size, criteria, align) T.apply_mask_inplace(p.data, mask) # Prune corresponding bias tensor if module.bias is not None and prune_bias is True: assert mask.ndimension() == 4 T.apply_mask_inplace(module.bias.data, mask[:, 0, 0, 0]) ================================================ FILE: condensa/schemes.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch import condensa import condensa.tensor as T import condensa.functional as F class Compose(object): """Composes two or more schemes together.""" def __init__(self, schemes): """ Creates a `Compose` instance. :param schemes: List of schemes to compose. :type schemes: `list` """ if not isinstance(schemes, list): raise TypeError('Please specify schemes to compose as a list') self.schemes = schemes def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ for s in self.schemes: s.pi(module) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ for s in reversed(self.schemes): s.delta(module) def __repr__(self): return ''.format(self.schemes) class Prune(object): """Prunes network to given density.""" def __init__(self, density): """ Creates a `Prune` instance. :param density: Target density. :type density: `float` """ self.density = density self.layer_types = [torch.nn.Linear, torch.nn.Conv2d] def threshold(self, module): """ Computes magnitude threshold. :param module: PyTorch module. :type module: `torch.nn.Module` """ vec = [] for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): vec.append(m.weight.data.view(-1)) return T.threshold(torch.cat(vec), self.density) def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ threshold = self.threshold(module) for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): condensa.prune(m, threshold) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ pass def __repr__(self): return ''.format(self.density) class Quantize(object): """Quantizes network to given data-type.""" def __init__(self, dtype=condensa.float16): """ Creates `Quantize` class instance. :param dtype: Target data type (default: float16). """ self.dtype = dtype self.layer_types = [torch.nn.Linear, torch.nn.Conv2d] def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): condensa.quantize(m, self.dtype) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): condensa.dequantize(m, condensa.float32) def __repr__(self): return ''.format(self.dtype) class NeuronPrune(object): """Prunes neurons from fully-connected layers.""" def __init__(self, density, align=None, criteria=F.l2norm, prune_bias=True): """ Creates an instance of `NeuronPrune`. :param density: Target density. :type density: `float` :param align: Tensor alignment in compressed model. :type align: `int` :param criteria: Neuron aggregation criteria (default: l2norm). :type criteria: `condensa.functional` :param prune_bias: Whether to prune corresponding biases (default: True). :type prune_bias: `bool` """ self.density = density self.align = align self.criteria = criteria self.prune_bias = prune_bias def threshold(self, module): """ Computes magnitude threshold. :param module: PyTorch module. :type module: `torch.nn.Module` """ vec = [] for m in module.modules(): if isinstance(m, torch.nn.Linear) and not hasattr( m, 'condensa_nocompress'): agg = T.aggregate_neurons(m.weight.data, self.criteria) vec.append(agg.view(-1)) return T.threshold(torch.cat(vec), self.density) def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ threshold = self.threshold(module) for m in module.modules(): if isinstance(m, torch.nn.Linear) and not hasattr( m, 'condensa_nocompress'): condensa.neuron_prune(m, threshold, align=self.align, criteria=self.criteria, prune_bias=self.prune_bias) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ pass def __repr__(self): return ''.format( self.density, self.align, self.criteria, self.prune_bias) class FilterPrune(object): """Prunes filters from convolutional layers.""" def __init__(self, density, align=None, criteria=F.l2norm, prune_bias=True): """ Creates an instance of `FilterPrune`. :param density: Target density. :type density: `float` :param align: Tensor alignment in compressed model. :type align: `int` :param criteria: Filter aggregation criteria (default: l2norm). :type criteria: `condensa.functional` :param prune_bias: Whether to prune corresponding biases (default: True). :type prune_bias: `bool` """ self.density = density self.align = align self.criteria = criteria self.prune_bias = prune_bias def threshold(self, module): """ Computes magnitude threshold. :param module: PyTorch module. :type module: `torch.nn.Module` """ vec = [] for m in module.modules(): if isinstance(m, torch.nn.Conv2d) and not hasattr( m, 'condensa_nocompress'): agg = T.aggregate_filters(m.weight.data, self.criteria) vec.append(agg.view(-1)) return T.threshold(torch.cat(vec), self.density) def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ threshold = self.threshold(module) for m in module.modules(): if isinstance(m, torch.nn.Conv2d) and not hasattr( m, 'condensa_nocompress'): condensa.filter_prune(m, threshold, align=self.align, criteria=self.criteria, prune_bias=self.prune_bias) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ pass def __repr__(self): return ''.format( self.density, self.align, self.criteria, self.prune_bias) class StructurePrune(object): """Combines neuron and filter pruning using a single threshold value.""" def __init__(self, density, align=None, criteria=F.l2norm, prune_bias=True): """ Creates an instance of `StructurePrune`. :param density: Target density. :type density: `float` :param align: Tensor alignment in compressed model. :type align: `int` :param criteria: Structure aggregation criteria (default: l2norm). :type criteria: `condensa.functional` :param prune_bias: Whether to prune corresponding biases (default: True). :type prune_bias: `bool` """ self.density = density self.align = align self.criteria = criteria self.prune_bias = prune_bias def threshold(self, module): """ Computes magnitude threshold. :param module: PyTorch module. :type module: `torch.nn.Module` """ vec = [] for m in module.modules(): if isinstance(m, torch.nn.Linear) and not hasattr( m, 'condensa_nocompress'): agg = T.aggregate_neurons(m.weight.data, self.criteria) vec.append(agg.view(-1)) if isinstance(m, torch.nn.Conv2d) and not hasattr( m, 'condensa_nocompress'): agg = T.aggregate_filters(m.weight.data, self.criteria) vec.append(agg.view(-1)) return T.threshold(torch.cat(vec), self.density) def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ threshold = self.threshold(module) for m in module.modules(): if isinstance(m, torch.nn.Linear) and not hasattr( m, 'condensa_nocompress'): condensa.neuron_prune(m, threshold, align=self.align, criteria=self.criteria, prune_bias=self.prune_bias) if isinstance(m, torch.nn.Conv2d) and not hasattr( m, 'condensa_nocompress'): condensa.filter_prune(m, threshold, align=self.align, criteria=self.criteria, prune_bias=self.prune_bias) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ pass def __repr__(self): return ''.format( self.density, self.align, self.criteria, self.prune_bias) class BlockPrune(object): """Prunes blocks in Linear layers.""" def __init__(self, density, block_size, criteria=F.l2norm): """ Creates an instance of `BlockPrune`. :param density: Target density. :type density: `float` :param block_size: Target block size. :type block_size: `Tuple` :param criteria: Structure aggregation criteria (default: l2norm). :type criteria: `condensa.functional` """ self.density = density self.block_size = block_size self.criteria = criteria self.layer_types = [torch.nn.Linear] def threshold(self, module): """ Computes magnitude threshold. :param module: PyTorch module. :type module: `torch.nn.Module` """ vec = [] for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): agg = T.aggregate(m.weight.data, self.block_size, self.criteria) vec.append(agg.view(-1)) return T.threshold(torch.cat(vec), self.density) def pi(self, module): """ Applies compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ threshold = self.threshold(module) for m in module.modules(): if type(m) in self.layer_types and not hasattr( m, 'condensa_nocompress'): condensa.blockprune(m, threshold, block_size=self.block_size, criteria=self.criteria) def delta(self, module): """ Applies de-compression scheme to module. :param module: PyTorch module. :type module: `torch.nn.Module` """ pass def __repr__(self): return ''.format( self.density, self.block_size) ================================================ FILE: condensa/tensor.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import torch def density(tensor): """ Computes the ratio of nonzeros to total elements in a tensor. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :return: Ratio of nonzeros to total elements :rtype: `float` """ t = tensor.view(-1) return float(t.nonzero().numel()) / float(t.numel()) def sparsity(tensor): """ Computes the ratio of zeros to total elements in a tensor. :param tensor: PyTorch tensor :type tensor: torch.Tensor :return: Ratio of zeros to total elements :rtype: `float` """ return 1. - density(tensor) def threshold(tensor, density): """ Computes a magnitude-based threshold for given tensor. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param density: Desired ratio of nonzeros to total elements :type density: `float` :return: Magnitude threshold :rtype: `float` """ tf = tensor.abs().view(-1) numel = int(density * tf.numel()) if numel == 0: raise RuntimeError('Provided density value causes model to be zero.') topk, _ = torch.topk(tf.abs(), numel, sorted=True) return topk.data[-1] def aggregate(tensor, blocksize, criteria): """ Aggregates tensor dimensions according to criteria. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param blocksize: Size of blocks to aggregate :type blocksize: `Tuple(int)` :param criteria: Aggregation criteria :type criteria: `condensa.functional` :return: Aggregated tensor :rtype: `torch.Tensor` """ if tensor.dim() != len(blocksize): raise RuntimeError('Tensor and block dimensions do not match') ndim = tensor.dim() blocksize_flat = np.prod(np.array(blocksize)) shape = np.array(tensor.shape) repeats = (shape / blocksize).astype(int) divcheck = (shape % blocksize).astype(int) if not np.all(divcheck == 0): raise TypeError('Block size must be divisible by tensor size') tmpshape = np.column_stack([repeats, blocksize]).ravel() order = np.arange(len(tmpshape)) order = np.concatenate([order[::2], order[1::2]]) blocks = tensor.abs().reshape(tuple(tmpshape)) blocks = blocks.permute(tuple(order)).reshape(-1, *blocksize) agg = criteria(blocks.reshape(-1, blocksize_flat), dim=1, keepdim=True) return agg def aggregate_neurons(tensor, criteria): """ Aggregates neurons (rows) in given weight matrix. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param criteria: Aggregation criteria :type criteria: `condensa.functional` :return: Neuron-aggregated tensor :rtype: `torch.Tensor` """ return aggregate(tensor, (1, tensor.shape[1]), criteria) def aggregate_filters(tensor, criteria): """ Aggregates 3D filters in given weight tensor. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param criteria: Aggregation criteria :type criteria: `condensa.functional` :return: Filter-aggregated tensor :rtype: `torch.Tensor` """ return aggregate(tensor, (1, *tensor.shape[1:]), criteria) def simple_mask(tensor, threshold, align=None): """ Computes a simple binary mask for given magnitude threshold. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param threshold: magnitude threshold for pruning :type threshold: `float` :return: Mask :rtype: `torch.Tensor` """ assert tensor.dim() == 1 if align is None: return torch.ge(tensor.abs(), threshold) else: size = tensor.size(0) if size < align: raise RuntimeError('Tensor too small for given alignment') t = tensor.abs() nnz = torch.ge(t, threshold).nonzero().size(0) nnz = int(nnz / align) * align _, indices = torch.topk(t, nnz) ones = torch.ones(nnz, dtype=tensor.dtype, layout=tensor.layout, device=tensor.device) mask = torch.zeros_like(tensor).scatter_(0, indices, ones) return mask def block_mask(tensor, threshold, blocksize, criteria, align=None): """ Computes an n-D binary mask for given magnitude threshold. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param threshold: magnitude threshold for pruning :type threshold: `float` :param blocksize: desired block size (Tuple) :type blocksize: `Tuple` :param criteria: aggregation function for thresholding (default: max) :type criteria: `condensa.functional` :return: Mask :rtype: `torch.Tensor` """ # Original implementation at: https://stackoverflow.com/questions/42297115 # /numpy-split-cube-into-cubes/42298440#42298440 if tensor.dim() != len(blocksize): raise RuntimeError('Tensor and block dimensions do not match') ndim = tensor.dim() blocksize_flat = np.prod(np.array(blocksize)) shape = np.array(tensor.shape) repeats = (shape / blocksize).astype(int) divcheck = (shape % blocksize).astype(int) if not np.all(divcheck == 0): raise TypeError('Block size must be divisible by tensor size') tmpshape = np.column_stack([repeats, blocksize]).ravel() order = np.arange(len(tmpshape)) order = np.concatenate([order[::2], order[1::2]]) blocks = tensor.abs().reshape(tuple(tmpshape)) blocks = blocks.permute(tuple(order)).reshape(-1, *blocksize) agg = criteria(blocks.reshape(-1, blocksize_flat), dim=1, keepdim=True) mask = simple_mask(agg.view(-1), threshold, align) mask = mask.view(agg.shape).expand(-1, blocksize_flat).reshape(blocks.shape) N, newshape = mask.shape[0], mask.shape[1:] repeats = (shape / newshape).astype(int) tmpshape = np.concatenate([repeats, newshape]) order = np.arange(len(tmpshape)).reshape(2, -1).ravel(order='F') return mask.reshape(tuple(tmpshape)).permute(tuple(order)).reshape( tuple(shape)) def apply_mask(tensor, mask): """ Computes masked version of tensor. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param mask: Binary mask :type mask: `torch.Tensor` :return: Masked version of `tensor` :rtype: `torch.Tensor` """ #assert isinstance(tensor, torch.Tensor) return torch.mul(tensor, mask.type(tensor.type())) def apply_mask_inplace(tensor, mask): """ Applies binary mask in-place. :param tensor: PyTorch tensor :type tensor: `torch.Tensor` :param mask: Binary mask :type mask: `torch.Tensor` """ #assert isinstance(tensor, torch.Tensor) tensor.mul_(mask.type(tensor.type())) ================================================ FILE: condensa/type_enums.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Supported data types DT_FLOAT16 = 1 DT_FLOAT32 = 2 DT_FLOAT64 = 3 DT_INT8 = 4 DT_UINT8 = 5 DT_INT16 = 6 DT_UINT16 = 7 ================================================ FILE: condensa/util.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import time import sys import numpy as np import logging from tqdm import tqdm import torch.nn.utils import torch.utils.data as data from torch.autograd import Variable import condensa.tensor as T logger = logging.getLogger(__name__) class AverageMeter(object): """Computes and stores the average and current value""" def __init__(self): self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count def to_python_float(t): if hasattr(t, 'item'): return t.item() else: return t[0] def is_leaf_node(module): """ Checks if given module is a leaf module. :param module: PyTorch module :type module: `torch.nn.Module` :return: Boolean value representing whether module is a leaf. :rtype: `bool` """ return list(module.children()) == [] def magnitude_threshold(module, density): """ Computes a magnitude-based threshold for given module. :param module: PyTorch module :type module: `torch.nn.Module` :param density: Desired ratio of nonzeros to total elements :type density: `float` :return: Magnitude threshold :rtype: `float` """ params = torch.nn.utils.parameters_to_vector(module.parameters()) return T.threshold(params, density) def empty_stat_fn(model, criterion, dataloader): """ Empty model statistics function: returns loss. :param model: PyTorch model :type model: `torch.nn.Module` :param loss_fn: Loss function :param dataloader: Data loader to use :return: Tuple of loss, dictionary of statistics :rtype: `Tuple(float, dict)` """ return (loss(model, criterion, dataloader), {}) def accuracy(output, target, topk=(1, )): """ Computes the precision@k for the specified values of k :param output: Predicted output batch :type output: `torch.Tensor` :param target: Actual output batch :type target: `torch.Tensor` :param topk: Top-k value :type topk: `Tuple` :return: Model accuracy :rtype: `float` """ maxk = max(topk) batch_size = target.size(0) _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) res = [] for k in topk: correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) res.append(correct_k.mul_(100.0 / batch_size)) return res def loss(model, criterion, dataloader): """ Computes loss on given dataset. :param model: PyTorch model :type model: `torch.nn.Module` :param loss_fn: Loss function :param dataloader: Data loader to use :return: Loss :rtype: `float` """ losses = AverageMeter() model.eval() pzero = list(model.parameters())[0] if (pzero.dtype != torch.float32 and pzero.dtype != torch.float16): raise NotImplementedError('Only FP16 and FP32 weights are supported') cast2fp16 = (isinstance(pzero, torch.HalfTensor) or isinstance(pzero, torch.cuda.HalfTensor)) loss = 0. with torch.no_grad(): for input, target in dataloader: if torch.cuda.is_available(): input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) if cast2fp16: input = input.half() output = model(input) loss = criterion(output, target) losses.update(to_python_float(loss.data), input.size(0)) return losses.avg def cnn_statistics(model, criterion, dataloader): """ Computes accuracy of given CNN model. :param model: PyTorch model :type model: `torch.nn.Module` :param criterion: Loss function :param dataloader: Data loader to use :return: Top-1 and Top-5 accuracies :rtype: Tuple(top1, top5) """ losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() model.eval() pzero = list(model.parameters())[0] if (pzero.dtype != torch.float32 and pzero.dtype != torch.float16): raise NotImplementedError('Only FP16 and FP32 weights are supported') cast2fp16 = (isinstance(pzero, torch.HalfTensor) or isinstance(pzero, torch.cuda.HalfTensor)) loss = 0. correct = 0. with torch.no_grad(): for input, target in dataloader: if torch.cuda.is_available(): input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) if cast2fp16: input = input.half() output = model(input) loss = criterion(output, target) prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) losses.update(to_python_float(loss.data), input.size(0)) top1.update(to_python_float(prec1), input.size(0)) top5.update(to_python_float(prec5), input.size(0)) return (losses.avg, {'top1': top1.avg, 'top5': top5.avg}) def compressed_model_stats(w, wc): """ Retrieve various statistics for compressed model. :param w: Original model :type w: `torch.nn.Module` :param wc: Compressed model :type wc: `torch.nn.Module` :return: Dictionary of compressed model statistics :rtype: `dict` """ stats = dict() nparams_w = dict() nparams_wc = dict() nparams_w['total_nnz'] = torch.nn.utils.parameters_to_vector( w.parameters()).view(-1).nonzero().numel() nparams_wc['total_nnz'] = torch.nn.utils.parameters_to_vector( wc.parameters()).view(-1).nonzero().numel() for (name_w, m_w), (name_wc, m_wc) in zip(w.named_modules(), wc.named_modules()): if type(m_w) == torch.nn.Linear or type(m_w) == torch.nn.Conv2d: nparams_w[name_w] = torch.nn.utils.parameters_to_vector( m_w.parameters()).view(-1).nonzero().numel() nparams_wc[name_wc] = torch.nn.utils.parameters_to_vector( m_wc.parameters()).view(-1).nonzero().numel() stats['num_params'] = nparams_w stats['num_params_compressed'] = nparams_wc return stats def pretrain(epochs, model, trainloader, criterion, optimizer): """ No-frills pre-training method. :param epochs: Number of epochs :type epochs: `int` :param model: PyTorch model :type model: `torch.nn.Module` :param trainloader: Training dataloader :param criterion: Loss criterion :param optimizer: Optimizer to use """ _config = {'epochs': epochs} logging.info('[Condensa] PRETRAIN CONFIG [' + ', '.join('{!s}={!r}'.format(k, v) for k, v in _config.items()) + ']') use_cuda = torch.cuda.is_available() if use_cuda: model = model.cuda() model = torch.nn.DataParallel(model) mb_iterator = iter(trainloader) model.train() for j in range(0, epochs): if logger.isEnabledFor(logging.INFO): pbar = tqdm(total=len(trainloader), ascii=True, desc='Epoch {}'.format(j)) for input, target in trainloader: if torch.cuda.is_available(): input = input.cuda(non_blocking=True) target = target.cuda(non_blocking=True) input, target = Variable(input), Variable(target) optimizer.zero_grad() output = model(input) loss = criterion(output, target) loss.backward() optimizer.step() if logger.isEnabledFor(logging.INFO): pbar.update() if logger.isEnabledFor(logging.INFO): pbar.close() logging.info('') class EventTimer(object): """Simple timer class.""" def __init__(self): """Constructor. Begins timing.""" self.begin = time.perf_counter() def reset(self): """Reset timer.""" self.begin = time.perf_counter() @property def elapsed_seconds(self): """Returns elapsed seconds.""" return (time.perf_counter() - self.begin) ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = Condensa SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build set SPHINXPROJ=Condensa if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/source/_static/ga_tracker.js ================================================ window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-146596996-1'); ================================================ FILE: docs/source/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('../..')) sys.setrecursionlimit(1500) import condensa # -- Project information ----------------------------------------------------- project = u'Condensa' copyright = u'2019, NVIDIA Corporation' author = u'Saurav Muralidharan' # The short X.Y version version = '0.5' # The full version, including alpha/beta/rc tags release = '0.5-beta' # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', ] napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # if os.environ.get('READTHEDOCS') != 'True': try: import sphinx_rtd_theme except ImportError: pass # assume we have sphinx >= 1.3 else: html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'Condensadoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'Condensa.tex', u'Condensa Documentation', u'Saurav Muralidharan', 'manual'), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'condensa', u'Condensa Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'Condensa', u'Condensa Documentation', author, 'Condensa', 'Programmable Model Compression', 'Miscellaneous'), ] # -- Extension configuration ------------------------------------------------- #autoclass_content = 'both' def setup(app): """ Insert Google Analytics tracker. Based on this Stackoverflow suggestion: https://stackoverflow.com/a/41885884 """ app.add_javascript("https://www.googletagmanager.com/gtag/js?id=UA-146596996-1") app.add_javascript("ga_tracker.js") ================================================ FILE: docs/source/guide/install.rst ================================================ Installation ============ Prerequisites ------------- Condensa requires: * A working Linux installation (we use Ubuntu 18.04) * NVIDIA drivers and CUDA 10+ for GPU support * Python 3.5 or newer * PyTorch 1.0 or newer Installation from Source ------------------------ Retrieve the latest source code from the Condensa repository: .. code-block:: bash git clone https://github.com/NVlabs/condensa.git Navigate to the source code directory and run the following: .. code-block:: bash pip install -r requirements.txt To check the installation, run the unit test suite: .. code-block:: bash bash run_all_tests.sh -v ================================================ FILE: docs/source/guide/usage.rst ================================================ Usage ===== The `notebooks`_ folder contains Jupyter notebooks with step-by-step walkthroughs for various usage scenarios. In particular, check out the `AlexNet Compression`_ notebook for a simple getting started guide. The `examples`_ folder contains additional, more complex examples of using Condensa. .. _notebooks: https://github.com/NVlabs/condensa/blob/master/notebooks .. _AlexNet Compression: https://github.com/NVlabs/condensa/blob/master/notebooks/alexnet.ipynb .. _examples: https://github.com/NVlabs/condensa/blob/master/examples ================================================ FILE: docs/source/index.rst ================================================ .. Condensa documentation master file, created by sphinx-quickstart on Tue Sep 4 15:17:30 2018. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Condensa Documentation ====================== Condensa is a framework for **programmable model compression** in Python. It comes with a set of built-in compression operators which may be used to compose complex compression schemes targeting specific combinations of DNN, hardware platform, and optimization objective. Common programming abstractions such as conditionals, iteration, and recursion are all natively supported. To recover any accuracy lost during compression, Condensa uses a constrained optimization formulation of model compression and employs an Augmented Lagrangian-based algorithm as the optimizer. Condensa is under active development, and bug reports, pull requests, and other feedback are all highly appreciated. .. toctree:: :maxdepth: 2 :caption: Getting Started guide/install guide/usage .. toctree:: :maxdepth: 2 :caption: Module API Reference modules/schemes modules/pi modules/compressor modules/opt modules/finetuner modules/tensor modules/functional modules/util .. toctree:: :maxdepth: 2 :caption: Notes modules/lc Indices and Tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/source/modules/compressor.rst ================================================ Model Compressor ================ .. autoclass:: condensa.Compressor :members: .. automethod:: __init__ ================================================ FILE: docs/source/modules/finetuner.rst ================================================ Model Fine-Tuner ================ .. autoclass:: condensa.finetune.FineTuner :members: .. automethod:: __init__ ================================================ FILE: docs/source/modules/functional.rst ================================================ Aggregation Functions ===================== .. automodule:: condensa.functional :members: ================================================ FILE: docs/source/modules/lc.rst ================================================ L-C Optimizer Usage =================== ================================================ FILE: docs/source/modules/opt.rst ================================================ Optimizers ========== .. automodule:: condensa.opt Direct Compression Optimizer ---------------------------- .. autoclass:: condensa.opt.DC :members: .. automethod:: __init__ L-C Optimizer ------------- .. autoclass:: condensa.opt.LC :members: .. automethod:: __init__ ================================================ FILE: docs/source/modules/pi.rst ================================================ Compression Operators ===================== .. automodule:: condensa.pi :members: .. automodule:: condensa.delta :members: ================================================ FILE: docs/source/modules/schemes.rst ================================================ Compression Schemes =================== .. automodule:: condensa.schemes Composition ----------- .. autoclass:: condensa.schemes.Compose :members: .. automethod:: __init__ Unstructured Pruning -------------------- .. autoclass:: condensa.schemes.Prune :members: .. automethod:: __init__ Quantization ------------ .. autoclass:: condensa.schemes.Quantize :members: .. automethod:: __init__ Neuron Pruning -------------- .. autoclass:: condensa.schemes.NeuronPrune :members: .. automethod:: __init__ Filter Pruning -------------- .. autoclass:: condensa.schemes.FilterPrune :members: .. automethod:: __init__ Structured Pruning ------------------ .. autoclass:: condensa.schemes.StructurePrune :members: .. automethod:: __init__ Block Pruning ------------- .. autoclass:: condensa.schemes.BlockPrune :members: .. automethod:: __init__ ================================================ FILE: docs/source/modules/tensor.rst ================================================ Tensor Operators ================ .. automodule:: condensa.tensor :members: ================================================ FILE: docs/source/modules/util.rst ================================================ Utilities ========= .. automodule:: condensa.util :members: ================================================ FILE: examples/cifar/compress.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import sys import argparse import logging import csv import gzip import pickle import torch import torch.nn as nn import torch.nn.parallel import torch.nn.utils import torchvision.datasets as datasets import torch.utils.data as data import torch.backends.cudnn as cudnn from torchvision import datasets, transforms import condensa from condensa import schemes import util import models if __name__ == '__main__': model_names = sorted( name for name in models.__dict__ if not name.startswith("__") and callable(models.__dict__[name])) valid_schemes = ['PRUNE', 'PQ', 'FILTER'] parser = argparse.ArgumentParser(description='CIFAR LC Compression Script') parser.add_argument('--arch', default='AlexNet', choices=model_names, help='Model architecture: ' + ' | '.join(model_names) + ' (default: alexnet)') parser.add_argument('--dataset', default='cifar10', type=str) parser.add_argument('--model', help='Pretrained model filename') parser.add_argument('--steps', type=int, help='Number of LC iterations') parser.add_argument('--scheme', choices=valid_schemes, required=True, help='Compression scheme') parser.add_argument('--density', required=True, type=float, help='Density for pruning') parser.add_argument('--align', type=int, default=None, help='Alignment for structured pruning') parser.add_argument('--l_batch_size', type=int, default=128, help='Batch size for L step') parser.add_argument('--val_batch_size', type=int, default=100, help='Validation batch size') parser.add_argument('--lr', type=float, default=0.02, help='Initial learning rate') parser.add_argument('--lr_end', type=float, default=None, help='Ending learning rate') parser.add_argument('--lr_decay', type=float, default=None, help='Learning rate decay') parser.add_argument('--lr_schedule', type=int, nargs='+', default=None, help='Decrease learning rate at these epochs.') parser.add_argument('--lr_multiplier', type=float, default=None, help='Learning rate multiplier') parser.add_argument('--momentum', type=float, default=0.95, help='SGD momentum') parser.add_argument('--weight_decay', type=float, default=0, help='SGD momentum') parser.add_argument('--mb_iterations_per_l', type=int, default=2000, help='Minibatch iterations per L step') parser.add_argument('--mb_iterations_first_l', type=int, default=10000, help='Minibatch iterations for first L step') parser.add_argument('--mu_init', type=float, default=0.001, help='Initial value of mu') parser.add_argument('--mu_multiplier', type=float, help='mu multiplier') parser.add_argument('--mu_cap', type=float, default=10000, help='mu cap') parser.add_argument('--out', default='compressed_model.pth', help='Compressed output model filename') parser.add_argument('--csv', default=None, help='compression statistics CSV file') parser.add_argument('-v', '--verbose', help='verbose logging output', action='store_true') args = parser.parse_args() logging.basicConfig( level=logging.INFO if args.verbose else logging.WARNING, format='%(message)s') if args.dataset == 'cifar10': dataset = datasets.CIFAR10 num_classes = 10 elif args.dataset == 'cifar100': dataset = datasets.CIFAR100 num_classes = 100 else: raise RuntimeError('Invalid dataset: must be CIFAR-10 or CIFAR-100') # Load model architecture if args.arch.endswith('resnet'): model = models.__dict__[args.arch](num_classes=num_classes) else: model = models.__dict__[args.arch](num_classes=num_classes) model.load_state_dict(torch.load(args.model)) if args.scheme == 'PRUNE': scheme = schemes.Prune(args.density) elif args.scheme == 'PQ': scheme = schemes.Compose( [schemes.Prune(args.density), schemes.Quantize()]) elif args.scheme == 'FILTER': scheme = schemes.FilterPrune(args.density) else: raise RuntimeError('Unknown scheme: {}'.format(args.scheme)) print('SCHEME: {}'.format(scheme)) trainloader,valloader = \ util.cifar_train_val_loader(dataset, args.l_batch_size, args.val_batch_size) testloader = util.cifar_test_loader(dataset, args.val_batch_size) # Instantiate LC optimizer sgd_params = {'momentum': args.momentum, 'weight_decay': args.weight_decay} lc = condensa.opt.LC(steps=args.steps, l_optimizer=condensa.opt.lc.SGD, l_optimizer_params=sgd_params, lr=args.lr, lr_end=args.lr_end, lr_decay=args.lr_decay, lr_schedule=args.lr_schedule, lr_multiplier=args.lr_multiplier, mb_iterations_per_l=args.mb_iterations_per_l, mb_iterations_first_l=args.mb_iterations_first_l, mu_init=args.mu_init, mu_multiplier=args.mu_multiplier, mu_cap=args.mu_cap, debugging_flags={'custom_model_statistics': condensa.util.cnn_statistics}) criterion = nn.CrossEntropyLoss().cuda() # Compress model using Condensa compressor = condensa.Compressor(lc, scheme, model, trainloader, testloader, valloader, criterion) w = compressor.run() if args.out is not None: torch.save(w.state_dict(), args.out) logging.info('[Condensa] Compressed model written to disk') print('\n==== Profiling Results ====') for k, v in compressor.statistics.items(): print(' ' + k + ':', v) print('') if args.csv is not None: with open(args.csv, 'w') as csv_file: writer = csv.writer(csv_file) for k, v in compressor.statistics.items(): row = [k] if isinstance(v, list): row += [str(x) for x in v] else: row.append(str(v)) writer.writerow(row) csv_file.close() logging.info('[Condensa] Compression stats written to disk') ================================================ FILE: examples/cifar/compress_alexnet.sh ================================================ #!/usr/bin/env bash if [[ $# -eq 0 ]]; then echo "Usage: compress_alexnet.sh [scheme] [density] [#iterations]" exit 1 fi SCHEME=${1} DENSITY=${2} STEPS=${3} PREFIX=alexnet_${SCHEME}_${DENSITY//[\.]/_} python compress.py\ --arch alexnet --dataset cifar10\ --lr 0.01 --lr_end 1e-4\ --weight_decay 0\ --momentum 0.95\ --mb_iterations_per_l 3000\ --mb_iterations_first_l 30000\ --mu_init 1e-3 --mu_multiplier 1.1\ --l_batch_size 128\ --model trained/alexnet.pth\ --scheme ${SCHEME}\ --density ${DENSITY}\ --out compressed/${PREFIX}.pth\ --csv results/${PREFIX}.csv\ -v --steps ${STEPS} ================================================ FILE: examples/cifar/finetune.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import sys import argparse import logging import csv import gzip import pickle import torch import torch.nn as nn import torch.nn.parallel import torch.nn.utils import torchvision.datasets as datasets import torch.utils.data as data import torch.backends.cudnn as cudnn from torchvision import datasets, transforms import condensa import util import models if __name__ == '__main__': model_names = sorted( name for name in models.__dict__ if not name.startswith("__") and callable(models.__dict__[name])) parser = argparse.ArgumentParser(description='CIFAR fine-tuning script') parser.add_argument('--arch', default='AlexNet', choices=model_names, help='Model architecture: ' + ' | '.join(model_names) + ' (default: alexnet)') parser.add_argument('--dataset', default='cifar10', type=str) parser.add_argument('--model', help='Pretrained model filename') parser.add_argument('--epochs', type=int, help='Number of fine-tuning epochs') parser.add_argument('--batch_size', type=int, default=128, help='Batch size for training') parser.add_argument('--val_batch_size', type=int, default=128, help='Validation batch size') parser.add_argument('--lr', type=float, default=0.1, help='Learning rate') parser.add_argument('--lr_end', type=float, default=0.01, help='Ending learning rate') parser.add_argument('--momentum', type=float, default=0.9, help='SGD momentum') parser.add_argument('--weight_decay', type=float, default=0, help='SGD weight decay') parser.add_argument('--out', default='finetuned.pth', help='Fine-tuned output model filename') parser.add_argument('-v', '--verbose', help='verbose logging output', action='store_true') args = parser.parse_args() logging.basicConfig( level=logging.INFO if args.verbose else logging.WARNING, format='%(message)s') if args.dataset == 'cifar10': dataset = datasets.CIFAR10 num_classes = 10 elif args.dataset == 'cifar100': dataset = datasets.CIFAR100 num_classes = 100 else: raise RuntimeError('Invalid dataset: must be cifar10 or cifar100') # Load model architecture if args.arch.endswith('resnet'): model = models.__dict__[args.arch](num_classes=num_classes) else: model = models.__dict__[args.arch](num_classes=num_classes) model.load_state_dict(torch.load(args.model)) # Compute #nonzeros prior to fine-tuning nparams_w = torch.nn.utils.parameters_to_vector( model.parameters()).view(-1).nonzero().numel() # Only fine-tune fully-connected and convolutional layers layer_types = [torch.nn.Linear, torch.nn.Conv2d] trainloader,valloader = \ util.cifar_train_val_loader(dataset, args.batch_size, args.val_batch_size) testloader = util.cifar_test_loader(dataset, args.val_batch_size) criterion = torch.nn.CrossEntropyLoss().cuda() ft = condensa.FineTuner(model, layer_types) w_ft = ft.run(epochs=args.epochs, lr=args.lr, lr_end=args.lr_end, momentum=args.momentum, weight_decay=args.weight_decay, criterion=criterion, trainloader=trainloader, testloader=testloader, valloader=valloader, debugging_flags={'custom_model_statistics': condensa.util.cnn_statistics}) nparams_wft = torch.nn.utils.parameters_to_vector( w_ft.parameters()).view(-1).nonzero().numel() print('#Nonzero parameters: before [{}], after [{}]'.format( nparams_w, nparams_wft)) if args.out is not None: torch.save(w_ft.state_dict(), args.out) logging.info('[Condensa] Fine-tuned model written to disk') ================================================ FILE: examples/cifar/models/__init__.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import from .resnet import * from .vgg import * from .alexnet import * ================================================ FILE: examples/cifar/models/alexnet.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import import torch.nn as nn __all__ = ['alexnet'] class AlexNet(nn.Module): def __init__(self, num_classes=10): super(AlexNet, self).__init__() self.features = nn.Sequential( nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), nn.Conv2d(192, 384, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), ) self.classifier = nn.Linear(256, num_classes) def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x def alexnet(**kwargs): model = AlexNet(**kwargs) return model ================================================ FILE: examples/cifar/models/resnet.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import import torch.nn as nn import math __all__ = ['resnet20', 'resnet56', 'resnet110'] def conv3x3(in_planes, out_planes, stride=1): "3x3 convolution with padding" return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) class BasicBlock(nn.Module): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = nn.BatchNorm2d(planes) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(planes, planes) self.bn2 = nn.BatchNorm2d(planes) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class Bottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class ResNet(nn.Module): def __init__(self, block, depth, num_classes=10): super(ResNet, self).__init__() assert (depth - 2) % 6 == 0, 'depth should be 6n+2' n = (depth - 2) // 6 self.inplanes = 16 self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(16) self.relu = nn.ReLU(inplace=True) self.layer1 = self._make_layer(block, 16, n) self.layer2 = self._make_layer(block, 32, n, stride=2) self.layer3 = self._make_layer(block, 64, n, stride=2) self.avgpool = nn.AvgPool2d(8) self.fc = nn.Linear(64 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) downsample[0].condensa_nocompress = True layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.avgpool(x) x = x.view(x.size(0), -1) x = self.fc(x) return x def resnet20(**kwargs): return ResNet(BasicBlock, 20, **kwargs) def resnet56(**kwargs): return ResNet(Bottleneck, 56, **kwargs) def resnet110(**kwargs): return ResNet(Bottleneck, 110, **kwargs) ================================================ FILE: examples/cifar/models/vgg.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import import torch.nn as nn import math __all__ = [ 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19' ] class VGG(nn.Module): def __init__(self, features, num_classes=10): super(VGG, self).__init__() self.features = features self.classifier = nn.Linear(512, num_classes) self._initialize_weights() def forward(self, x): x = self.features(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) if m.bias is not None: m.bias.data.zero_() elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.Linear): n = m.weight.size(1) m.weight.data.normal_(0, 0.01) m.bias.data.zero_() def make_layers(cfg, batch_norm=False): layers = [] in_channels = 3 for v in cfg: if v == 'M': layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v return nn.Sequential(*layers) cfg = { 'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], } def vgg11(**kwargs): model = VGG(make_layers(cfg['A']), **kwargs) return model def vgg11_bn(**kwargs): model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs) return model def vgg13(**kwargs): model = VGG(make_layers(cfg['B']), **kwargs) return model def vgg13_bn(**kwargs): model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs) return model def vgg16(**kwargs): model = VGG(make_layers(cfg['D']), **kwargs) return model def vgg16_bn(**kwargs): model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs) return model def vgg19(**kwargs): model = VGG(make_layers(cfg['E']), **kwargs) return model def vgg19_bn(**kwargs): model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs) return model ================================================ FILE: examples/cifar/util.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import numpy as np import torch import torch.nn as nn import torch.utils.data as data import torchvision import torchvision.transforms as transforms from torch.utils.data.sampler import SubsetRandomSampler import condensa.data def cifar_train_val_loader(dataset, train_batch_size, val_batch_size, root='./data', random_seed=42, shuffle=True): """ Splits the CIFAR training set into training and validation sets (9:1 split) and returns the corresponding data loaders. """ transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), ]) trainset = dataset(root=root, train=True, download=True, transform=transform_train) valset = dataset(root=root, train=True, download=True, transform=None) num_train = len(trainset) indices = list(range(num_train)) split = 5000 if shuffle: np.random.seed(random_seed) np.random.shuffle(indices) train_idx, val_idx = indices[split:], indices[:split] trainsampler = SubsetRandomSampler(train_idx) valsampler = SubsetRandomSampler(val_idx) meanstd = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) trainloader = condensa.data.GPUDataLoader(trainset, batch_size=train_batch_size, shuffle=False, num_workers=8, sampler=trainsampler, meanstd=meanstd) valloader = condensa.data.GPUDataLoader(valset, batch_size=val_batch_size, shuffle=False, num_workers=8, sampler=valsampler, meanstd=meanstd) return (trainloader, valloader) def cifar_test_loader(dataset, batch_size, root='./data'): """ Construct a CIFAR test dataset loader. """ testset = dataset(root=root, train=False, download=True, transform=None) meanstd = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) testloader = condensa.data.GPUDataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=8, meanstd=meanstd) return testloader ================================================ FILE: notebooks/AlexNet.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tutorial: Compressing AlexNet with Condensa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this tutorial, we will walk through compressing the [AlexNet neural network](https://en.wikipedia.org/wiki/AlexNet) on the CIFAR-10 dataset using Condensa. We will target two different objectives: reducing total model memory footprint, and reducing the inference latency of the compressed model. \n", "\n", "We assume that Condensa is already installed and working (check out the [Installation Guide](https://nvlabs.github.io/condensa/guide/install.html) for instructions). If you'd like to follow along by executing the code in this notebook, please also make sure that [Jupyter](https://jupyter.org/) is installed on your local system." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Defining the Network" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's start by defining the AlexNet network architecture in PyTorch as shown below:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "\n", "class AlexNet(nn.Module):\n", " def __init__(self, num_classes=10):\n", " super(AlexNet, self).__init__()\n", " self.features = nn.Sequential(\n", " nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),\n", " nn.ReLU(inplace=True),\n", " nn.MaxPool2d(kernel_size=2, stride=2),\n", " nn.Conv2d(64, 192, kernel_size=5, padding=2),\n", " nn.ReLU(inplace=True),\n", " nn.MaxPool2d(kernel_size=2, stride=2),\n", " nn.Conv2d(192, 384, kernel_size=3, padding=1),\n", " nn.ReLU(inplace=True),\n", " nn.Conv2d(384, 256, kernel_size=3, padding=1),\n", " nn.ReLU(inplace=True),\n", " nn.Conv2d(256, 256, kernel_size=3, padding=1),\n", " nn.ReLU(inplace=True),\n", " nn.MaxPool2d(kernel_size=2, stride=2),\n", " )\n", " self.classifier = nn.Linear(256, num_classes)\n", "\n", " def forward(self, x):\n", " x = self.features(x)\n", " x = x.view(x.size(0), -1)\n", " x = self.classifier(x)\n", " return x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We instantiate this class into `model`:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = AlexNet()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Pre-Trained Weights" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that we have defined the network architecture, let us load a pre-trained set of weights into the model from the `AlexNet.pth` file included with this notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.load_state_dict(torch.load('AlexNet.pth'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preparing for Compression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's make sure CUDA is enabled in PyTorch." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "assert torch.cuda.is_available()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now create PyTorch data loaders for the training, test, and validation datasets. To save space, we wrap the data loading code into two utility functions: `cifar_train_val_loader` and `cifar_test_loader` (please refer to `util.py` in the current `notebooks` folder for the full code)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import util\n", "import torchvision.datasets as datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dataset = datasets.CIFAR10\n", "\n", "trainloader,valloader = util.cifar_train_val_loader(dataset, train_batch_size=128, val_batch_size=128)\n", "testloader = util.cifar_test_loader(dataset, batch_size=128)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The utilities above split the original training set into training and validation sets (using a 9:1 split) and perform data normalization for all datasets. They also utilize Condensa's `GPUDataLoader` to enable fast data prefetching and collation." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now define our loss criterion:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "criterion = nn.CrossEntropyLoss().cuda()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, we set our logging level to `INFO` so that Condensa prints out intermediate updates." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import logging\n", "logging.basicConfig(level=logging.INFO, format='%(message)s')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Two Different Compression Strategies" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this tutorial, we will explore two different ways of compressing the AlexNet network: one targeted at reducing the total model memory footprint (named `MEM`) and the other at reducing inference runtime latency (named `FLOP`)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### MEM Scheme" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `MEM` scheme aims to reduce the total model memory footprint (number of bytes required to store the non-zero elements of the compressed model). To this end, we perform a combination of _pruning_ (clipping model parameters to zero) and _quantization_ (using 16-bit floating point representation to store model weights instead of 32-bit). Expressing this scheme in Condensa is fairly straightforward using the built-in [`Compose`](https://nvlabs.github.io/condensa/modules/schemes.html#composition) scheme as shown below:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import condensa\n", "from condensa.schemes import Compose, Prune, Quantize\n", "\n", "MEM = Compose([Prune(0.02), Quantize(condensa.float16)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here, the [`Compose`](https://nvlabs.github.io/condensa/modules/schemes.html#composition) operator successively applies pruning followed by quantization to the model. The pruning density, or the ratio of non-zero parameters in the compressed model to the original one, is specified as 0.02 (2%). Condensa includes a number of other common schemes, including structured and block pruning, among others. For a list of available schemes, please refer to [this page](https://nvlabs.github.io/condensa/modules/schemes.html) in the API documentation. Users may also define their own custom schemes as Python functions that invoke the compression and decompression operators available in Condensa (see [`schemes.py`](https://github.com/NVlabs/condensa/blob/master/condensa/schemes.py) for examples of how to define custom schemes)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### FLOP Scheme" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "While the `MEM` scheme is effective at reducing the number of non-zero elements in a model, this may not directly translate into improvements in actual inference runtime. Most modern CPUs and GPUs are unable to detect individual zero elements and bypass computations on them in hardware. Instead, to realize speedups on such architectures, we perform filter pruning, which removes entire filters (3D blocks) at once from convolutional layers. This enables the weight tensors to be physically reshaped in the compressed model. We call this the `FLOP` scheme in this tutorial, and use the [`FilterPrune`](https://nvlabs.github.io/condensa/modules/schemes.html#filter-pruning) scheme in Condensa to define it." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from condensa.schemes import FilterPrune\n", "FLOP = condensa.schemes.FilterPrune(0.5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setting up the Optimizer" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To recover any accuracy lost due to compression, Condensa comes with a set of _optimizers_. Each optimizer takes a pre-trained model, applies the compression scheme, and tries to recover the original accuracy either directly or iteratively. In this tutorial, we'll be using Condensa's [L-C optimizer](https://nvlabs.github.io/condensa/modules/opt.html#l-c-optimizer). We instantiate it as follows:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lc = condensa.opt.LC(steps=35, # L-C iterations\n", " l_optimizer=condensa.opt.lc.SGD, # L-step sub-optimizer\n", " l_optimizer_params={'momentum':0.95}, # L-step sub-optimizer parameters\n", " lr=0.01, # Initial learning rate\n", " lr_end=1e-4, # Final learning rate\n", " mb_iterations_per_l=3000, # Mini-batch iterations per L-step\n", " mb_iterations_first_l=30000, # Mini-batch iterations for first L-step\n", " mu_init=1e-3, # Initial value of `mu`\n", " mu_multiplier=1.1, # Multiplier for `mu`\n", " mu_cap=10000, # Maximum value of `mu`\n", " debugging_flags={'custom_model_statistics':\n", " condensa.util.cnn_statistics})\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Each optimizer in Condensa has its own set of hyper-parameters which must be specified manually by the user. A full description of hyper-parameter tuning is beyond the scope of this tutorial, but for additional information on what each hyper-parameter represents and tips on finding its optimal value, we refer you to the [Condensa paper](https://arxiv.org/abs/1911.02497). In this notebook, we run the L-C algorithm for 35 iterations using the hyper-parameter values shown above. L-C hyper-parameter values for a number of common convolutional neural networks are also included in the [`examples`](https://github.com/NVlabs/condensa/blob/master/examples/) folder." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compressing the Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Once the optimizer is instantiated, we can go ahead and perform the actual compression using the [`Compressor`](https://nvlabs.github.io/condensa/modules/compressor.html#model-compressor) class and its [`run`](https://nvlabs.github.io/condensa/modules/compressor.html#condensa.compressor.Compressor.run) method. **Note:** the next two lines may take a while to execute!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compressor_MEM = condensa.Compressor(lc,\n", " MEM,\n", " model,\n", " trainloader,\n", " testloader,\n", " valloader,\n", " criterion)\n", "w_MEM = compressor_MEM.run()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "compressor_FLOP = condensa.Compressor(lc,\n", " FLOP,\n", " model,\n", " trainloader,\n", " testloader,\n", " valloader,\n", " criterion)\n", "\n", "w_FLOP = compressor_FLOP.run()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We specify the optimizer, scheme, input model, training, test, and validation sets, and the loss criterion to create an instance of the [`Compressor`](https://nvlabs.github.io/condensa/modules/compressor.html#model-compressor) class. Since the optimizer is specified as a parameter, we are able to easily experiment with alternative optimizers in Condensa.\n", "\n", "In the above snippets, `w_MEM` and `w_FLOP` contain the models compressed using the `MEM` and `FLOP` schemes, respectively. We can now save these to disk:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.save(w_MEM.state_dict(), 'AlexNet_MEM.pth')\n", "torch.save(w_FLOP.state_dict(), 'AlexNet_FLOP.pth')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Condensa also records various statistics about the compression process. These can be retrieved using the `statistics` member of the compressor object as follows:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for k,v in compressor_MEM.statistics.items():\n", " print('{}: {}'.format(k, v))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for k,v in compressor_FLOP.statistics.items():\n", " print('{}: {}'.format(k, v))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We notice that Condensa achieves top-1 test accuracies of **77.49%** and **76.81%** for the `MEM` and `FLOP` schemes, respectively (compared to the baseline accuracy of **77.07%** for AlexNet). For more complex models, it is possible to further improve accuracies via [model fine-tuning](https://nvlabs.github.io/condensa/modules/finetuner.html)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Memory and Runtime Reductions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using the `MEM` scheme, we reduce the model memory footprint by **97.83x**. Additionally, we achieve a **55.6%** reduction in FLOPs using the `FLOP` scheme." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## More Info" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We provide additional real-world compression examples targeting complex networks such as ResNet50 and VGG-19 in Condensa's [examples folder](https://github.com/NVlabs/condensa/tree/master/examples). Be sure to check them out!\n", "\n", "For more details on the design and implementation of Condensa, and its performance on real-world networks, please refer to the [Condensa paper](https://arxiv.org/abs/1911.02497)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 } ================================================ FILE: notebooks/util.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import numpy as np import torch import torch.nn as nn import torch.utils.data as data import torchvision import torchvision.transforms as transforms from torch.utils.data.sampler import SubsetRandomSampler import condensa.data def cifar_train_val_loader(dataset, train_batch_size, val_batch_size, root='./data', random_seed=42, shuffle=True): """ Splits the CIFAR training set into training and validation sets (9:1 split) and returns the corresponding data loaders. """ transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), ]) trainset = dataset(root=root, train=True, download=True, transform=transform_train) valset = dataset(root=root, train=True, download=True, transform=None) num_train = len(trainset) indices = list(range(num_train)) split = 5000 if shuffle: np.random.seed(random_seed) np.random.shuffle(indices) train_idx, val_idx = indices[split:], indices[:split] trainsampler = SubsetRandomSampler(train_idx) valsampler = SubsetRandomSampler(val_idx) meanstd = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) trainloader = condensa.data.GPUDataLoader(trainset, batch_size=train_batch_size, shuffle=False, num_workers=8, sampler=trainsampler, meanstd=meanstd) valloader = condensa.data.GPUDataLoader(valset, batch_size=val_batch_size, shuffle=False, num_workers=8, sampler=valsampler, meanstd=meanstd) return (trainloader, valloader) def cifar_test_loader(dataset, batch_size, root='./data'): """ Construct a CIFAR test dataset loader. """ testset = dataset(root=root, train=False, download=True, transform=None) meanstd = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)) testloader = condensa.data.GPUDataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=8, meanstd=meanstd) return testloader ================================================ FILE: run_all_tests.sh ================================================ #!/bin/bash VERBOSE=0 if [[ $1 == "-v" ]] || [[ $1 == "--verbose" ]]; then VERBOSE=1 fi for f in $(find test -name '*.py'); do if [[ $VERBOSE -eq 1 ]]; then echo "[Condensa Test] $f" fi python3 $f done ================================================ FILE: setup.cfg ================================================ [metadata] description-file = README.md ================================================ FILE: setup.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os from setuptools import setup from setuptools import find_packages cwd = os.path.dirname(os.path.abspath(__file__)) version = '0.5.0-beta' def build_deps(): version_path = os.path.join(cwd, 'condensa', 'version.py') with open(version_path, 'w') as f: f.write("__version__ = '{}'\n".format(version)) build_deps() with open(os.path.join(cwd, 'README.md'), encoding='utf-8') as f: long_description = f.read() install_requires = ['numpy', 'torch>=1.0.0', 'tqdm'] setup(name='condensa', version=version, description='Condensa Programmable Model Compression Framework', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/NVLabs/condensa', author='Saurav Muralidharan', author_email='sauravm@nvidia.com', license='Apache License 2.0', keywords=['compression', 'quantization', 'pruning'], install_requires=install_requires, packages=find_packages(), classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'Topic :: Software Development :: Build Tools', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries', 'Topic :: Scientific/Engineering :: Artificial Intelligence' ], ) ================================================ FILE: test/schemes/test_prune.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch import condensa import condensa.schemes as schemes import condensa.tensor as T import condensa.functional as F def test_prune(device): fc = torch.nn.Linear(100, 10, bias=True).to(device) scheme = schemes.Prune(0.5) threshold = scheme.threshold(fc) scheme.pi(fc) t = fc.weight.data.abs().view(-1) nzs = torch.index_select(t, 0, t.nonzero().view(-1)) assert (nzs >= threshold).all() def test_filter_prune(device): conv = torch.nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5, bias=True).to(device) criteria = F.l2norm scheme = schemes.FilterPrune(0.5, criteria=criteria, prune_bias=True) threshold = scheme.threshold(conv) scheme.pi(conv) # Check against threshold agg = T.aggregate_filters(conv.weight.data, criteria).view(-1) nzs = torch.index_select(agg, 0, agg.nonzero().view(-1)) assert (nzs >= threshold).all() # Check biases: all zero filters must have corresponding zero biases zero_indices = (agg == 0).nonzero().view(-1) z = torch.index_select(conv.bias.data, 0, zero_indices) assert (z == 0.).all() def test_neuron_prune(device): fc = torch.nn.Linear(100, 10, bias=True).to(device) criteria = F.l2norm scheme = schemes.NeuronPrune(0.5, criteria=criteria, prune_bias=True) threshold = scheme.threshold(fc) scheme.pi(fc) # Check against threshold agg = T.aggregate_neurons(fc.weight.data, criteria).view(-1) nzs = torch.index_select(agg, 0, agg.nonzero().view(-1)) assert (nzs >= threshold).all() # Check biases: all zero neurons must have corresponding zero biases zero_indices = (agg == 0).nonzero().view(-1) z = torch.index_select(fc.bias.data, 0, zero_indices) assert (z == 0.).all() def test_block_prune(device, blocksize=(10,10)): fc = torch.nn.Linear(100, 100, bias=False).to(device) criteria = F.l2norm scheme = schemes.BlockPrune(0.5, criteria=criteria, block_size=blocksize) threshold = scheme.threshold(fc) scheme.pi(fc) # Check against threshold agg = T.aggregate(fc.weight.data, blocksize, criteria).view(-1) nzs = torch.index_select(agg, 0, agg.nonzero().view(-1)) assert (nzs >= threshold).all() if __name__ == '__main__': test_prune('cpu') test_filter_prune('cpu') test_neuron_prune('cpu') test_block_prune('cpu') if torch.cuda.is_available(): test_prune('cuda:0') test_filter_prune('cuda:0') test_neuron_prune('cuda:0') test_block_prune('cuda:0') ================================================ FILE: test/schemes/test_qz.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import torch import condensa from condensa import schemes def test_float16(device): scheme = schemes.Quantize(condensa.float16) fc = torch.nn.Linear(100, 10).float().to(device) scheme.pi(fc) assert fc.weight.dtype == torch.float16 scheme.delta(fc) assert fc.weight.dtype == torch.float32 if __name__ == '__main__': test_float16('cpu') if torch.cuda.is_available(): test_float16('cpu') ================================================ FILE: test/tensor/test_mask_apply.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import torch import condensa.tensor as T def test_apply_mask(device): a = torch.randn(20).to(device) threshold = T.threshold(a, 0.3) mask = T.simple_mask(a, threshold) T.apply_mask_inplace(a, mask) for i in range(len(a)): assert a[i] == 0. or abs(a[i]) >= threshold if __name__ == '__main__': test_apply_mask('cpu') if torch.cuda.is_available(): test_apply_mask('cuda:0') ================================================ FILE: test/tensor/test_maskgen.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import torch import condensa.tensor as T def test_simple_mask(device): a = torch.randn(20).to(device) threshold = T.threshold(a, 0.3) mask = T.simple_mask(a, threshold) for i in range(len(a)): if abs(a[i]) >= threshold: assert mask[i] == 1 else: assert mask[i] == 0 def test_block_mask(device): pass if __name__ == '__main__': test_simple_mask('cpu') test_block_mask('cpu') if torch.cuda.is_available(): test_simple_mask('cuda:0') test_block_mask('cuda:0') ================================================ FILE: test/tensor/test_util.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import torch import condensa.tensor as T def test_density(device): zeros = torch.zeros(10).to(device) ones = torch.ones(30).to(device) assert T.density(zeros) == 0. assert T.density(ones) == 1. assert T.density(torch.cat((zeros, ones))) == 0.75 def test_sparsity(device): zeros = torch.zeros(10).to(device) ones = torch.ones(30).to(device) assert T.sparsity(zeros) == 1. assert T.sparsity(ones) == 0. assert T.sparsity(torch.cat((zeros, ones))) == 0.25 def test_threshold(device): a = torch.IntTensor(np.arange(0, 30)).to(device) threshold2 = T.threshold(a, 0.2) threshold3 = T.threshold(a, 0.3) threshold5 = T.threshold(a, 0.5) assert threshold2.item() == 24 assert threshold3.item() == 21 assert threshold5.item() == 15 if __name__ == '__main__': test_density('cpu') test_sparsity('cpu') test_threshold('cpu') if torch.cuda.is_available(): test_density('cpu') test_sparsity('cpu') test_threshold('cpu') ================================================ FILE: test/test_lr.py ================================================ # Copyright 2019 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import condensa import condensa.lr as lr def test_interval_lr(): schedule = lr.IntervalLR(1., 1e-6, 100) assert schedule.learning_rate == 1. for i in range(0, 100): schedule.step() assert np.isclose(schedule.learning_rate, 1e-6) def test_decayed_lr(): schedule = lr.DecayedLR(100.0, [10, 20], gamma=0.1) for i in range(0, 30): schedule.step() if i == 10: assert schedule.learning_rate == 10.0 elif i == 20: assert schedule.learning_rate == 1.0 def test_exp_decayed_lr(): schedule = lr.ExpDecayedLR(1.0, 0.1) for i in range(0, 100): schedule.step() assert schedule.learning_rate == 1.0 * (0.1**100) if __name__ == '__main__': test_interval_lr() test_decayed_lr() test_exp_decayed_lr()