Repository: nibuiro/CondConv-pytorch Branch: master Commit: 888fd03b5e1e Files: 6 Total size: 12.5 KB Directory structure: gitextract_okqmngl0/ ├── LICENSE ├── README.md ├── condconv/ │ ├── __init__.py │ └── condconv.py ├── requirements.txt └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 mishimori Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # CondConv Implementation of [CondConv: Conditionally Parameterized Convolutions for Efficient Inference](https://arxiv.org/abs/1904.04971) in PyTorch. ## Abstract Convolutional layers are one of the basic building blocks of modern deep neural networks. 
One fundamental assumption is that convolutional kernels should be shared for all examples in a dataset. We propose conditionally parameterized convolutions (CondConv), which learn specialized convolutional kernels for each example. Replacing normal convolutions with CondConv enables us to increase the size and capacity of a network, while maintaining efficient inference. We demonstrate that scaling networks with CondConv improves the performance and inference cost trade-off of several existing convolutional neural network architectures on both classification and detection tasks. On ImageNet classification, our CondConv approach applied to EfficientNet-B0 achieves state-of-the-art performance of 78.3% accuracy with only 413M multiply-adds. Code and checkpoints for the CondConv TensorFlow layer and CondConv-EfficientNet models are available at: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/condconv. ## Installation pip install git+https://github.com/nibuiro/CondConv-pytorch.git ## Usage For 2D inputs (CondConv2D): ```python import torch from torch import nn from condconv import CondConv2D class Model(nn.Module): def __init__(self, num_experts, dropout_rate=0.2): super(Model, self).__init__() self.condconv2d = CondConv2D(10, 128, kernel_size=1, num_experts=num_experts, dropout_rate=dropout_rate) def forward(self, x): x = self.condconv2d(x) return x ``` ## Reference [Yang et al., 2019] CondConv: Conditionally Parameterized Convolutions for Efficient Inference ================================================ FILE: condconv/__init__.py ================================================ from .condconv import CondConv1D, CondConv2D __all__ = ['CondConv1D', 'CondConv2D'] ================================================ FILE: condconv/condconv.py ================================================ import functools import torch from torch import nn import torch.nn.functional as F from torch.nn.modules.conv import _ConvNd from torch.nn.modules.utils import _pair from torch.nn.parameter import 
Parameter class _routing(nn.Module): def __init__(self, in_channels, num_experts, dropout_rate): super(_routing, self).__init__() self.dropout = nn.Dropout(dropout_rate) self.fc = nn.Linear(in_channels, num_experts) def forward(self, x): x = torch.flatten(x) x = self.dropout(x) x = self.fc(x) return F.sigmoid(x) class CondConv2D(_ConvNd): r"""Learn specialized convolutional kernels for each example. As described in the paper `CondConv: Conditionally Parameterized Convolutions for Efficient Inference`_ , conditionally parameterized convolutions (CondConv), which challenge the paradigm of static convolutional kernels by computing convolutional kernels as a function of the input. Args: in_channels (int): Number of channels in the input image out_channels (int): Number of channels produced by the convolution kernel_size (int or tuple): Size of the convolving kernel stride (int or tuple, optional): Stride of the convolution. Default: 1 padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0 padding_mode (string, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` num_experts (int): Number of experts per layer Shape: - Input: :math:`(N, C_{in}, H_{in}, W_{in})` - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where .. math:: H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor .. 
math:: W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor Attributes: weight (Tensor): the learnable weights of the module of shape :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},` :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` bias (Tensor): the learnable bias of the module of shape (out_channels). If :attr:`bias` is ``True``, then the values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}` .. _CondConv: Conditionally Parameterized Convolutions for Efficient Inference: https://arxiv.org/abs/1904.04971 """ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2): kernel_size = _pair(kernel_size) stride = _pair(stride) padding = _pair(padding) dilation = _pair(dilation) super(CondConv2D, self).__init__( in_channels, out_channels, kernel_size, stride, padding, dilation, False, _pair(0), groups, bias, padding_mode) self._avg_pooling = functools.partial(F.adaptive_avg_pool2d, output_size=(1, 1)) self._routing_fn = _routing(in_channels, num_experts, dropout_rate) self.weight = Parameter(torch.Tensor( num_experts, out_channels, in_channels // groups, *kernel_size)) self.reset_parameters() def _conv_forward(self, input, weight): if self.padding_mode != 'zeros': return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode), weight, self.bias, self.stride, _pair(0), self.dilation, self.groups) return F.conv2d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) def forward(self, inputs): 
b, _, _, _ = inputs.size() res = [] for input in inputs: input = input.unsqueeze(0) pooled_inputs = self._avg_pooling(input) routing_weights = self._routing_fn(pooled_inputs) kernels = torch.sum(routing_weights[: ,None, None, None, None] * self.weight, 0) out = self._conv_forward(input, kernels) res.append(out) return torch.cat(res, dim=0) class CondConv1D(_ConvNd): r"""Learn specialized convolutional kernels for each example. As described in the paper `CondConv: Conditionally Parameterized Convolutions for Efficient Inference`_ , conditionally parameterized convolutions (CondConv), which challenge the paradigm of static convolutional kernels by computing convolutional kernels as a function of the input. Args: in_channels (int): Number of channels in the input image out_channels (int): Number of channels produced by the convolution kernel_size (int): Size of the convolving kernel stride (int, optional): Stride of the convolution. Default: 1 padding (int, optional): Zero-padding added to both sides of the input. Default: 0 padding_mode (string, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` dilation (int, optional): Spacing between kernel elements. Default: 1 groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` num_experts (int): Number of experts per layer Shape: - Input: :math:`(N, C_{in}, L_{in})` - Output: :math:`(N, C_{out}, L_{out})` Attributes: weight (Tensor): the learnable weights of the module of shape :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}}, \text{kernel\_size})`. The values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}` bias (Tensor): the learnable bias of the module of shape (out_channels). 
If :attr:`bias` is ``True``, then the values of these weights are sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}` .. _CondConv: Conditionally Parameterized Convolutions for Efficient Inference: https://arxiv.org/abs/1904.04971 """ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', num_experts=3, dropout_rate=0.2): kernel_size = (kernel_size,) stride = (stride,) padding = (padding,) dilation = (dilation,) super(CondConv1D, self).__init__( in_channels, out_channels, kernel_size, stride, padding, dilation, False, (0,), groups, bias, padding_mode) self._avg_pooling = functools.partial(F.adaptive_avg_pool1d, output_size=1) self._routing_fn = _routing(in_channels, num_experts, dropout_rate) self.weight = Parameter(torch.Tensor( num_experts, out_channels, in_channels // groups, *kernel_size)) self.reset_parameters() def _conv_forward(self, input, weight): if self.padding_mode != 'zeros': return F.conv1d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode), weight, self.bias, self.stride, 0, self.dilation, self.groups) return F.conv1d(input, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) def forward(self, inputs): b, _, _ = inputs.size() res = [] for input in inputs: input = input.unsqueeze(0) pooled_inputs = self._avg_pooling(input) routing_weights = self._routing_fn(pooled_inputs) kernels = torch.sum(routing_weights[: ,None, None, None] * self.weight, 0) out = self._conv_forward(input, kernels) res.append(out) return torch.cat(res, dim=0) ================================================ FILE: requirements.txt ================================================ torch>=0.4.1 ================================================ FILE: setup.py ================================================ from setuptools import setup, find_packages with open('requirements.txt', 
encoding='utf-8') as f: required = f.read().splitlines() with open('README.md', encoding='utf-8') as f: long_description = f.read() setup( name='condconv', version='1.0.0', packages=find_packages(), long_description=long_description, long_description_content_type='text/markdown', install_requires=required, url='https://github.com/nibuiro/CondConv-pytorch', license='MIT', author='nibuiro', author_email='immay1999@gmail.com', description='Implementation of condconv: Conditionally Parameterized Convolutions for Efficient Inference. ' )