Repository: pathak22/unsupervised-video
Branch: master
Commit: 10780c7a3cb9
Files: 11
Total size: 32.2 KB
Directory structure:
gitextract_o89s89cr/
├── .gitignore
├── LICENSE
├── README.md
├── image_transform_layer.py
├── models/
│ ├── download_caffe_models.sh
│ ├── download_torch_models.sh
│ └── download_torch_motion_model.sh
└── motionseg/
├── DeepMaskAlexNet.lua
├── SpatialSymmetricPadding.lua
├── load_motionmodel.lua
└── utilsModel.lua
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
models/*.tar.gz
models/caffemodels/
models/torchmodels/
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2017 Deepak Pathak
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
## Learning Features by Watching Objects Move ##
In CVPR 2017. [[Project Website]](http://cs.berkeley.edu/~pathak/unsupervised_video/).
[Deepak Pathak](https://people.eecs.berkeley.edu/~pathak/), [Ross Girshick](http://www.rossgirshick.info/), [Piotr Dollár](https://pdollar.github.io/), [Trevor Darrell](https://people.eecs.berkeley.edu/~trevor/), [Bharath Hariharan](http://home.bharathh.info/)
University of California, Berkeley
Facebook AI Research (FAIR)
This is the code for our [CVPR 2017 paper on Unsupervised Learning using unlabeled videos](http://cs.berkeley.edu/~pathak/unsupervised_video/). This repository contains models trained by the unsupervised motion grouping algorithm both in Caffe and Torch. If you find this work useful in your research, please cite:
@inproceedings{pathakCVPR17learning,
Author = {Pathak, Deepak and Girshick, Ross and Doll\'{a}r,
Piotr and Darrell, Trevor and Hariharan, Bharath},
Title = {Learning Features by Watching Objects Move},
Booktitle = {Computer Vision and Pattern Recognition ({CVPR})},
Year = {2017}
}
### 1) Fetching Models for Unsupervised Transfer
The models below contain only the layers that are used for unsupervised transfer learning. For the full model that contains motion segmentation, see the next section.
1. Clone the repository
```Shell
git clone https://github.com/pathak22/unsupervised-video.git
```
2. Fetch caffe models
```Shell
cd unsupervised-video/
bash ./models/download_caffe_models.sh
# This will populate the `./models/` folder with trained models.
```
The models were initially trained in Torch and then converted to caffe. Hence, please include pycaffe based `image_transform_layer.py` in your folder. It converts the scale and mean of the input image as needed.
3. Fetch torch models
```Shell
cd unsupervised-video/
bash ./models/download_torch_models.sh
# This will populate the `./models/` folder with trained models.
```
### 2) Fetching Motion Segmentation models
Follow the instructions below to download full motion segmentation model trained on the automatically selected 205K videos from YFCC100m. I trained it in Torch, but you can train your own model from the full data [available here](https://people.eecs.berkeley.edu/~pathak/unsupervised_video/index.html#data) in any deep learning package using the training details from paper.
```Shell
cd unsupervised-video/
bash ./models/download_torch_motion_model.sh
# This will populate the `./models/` folder with trained model.
cd motionseg/
th load_motionmodel.lua -input ../models/motionSegmenter_fullModel.t7
```
### 3) Additional Software Packages
We are releasing software packages which were developed in the project, but could be generally useful for computer vision research. If you find them useful, please consider citing our work. These include:
(a) uNLC [github]: Implementation of an unsupervised bottom-up video segmentation algorithm, which is an unsupervised adaptation of the NLC algorithm by Faktor and Irani, BMVC 2014. For additional details, see Section 5.1 in the paper.
(b) PyFlow [github]: This is a Python wrapper around Ce Liu's C++ implementation of Coarse2Fine Optical Flow. It is used inside the uNLC implementation, and is also generally useful as an independent package.
================================================
FILE: image_transform_layer.py
================================================
"""
Transform images for compatibility with models trained with
https://github.com/facebook/fb.resnet.torch.
Usage in model prototxt:
layer {
name: 'data_xform'
type: 'Python'
bottom: 'data_caffe'
top: 'data'
python_param {
module: 'image_transform_layer'
layer: 'TorchImageTransformLayer'
}
}
"""
import caffe
import numpy as np
class TorchImageTransformLayer(caffe.Layer):
    """Caffe Python layer that converts caffe-style image blobs
    (BGR channel order, values in [0, 255]) into the format expected by
    models trained with fb.resnet.torch (RGB, [0, 1], per-channel
    standardized). Gradients are not propagated through this layer.
    """

    def setup(self, bottom, top):
        # Per-channel RGB mean/std, shaped (1, 3, 1, 1) so they broadcast
        # over (N, C, H, W) blobs.
        self.PIXEL_MEANS = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
        self.PIXEL_STDS = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
        top[0].reshape(*(bottom[0].shape))

    def forward(self, bottom, top):
        images = bottom[0].data
        # 1. Reorder BGR -> RGB and rescale pixel values to [0, 1].
        #    (Fancy indexing copies, so the bottom blob is never mutated.)
        images = images[:, (2, 1, 0), :, :] / 255.0
        # 2./3. Standardize each channel: (x - mean) / std.
        images = (images - self.PIXEL_MEANS) / self.PIXEL_STDS
        top[0].reshape(*images.shape)
        top[0].data[...] = images

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
================================================
FILE: models/download_caffe_models.sh
================================================
#!/bin/bash
# Download the unsupervised-video caffe models into this directory and
# verify their md5 checksum. Exits 0 on success, 1 on checksum mismatch.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/" && pwd )"
cd "$DIR"
FILE=caffemodels.tar.gz
URL=https://dl.fbaipublicfiles.com/unsupervised-video/$FILE
CHECKSUM=29e4a50f4fc77b0563a201f28577a895
if [ ! -f "$FILE" ]; then
  echo "Downloading the unsupervised video caffemodels (829MB)..."
  wget "$URL" -O "$FILE"
  echo "Unzipping..."
  tar zxvf "$FILE"
  echo "Downloading Done."
else
  echo "File already exists. Checking md5..."
fi
# Compute the md5 digest portably across Linux / macOS / Solaris.
os=`uname -s`
if [ "$os" = "Linux" ]; then
  checksum=`md5sum "$FILE" | awk '{ print $1 }'`
elif [ "$os" = "Darwin" ]; then
  checksum=`cat "$FILE" | md5`
elif [ "$os" = "SunOS" ]; then
  checksum=`digest -a md5 -v "$FILE" | awk '{ print $4 }'`
fi
if [ "$checksum" = "$CHECKSUM" ]; then
  echo "Checksum is correct. File was correctly downloaded."
  exit 0
else
  echo "Checksum is incorrect. DELETE and download again."
  # Bug fix: previously fell through and exited 0 even on a bad checksum.
  exit 1
fi
================================================
FILE: models/download_torch_models.sh
================================================
#!/bin/bash
# Download the unsupervised-video torch models into this directory and
# verify their md5 checksum. Exits 0 on success, 1 on checksum mismatch.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/" && pwd )"
cd "$DIR"
FILE=torchmodels.tar.gz
URL=https://dl.fbaipublicfiles.com/unsupervised-video/$FILE
CHECKSUM=6ead77d7b387b51426ccc5d3c95f78bb
if [ ! -f "$FILE" ]; then
  echo "Downloading the unsupervised video torchmodels (803MB)..."
  wget "$URL" -O "$FILE"
  echo "Unzipping..."
  tar zxvf "$FILE"
  echo "Downloading Done."
else
  echo "File already exists. Checking md5..."
fi
# Compute the md5 digest portably across Linux / macOS / Solaris.
os=`uname -s`
if [ "$os" = "Linux" ]; then
  checksum=`md5sum "$FILE" | awk '{ print $1 }'`
elif [ "$os" = "Darwin" ]; then
  checksum=`cat "$FILE" | md5`
elif [ "$os" = "SunOS" ]; then
  checksum=`digest -a md5 -v "$FILE" | awk '{ print $4 }'`
fi
if [ "$checksum" = "$CHECKSUM" ]; then
  echo "Checksum is correct. File was correctly downloaded."
  exit 0
else
  echo "Checksum is incorrect. DELETE and download again."
  # Bug fix: previously fell through and exited 0 even on a bad checksum.
  exit 1
fi
================================================
FILE: models/download_torch_motion_model.sh
================================================
#!/bin/bash
# Download the full motion-segmentation torch model into this directory and
# verify its md5 checksum. Exits 0 on success, 1 on checksum mismatch.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/" && pwd )"
cd "$DIR"
FILE=torchmodels_motion.tar.gz
URL=https://dl.fbaipublicfiles.com/unsupervised-video/$FILE
CHECKSUM=497efcdf10630cf6fd83d9b367765934
if [ ! -f "$FILE" ]; then
  echo "Downloading the unsupervised video motion segmentation torchmodel (238MB)..."
  wget "$URL" -O "$FILE"
  echo "Unzipping..."
  tar zxvf "$FILE"
  echo "Downloading Done."
else
  echo "File already exists. Checking md5..."
fi
# Compute the md5 digest portably across Linux / macOS / Solaris.
os=`uname -s`
if [ "$os" = "Linux" ]; then
  checksum=`md5sum "$FILE" | awk '{ print $1 }'`
elif [ "$os" = "Darwin" ]; then
  checksum=`cat "$FILE" | md5`
elif [ "$os" = "SunOS" ]; then
  checksum=`digest -a md5 -v "$FILE" | awk '{ print $4 }'`
fi
if [ "$checksum" = "$CHECKSUM" ]; then
  echo "Checksum is correct. File was correctly downloaded."
  exit 0
else
  echo "Checksum is incorrect. DELETE and download again."
  # Bug fix: previously fell through and exited 0 even on a bad checksum.
  exit 1
fi
================================================
FILE: motionseg/DeepMaskAlexNet.lua
================================================
--[[ DeepMask model:
When initialized, it creates/load the common trunk, the maskBranch and the
scoreBranch or the colorBranch or the flowBranch.
---- deepmask class members:
-- self.trunk: the common trunk (pre-trained resnet50)
-- self.maskBranch: the mask head architecture
-- self.scoreBranch: the score head architecture
-- self.colorBranch: the colorization head architecture
-- self.flowBranch: the flow head architecture
]]
require 'nn'
require 'nnx'
require 'cunn'
require 'cudnn'
local utils = paths.dofile('utilsModel.lua')
paths.dofile('SpatialSymmetricPadding.lua')
local DeepMask,_ = torch.class('nn.DeepMask','nn.Container')
-- function: linear2conv
-- Convert a pre-trained nn.Linear (fc6/fc7) into an equivalent
-- cudnn.SpatialConvolution so the trunk becomes fully convolutional.
-- BatchNormalization modules are patched to accept 4D input; anything
-- else is returned unchanged.
local function linear2conv(x)
  if torch.typename(x):find('Linear') then
    -- hard-coding for fc6 and fc7: kSz=kernelSize=inputFeatureMapSize
    -- (fc6 has > 5000 inputs, coming from a 6x6 feature map => 6x6 kernel;
    --  fc7 takes a 1x1 map => 1x1 kernel)
    local kSz = x.weight:size(2) > 5000 and 6 or 1
    local nInp = x.weight:size(2)/(kSz*kSz)
    local nOut = x.weight:size(1)
    local w = torch.reshape(x.weight,nOut,nInp,kSz,kSz)
    local y = cudnn.SpatialConvolution(nInp,nOut,kSz,kSz,1,1,0,0)
    y.weight:copy(w)
    -- NOTE(review): gradWeight is seeded with the weights (not x.gradWeight);
    -- presumably harmless since gradients are overwritten on backward -- confirm.
    y.gradWeight:copy(w)
    if x.bias~=nil then
      y.bias:copy(x.bias)
      y.gradBias:copy(x.gradBias)
    end
    return y
  elseif torch.typename(x):find('cudnn.BatchNormalization') then
    -- 1D BatchNormalization can run on 4D input once nDim is patched
    x.nDim = 4
    return x
  else
    return x
  end
end
--------------------------------------------------------------------------------
-- function: constructor
function DeepMask:__init(config)
  -- Build the DeepMask model: a shared trunk plus, depending on config,
  -- a flow head, OR a mask head paired with either a color or a score head.
  -- config fields read here: color, flow, noFC, symmPad, centralCrop,
  -- bottleneck (plus everything consumed by the create*Branch methods).
  self.color = config.color
  self.flow = config.flow
  if config.noFC then
    print('| create AlexNet (w/o FCs) Trunk')
  else
    print('| create AlexNet (including FCs) Trunk')
  end
  if config.symmPad then
    print('| using symmetric padding')
  else
    print('| no symmetric padding')
  end
  if config.centralCrop then
    print('| using central cropping')
  else
    print('| no central cropping')
  end
  if config.bottleneck then
    print('| using bottleneck')
  else
    print('| no bottleneck')
  end
  -- helper: total number of parameters in a module
  local function nParams(m)
    local n = 0
    for _, p in pairs(m:parameters()) do n = n + p:nElement() end
    return n
  end
  -- create common trunk
  self:createTrunk(config)
  local npt = nParams(self.trunk)
  -- fix: these log messages previously misspelled "parameters" as "paramaters"
  print(string.format('| number of parameters trunk: %d', npt))
  if self.flow then
    -- create flow head (flow models have no mask/score/color heads)
    self:createFlowBranch(config)
    local npf = nParams(self.flowBranch)
    print(string.format('| number of parameters flow branch: %d', npf))
    print(string.format('| number of parameters total: %d', npt+npf))
    return
  end
  -- create mask head
  self:createMaskBranch(config)
  local npm = nParams(self.maskBranch)
  print(string.format('| number of parameters mask branch: %d', npm))
  if self.color then
    -- create colorization head
    self:createColorBranch(config)
    local npc = nParams(self.colorBranch)
    print(string.format('| number of parameters color branch: %d', npc))
    print(string.format('| number of parameters total: %d', npt+npm+npc))
  else
    -- create score head
    self:createScoreBranch(config)
    local nps = nParams(self.scoreBranch)
    print(string.format('| number of parameters score branch: %d', nps))
    print(string.format('| number of parameters total: %d', npt+nps+npm))
  end
end
--------------------------------------------------------------------------------
-- function: create common trunk
function DeepMask:createTrunk(config)
  -- Build the shared AlexNet trunk. Depending on config it either loads an
  -- ImageNet pre-trained network or instantiates one from scratch, then
  -- strips the classifier layers and appends bottleneck/padding adaptations.
  -- Sets self.fSz (trunk output map size), self.channels, self.bottleneck,
  -- and self.trunk (on GPU). Returns the trunk.
  -- size of feature maps at end of trunk
  if config.padAlexNet then
    if config.iSz==180 then
      -- self.fSz = config.noFC and 12 or 5 -- alexnet_padded w/o dilation
      self.fSz = 12 -- alexnet_padded w/ dilation
    else
      print('Unknown size setting !! Cant create AlexNet trunk')
      os.exit()
    end
  else
    -- iSz=227 ; for w/ FC
    -- iSz=179 ; for w/o FC
    -- (-1 marks unsupported input-size/FC combinations)
    if config.iSz==160 then
      self.fSz = config.noFC and 8 or -1
    elseif config.iSz==179 then
      self.fSz = config.noFC and 10 or -1
    elseif config.iSz==227 then
      self.fSz = config.noFC and 13 or 1
    else
      print('Unknown size setting !! Cant create AlexNet trunk')
      os.exit()
    end
  end
  -- trunk output channels: 128 (extra conv added below) or 4096 (fc7)
  self.channels = config.noFC and 128 or 4096
  self.bottleneck = self.channels*self.fSz*self.fSz
  -- load trunk
  local trunk
  print(' | creating trunk:')
  if #config.useImagenet > 0 then
    print(string.format(' | using Imagenet pre-trained AlexNet: %s',
      config.useImagenet))
    trunk = torch.load(config.useImagenet)
    -- Format of sgross's old fb.resnet training code
    if trunk.state ~= nil then
      trunk = trunk.state.network
    end
    -- remove DataParallelTable
    if torch.type(trunk) == 'nn.DataParallelTable' then
      trunk = trunk:get(1)
    end
    if config.useBN then
      print(' | keeping BatchNorm in pre-trained model (if present)')
    else
      print(' | fixing BatchNorm in pre-trained model (if present)')
      utils.BNtoFixed(trunk, true)
    end
  elseif config.useBN then
    print(' | using AlexNet with BatchNorm from scratch !')
    local alexnet = paths.dofile('./models/alexnetbn.lua')
    trunk = alexnet()
  else
    print(' | using AlexNet without BatchNorm from scratch !')
    local alexnet = config.padAlexNet and paths.dofile(
      './models/alexnet_padded.lua') or paths.dofile('./models/alexnet.lua')
    trunk = alexnet()
  end
  -- print(' | loaded trunk model:')
  -- print(trunk)
  -- remove fc8 (each :remove() pops the last module)
  trunk:remove();
  if config.noFC then
    -- remove fc7 (linear + optional BatchNorm + activation, one at a time)
    trunk:remove();trunk:remove();
    if torch.typename(trunk.modules[#trunk.modules]):find('BatchNorm') then
      trunk:remove();
    end
    trunk:remove();
    -- remove fc6
    trunk:remove();trunk:remove();
    if torch.typename(trunk.modules[#trunk.modules]):find('BatchNorm') then
      trunk:remove();
    end
    trunk:remove();
    if torch.typename(trunk.modules[#trunk.modules]):find('View') then
      trunk:remove();
    end
    -- remove pool5
    trunk:remove();
    -- crop central pad : see DataSamplerCoco.wSz
    if config.centralCrop then
      trunk:add(nn.SpatialZeroPadding(-1,-1,-1,-1))
    end
    -- add common extra layers: 1x1 conv reducing 256 -> 128 channels
    trunk:add(cudnn.SpatialConvolution(256,128,1,1,1,1))
    if config.useBN then
      trunk:add(cudnn.SpatialBatchNormalization(128))
    end
    trunk:add(nn.ReLU(true))
  else
    if #config.useImagenet > 0 then
      print(' | FC to Conv conversion in pre-trained model')
      -- index of the first FC layer in the loaded model
      -- NOTE(review): assumes a fixed layer layout of the pre-trained net
      local startFCLayer = 16
      if config.useBN then
        startFCLayer = 19
      end
      -- convert Linear -> Conv in place, compacting over skipped View modules
      local j=startFCLayer
      for i=startFCLayer,#trunk.modules do
        if not torch.typename(trunk.modules[i]):find('View') then
          trunk.modules[j] = linear2conv(trunk.modules[i])
          j=j+1
        end
      end
      -- drop the leftover tail modules
      for j=j,#trunk.modules do
        trunk:remove()
      end
    end
    -- crop central pad : see DataSamplerCoco.wSz
    if config.centralCrop then
      trunk:add(nn.SpatialZeroPadding(-1,-1,-1,-1))
    end
  end
  -- trunk:add(nn.View(config.batch,self.bottleneck))
  -- low-rank bottleneck: project flattened features down to 512 dims
  if config.bottleneck then
    trunk:add(nn.Linear(self.bottleneck,512))
    if config.useBN then
      trunk:add(cudnn.BatchNormalization(512))
    end
    self.bottleneck = 512
  end
  -- mirrorPadding: swap zero padding of convolutions for symmetric padding
  if config.symmPad then
    utils.updatePadding(trunk, nn.SpatialSymmetricPadding)
  end
  self.trunk = trunk:cuda()
  print(' | finalized trunk model:')
  print(trunk)
  return trunk
end
--------------------------------------------------------------------------------
-- function: create mask branch
function DeepMask:createMaskBranch(config)
  -- Mask head: one linear layer predicting an oSz x oSz mask from the
  -- trunk features, optionally followed by bilinear upsampling to gSz.
  local head = nn.Sequential()
  if not config.bottleneck then
    head:add(nn.View(config.batch, self.bottleneck))
  end
  head:add(nn.Linear(self.bottleneck, config.oSz*config.oSz))
  self.maskBranch = nn.Sequential():add(head:cuda())
  -- Upsampling stage: move to CPU floats, resample, and move back to GPU.
  if config.gSz > config.oSz then
    local up = nn.Sequential()
      :add(nn.Copy('torch.CudaTensor','torch.FloatTensor'))
      :add(nn.View(config.batch, config.oSz, config.oSz))
      :add(nn.SpatialReSamplingEx{owidth=config.gSz, oheight=config.gSz,
                                  mode='bilinear'})
      :add(nn.View(config.batch, config.gSz*config.gSz))
      :add(nn.Copy('torch.FloatTensor','torch.CudaTensor'))
    self.maskBranch:add(up)
  end
  print(' | finalized mask model:')
  print(self.maskBranch)
  return self.maskBranch
end
--------------------------------------------------------------------------------
-- function: create score branch
function DeepMask:createScoreBranch(config)
  -- Score head: two-layer MLP with dropout producing a single objectness
  -- score from the trunk features.
  local head = nn.Sequential()
  if not config.bottleneck then
    head:add(nn.View(config.batch, self.bottleneck))
  end
  head:add(nn.Dropout(0.5))
  head:add(nn.Linear(self.bottleneck, 1024))
  if config.useBN then
    head:add(cudnn.BatchNormalization(1024))
  end
  head:add(nn.Threshold(0, 1e-6))
  head:add(nn.Dropout(0.5))
  head:add(nn.Linear(1024, 1))
  self.scoreBranch = head:cuda()
  print(' | finalized score model:')
  print(self.scoreBranch)
  return self.scoreBranch
end
--------------------------------------------------------------------------------
-- function: create colorization branch
function DeepMask:createColorBranch(config)
  -- Colorization head: deconv-upsample the trunk features, predict 313
  -- color bins per pixel, and bilinearly resize to cgSz x cgSz.
  if config.bottleneck then
    print('config.bottleneck in trunk is not supported with Color Task !!')
    os.exit()
  end
  local head = nn.Sequential()
    :add(nn.SpatialFullConvolution(self.channels,256,4,4,2,2,1,1))
    :add(nn.ReLU(true))
    :add(cudnn.SpatialConvolution(256,313,3,3,1,1,1,1))
    :add(nn.SpatialUpSamplingBilinear({oheight=config.cgSz,
                                       owidth=config.cgSz}))
  self.colorBranch = head:cuda()
  print(' | finalized color model:')
  print(self.colorBranch)
  return self.colorBranch
end
--------------------------------------------------------------------------------
-- function: create flow branch
function DeepMask:createFlowBranch(config)
  -- Flow head: a single 3x3 convolution predicting config.numCl flow
  -- classes per spatial location of the trunk output.
  if config.bottleneck then
    print('config.bottleneck in trunk is not supported with Flow Task !!')
    os.exit()
  end
  local head = nn.Sequential()
  head:add(cudnn.SpatialConvolution(self.channels,
    config.numCl, 3, 3, 1, 1, 1, 1))
  -- upsample if fgSz > 12 (e.g. 100)
  -- head:add(nn.SpatialUpSamplingBilinear({oheight=config.fgSz,
  --   owidth=config.fgSz}))
  self.flowBranch = head:cuda()
  print(' | finalized flow model:')
  print(self.flowBranch)
  return self.flowBranch
end
--------------------------------------------------------------------------------
-- function: training
function DeepMask:training()
  -- Put every active component of the model into training mode.
  self.trunk:training()
  if self.flow then
    self.flowBranch:training()
  else
    self.maskBranch:training()
    if self.color then
      self.colorBranch:training()
    else
      self.scoreBranch:training()
    end
  end
end
--------------------------------------------------------------------------------
-- function: evaluate
function DeepMask:evaluate()
  -- Put every active component of the model into evaluation mode.
  self.trunk:evaluate()
  if self.flow then
    self.flowBranch:evaluate()
  else
    self.maskBranch:evaluate()
    if self.color then
      self.colorBranch:evaluate()
    else
      self.scoreBranch:evaluate()
    end
  end
end
--------------------------------------------------------------------------------
-- function: to cuda
function DeepMask:cuda()
  -- Move every active component of the model to the GPU.
  self.trunk:cuda()
  if self.flow then
    self.flowBranch:cuda()
  else
    self.maskBranch:cuda()
    if self.color then
      self.colorBranch:cuda()
    else
      self.scoreBranch:cuda()
    end
  end
end
--------------------------------------------------------------------------------
-- function: to float
function DeepMask:float()
  -- Move every active component of the model to CPU float storage.
  self.trunk:float()
  if self.flow then
    self.flowBranch:float()
  else
    self.maskBranch:float()
    if self.color then
      self.colorBranch:float()
    else
      self.scoreBranch:float()
    end
  end
end
--------------------------------------------------------------------------------
-- function: inference (used for full scene inference)
function DeepMask:inference()
  -- Convert the model IN PLACE for dense full-scene inference: move to GPU,
  -- make the trunk and the active head fully convolutional, switch to
  -- evaluate mode, and prime each part with a dummy forward pass.
  -- NOTE(review): the 1x3x800x800 / 1x512x300x300 tensors look like warm-up
  -- buffer sizes rather than hard input-size constraints -- confirm against
  -- callers before relying on other sizes.
  self:cuda()
  utils.linear2convTrunk(self.trunk,self.fSz)
  self.trunk:evaluate()
  self.trunk:forward(torch.CudaTensor(1,3,800,800))
  if self.flow then
    utils.linear2convHead(self.flowBranch)
    self.flowBranch:evaluate()
    self.flowBranch:forward(torch.CudaTensor(1,512,300,300))
    return
  end
  -- mask branch: convert the inner head, then unwrap it (this drops the
  -- training-time upsampling stage added in createMaskBranch)
  utils.linear2convHead(self.maskBranch.modules[1])
  self.maskBranch = self.maskBranch.modules[1]
  self.maskBranch:evaluate()
  self.maskBranch:forward(torch.CudaTensor(1,512,300,300))
  if self.color then
    utils.linear2convHead(self.colorBranch)
    self.colorBranch:evaluate()
    self.colorBranch:forward(torch.CudaTensor(1,512,300,300))
  else
    utils.linear2convHead(self.scoreBranch)
    self.scoreBranch:evaluate()
    self.scoreBranch:forward(torch.CudaTensor(1,512,300,300))
  end
end
--------------------------------------------------------------------------------
-- function: clone
function DeepMask:clone(...)
  -- Deep-copy the whole model via in-memory serialization. Any extra
  -- arguments (e.g. 'weight','bias') are forwarded to :share() so the
  -- copy shares those tensors with the original.
  local buf = torch.MemoryFile("rw"):binary()
  buf:writeObject(self)
  buf:seek(1)
  local copy = buf:readObject()
  buf:close()
  if select('#',...) > 0 then
    copy.trunk:share(self.trunk,...)
    if self.flow then
      copy.flowBranch:share(self.flowBranch,...)
    else
      copy.maskBranch:share(self.maskBranch,...)
      if self.color then
        copy.colorBranch:share(self.colorBranch,...)
      else
        copy.scoreBranch:share(self.scoreBranch,...)
      end
    end
  end
  return copy
end
return DeepMask
================================================
FILE: motionseg/SpatialSymmetricPadding.lua
================================================
--[[----------------------------------------------------------------------------
Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree. An additional grant
of patent rights can be found in the PATENTS file in the same directory.
SpatialSymmetricPadding module
The forward(A) pads input array A with mirror reflections of itself
It is the same function as Matlab padarray(A, padsize, 'symmetric' )
The updateGradInput(input, gradOutput) is inherited from nn.SpatialZeroPadding
where the padded region is treated as constant and
the gradients would not be accumulated in the backward pass
------------------------------------------------------------------------------]]
local SpatialSymmetricPadding, parent =
torch.class('nn.SpatialSymmetricPadding', 'nn.SpatialZeroPadding')
-- Constructor: delegates to nn.SpatialZeroPadding, which stores the four
-- pad sizes and supplies updateGradInput (padded region gets no gradient).
function SpatialSymmetricPadding:__init(pad_l, pad_r, pad_t, pad_b)
  parent.__init(self, pad_l, pad_r, pad_t, pad_b)
end
function SpatialSymmetricPadding:updateOutput(input)
  -- Pad (or crop, for negative pads) a 4D batch with mirror reflections of
  -- its own border rows/columns, matching Matlab padarray(A, p, 'symmetric').
  assert(input:dim()==4, "only Dimension=4 implemented")
  -- sizes of the padded output
  local h = input:size(3) + self.pad_t + self.pad_b
  local w = input:size(4) + self.pad_l + self.pad_r
  if w < 1 or h < 1 then error('input is too small') end
  self.output:resize(input:size(1), input:size(2), h, w)
  self.output:zero()
  -- crop input if necessary (negative padding removes rows/columns)
  local c_input = input
  if self.pad_t < 0 then
    c_input = c_input:narrow(3, 1 - self.pad_t, c_input:size(3) + self.pad_t)
  end
  if self.pad_b < 0 then
    c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_b)
  end
  if self.pad_l < 0 then
    c_input = c_input:narrow(4, 1 - self.pad_l, c_input:size(4) + self.pad_l)
  end
  if self.pad_r < 0 then
    c_input = c_input:narrow(4, 1, c_input:size(4) + self.pad_r)
  end
  -- crop output if necessary (positive padding: take the center view)
  local c_output = self.output
  if self.pad_t > 0 then
    c_output = c_output:narrow(3, 1 + self.pad_t, c_output:size(3) - self.pad_t)
  end
  if self.pad_b > 0 then
    c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_b)
  end
  if self.pad_l > 0 then
    c_output = c_output:narrow(4, 1 + self.pad_l, c_output:size(4) - self.pad_l)
  end
  if self.pad_r > 0 then
    c_output = c_output:narrow(4, 1, c_output:size(4) - self.pad_r)
  end
  -- copy input into the center of the output
  c_output:copy(c_input)
  -- symmetric padding that fills in values on the padded region;
  -- reflection requires each pad to be at most half the padded size
  if w<2*self.pad_l or w<2*self.pad_r or h<2*self.pad_t or h<2*self.pad_b then
    error('input is too small')
  end
  -- mirror top rows: row (pad_t - i + 1) copies row (pad_t + i)
  for i=1,self.pad_t do
    self.output:narrow(3,self.pad_t-i+1,1):copy(
      self.output:narrow(3,i+self.pad_t,1))
  end
  -- mirror bottom rows
  for i=1,self.pad_b do
    self.output:narrow(3,self.output:size(3)-self.pad_b+i,1):copy(
      self.output:narrow(3,self.output:size(3)-self.pad_b-i+1,1))
  end
  -- mirror left columns
  for i=1,self.pad_l do
    self.output:narrow(4,self.pad_l-i+1,1):copy(
      self.output:narrow(4,i+self.pad_l,1))
  end
  -- mirror right columns
  for i=1,self.pad_r do
    self.output:narrow(4,self.output:size(4)-self.pad_r+i,1):copy(
      self.output:narrow(4,self.output:size(4)-self.pad_r-i+1,1))
  end
  return self.output
end
================================================
FILE: motionseg/load_motionmodel.lua
================================================
-- Helper script: load a saved motion-segmentation model, print its
-- architecture, and leave it in CPU/eval mode.
require 'nn'
require 'cunn'
require 'cudnn'
paths.dofile('DeepMaskAlexNet.lua')

-- parse command line
local cmd = torch.CmdLine()
cmd:text()
cmd:text('Helper script for loading model')
cmd:text()
cmd:option('-input', '', 'Path to input Torch model to be converted')
local opts = cmd:parse(arg)

-- load the serialized model, show it, then move to float and eval mode
local net = torch.load(opts.input)
print(net)
net = net:float()
net:evaluate()
================================================
FILE: motionseg/utilsModel.lua
================================================
-- utility functions for models
local utils = {}
--------------------------------------------------------------------------------
-- SpatialConstDiagonal module
-- all BN modules in resnet to be transformed into SpatialConstDiagonal
if not nn.SpatialConstDiagonal then
  -- Frozen per-channel affine layer: y = a .* x + b, with a and b stored as
  -- (1, C, 1, 1) tensors expanded over the batch and spatial dims. Used as a
  -- fixed replacement for BatchNorm (see utils.BNtoFixed below).
  local module, parent = torch.class('nn.SpatialConstDiagonal', 'nn.Module')
  function module:__init(nOutputPlane, inplace)
    parent.__init(self)
    self.a = torch.Tensor(1,nOutputPlane,1,1)  -- per-channel scale
    self.b = torch.Tensor(1,nOutputPlane,1,1)  -- per-channel shift
    self.inplace = inplace
    self:reset()
  end
  -- reset to the identity transform (scale 1, shift 0)
  function module:reset()
    self.a:fill(1)
    self.b:zero()
  end
  function module:updateOutput(input)
    if self.inplace then
      self.output:set(input)  -- reuse the input storage (overwrites it)
    else
      self.output:resizeAs(input):copy(input)
    end
    self.output:cmul(self.a:expandAs(input))
    self.output:add(self.b:expandAs(input))
    return self.output
  end
  -- gradient w.r.t. input is just the scale: dL/dx = a .* dL/dy
  function module:updateGradInput(input, gradOutput)
    if self.inplace then
      self.gradInput:set(gradOutput)
    else
      self.gradInput:resizeAs(gradOutput):copy(gradOutput)
    end
    self.gradInput:cmul(self.a:expandAs(gradOutput))
    return self.gradInput
  end
end
--------------------------------------------------------------------------------
-- function: goes over a net and recursively replaces modules
-- using callback function
-- Recursively apply `callback` to `net` and all of its descendants.
-- A child is replaced by whatever the callback returns; when the callback
-- returns nil/false the child is removed from its container.
local function replace(net, callback)
  local result = callback(net)
  if net.modules then
    -- walk children bottom-up so removals don't shift unvisited indices
    for i = #net.modules, 1, -1 do
      local child = replace(net.modules[i], callback)
      if child then
        net.modules[i] = child
      else
        net:remove(i)
      end
    end
  end
  return result
end
--------------------------------------------------------------------------------
-- function: replace BN layer to SpatialConstDiagonal
function utils.BNtoFixed(net, ip)
  -- Replace every SpatialBatchNormalization in `net` with a frozen
  -- SpatialConstDiagonal applying the same affine transform:
  -- a = gamma/std, b = beta - mean*gamma/std, so y = a*x + b.
  -- `ip` is forwarded as the inplace flag of the new module.
  return replace(
    net,
    function(x)
      if torch.typename(x):find'SpatialBatchNormalization' then
        local no = x.running_mean:numel()
        local y = nn.SpatialConstDiagonal(no, ip):type(x._type)
        if x.running_var then
          -- NOTE(review): :pow(-0.5) mutates running_var in place; the BN
          -- module is discarded right after, so this looks intentional -- confirm.
          x.running_std = x.running_var:pow(-0.5)
        end
        y.a:copy(x.running_std)
        y.b:add(-1,x.running_mean):cmul(x.running_std)
        if x.affine then
          y.a:cmul(x.weight)
          y.b:cmul(x.weight):add(x.bias)
        end
        return y
      else
        return x
      end
    end
  )
end
--------------------------------------------------------------------------------
-- function: replace 0-padding of 3x3 conv into mirror-padding
function utils.updatePadding(net, nn_padding)
  -- Recursively replace the implicit zero padding of every 3x3 / 5x5 / 7x7
  -- convolution in `net` with an explicit padding module `nn_padding`
  -- (e.g. nn.SpatialSymmetricPadding): the conv's padW/padH are zeroed and
  -- the conv is wrapped in Sequential(padding, conv).
  -- Returns {padW, padH} when the CALLER must wrap this module, -1 otherwise.
  if torch.typename(net) == "nn.Sequential" or
    torch.typename(net) == "nn.ConcatTable" then
    for i = #net.modules,1,-1 do
      local out = utils.updatePadding(net:get(i), nn_padding)
      if out ~= -1 then
        -- child is a padded conv: wrap it in (padding -> conv)
        local pw, ph = out[1], out[2]
        net.modules[i] = nn.Sequential():add(nn_padding(pw,pw,ph,ph))
          :add(net.modules[i]):cuda()
      end
    end
  else
    if torch.typename(net) == "nn.SpatialConvolution" or
      torch.typename(net) == "cudnn.SpatialConvolution" then
      if (net.kW == 3 and net.kH == 3) or (net.kW==7 and net.kH==7) or
        (net.kW == 5 and net.kH == 5) then
        -- strip the built-in zero padding and report it to the caller
        local pw, ph = net.padW, net.padH
        net.padW, net.padH = 0, 0
        return {pw,ph}
      end
    end
  end
  return -1
end
--------------------------------------------------------------------------------
-- function: linear2convTrunk
function utils.linear2convTrunk(net,fSz)
  -- Convert a trained trunk for fully-convolutional inference:
  --  * nn.Linear -> fSz x fSz cudnn.SpatialConvolution with the same weights
  --  * 1D BatchNormalization -> SpatialBatchNormalization
  --  * Threshold -> cudnn.ReLU
  --  * View / SpatialZeroPadding modules are REMOVED: the callback returns
  --    nil (implicitly) for them, which makes replace() drop the module.
  return replace(
    net,
    function(x)
      if torch.typename(x):find('Linear') then
        local nInp,nOut = x.weight:size(2)/(fSz*fSz),x.weight:size(1)
        local w = torch.reshape(x.weight,nOut,nInp,fSz,fSz)
        local y = cudnn.SpatialConvolution(nInp,nOut,fSz,fSz,1,1)
        y.weight:copy(w)
        -- NOTE(review): gradWeight seeded with the weights -- presumably
        -- harmless since it is overwritten on the next backward; confirm.
        y.gradWeight:copy(w)
        y.bias:copy(x.bias)
        return y
      elseif torch.typename(x):find('cudnn.BatchNormalization') or
        torch.typename(x):find('nn.BatchNormalization') then
        -- x.nDim = 4
        -- return x
        local nOut = x.running_mean:size(1)
        local y = cudnn.SpatialBatchNormalization(nOut)
        y.weight:copy(x.weight)
        y.bias:copy(x.bias)
        y.gradWeight:copy(x.gradWeight)
        y.gradBias:copy(x.gradBias)
        y.running_mean:copy(x.running_mean)
        -- y.running_var:copy(x.running_var)
        -- y.save_mean:copy(x.save_mean)
        -- y.save_std:copy(x.save_std)
        return y
      elseif torch.typename(x):find('Threshold') then
        return cudnn.ReLU()
      elseif not torch.typename(x):find('View') and
        not torch.typename(x):find('SpatialZeroPadding') then
        return x
      end
    end
  )
end
--------------------------------------------------------------------------------
-- function: linear2convHeads
function utils.linear2convHead(net)
  -- Make a prediction head fully convolutional: every nn.Linear becomes an
  -- equivalent 1x1 convolution, 1D BatchNorm becomes spatial BatchNorm,
  -- Threshold becomes ReLU, and View/Copy reshaping modules are dropped
  -- (replace() removes a child when the callback returns nil).
  local function convert(m)
    local tname = torch.typename(m)
    if tname:find('Linear') then
      local nOut, nInp = m.weight:size(1), m.weight:size(2)
      local w = torch.reshape(m.weight, nOut, nInp, 1, 1)
      local conv = cudnn.SpatialConvolution(nInp, nOut, 1, 1, 1, 1)
      conv.weight:copy(w)
      conv.gradWeight:copy(w)
      conv.bias:copy(m.bias)
      return conv
    elseif tname:find('cudnn.BatchNormalization') or
      tname:find('nn.BatchNormalization') then
      local nOut = m.running_mean:size(1)
      local bn = cudnn.SpatialBatchNormalization(nOut)
      bn.weight:copy(m.weight)
      bn.bias:copy(m.bias)
      bn.gradWeight:copy(m.gradWeight)
      bn.gradBias:copy(m.gradBias)
      bn.running_mean:copy(m.running_mean)
      return bn
    elseif tname:find('Threshold') then
      return cudnn.ReLU()
    elseif tname:find('View') or tname:find('Copy') then
      return nil  -- removed by replace()
    end
    return m
  end
  return replace(net, convert)
end
return utils