Repository: itayhubara/BinaryNet
Branch: master
Commit: c23b86285cd1
Files: 18
Total size: 98.9 KB
Directory structure:
gitextract_kj09dah_/
├── Data.lua
├── Dockerfile/
│ └── binarynet-torch-gpu-cuda-8.0
├── Main_BinaryNet_Cifar10.lua
├── Main_BinaryNet_MNIST.lua
├── Main_BinaryNet_SVHN.lua
├── Models/
│ ├── BatchNormalizationShiftPow2.lua
│ ├── BinarizedNeurons.lua
│ ├── BinaryLinear.lua
│ ├── BinaryNet_Cifar10_Model.lua
│ ├── BinaryNet_MNIST_Model.lua
│ ├── BinaryNet_SVHN_Model.lua
│ ├── BinarySpatialConvolution.lua
│ ├── SpatialBatchNormalizationShiftPow2.lua
│ └── cudnnBinarySpatialConvolution.lua
├── README.md
├── SqrHingeEmbeddingCriterion.lua
├── adaMax_binary_clip_shift.lua
└── adam_binary_clip_b.lua
================================================
FILE CONTENTS
================================================
================================================
FILE: Data.lua
================================================
--[[
This code creates the training, test and validation datasets and performs different kinds of preprocessing.
This code is based on Elad Hoffer's Data.lua file from the ConvNet-torch library (https://github.com/eladhoffer/ConvNet-torch.git) and uses:
- Elad Hoffer's DataProvider.torch library: https://github.com/eladhoffer/DataProvider.torch.git
- Nicholas Leonard's dp library: https://github.com/nicholas-leonard/dp.git
- Koray Kavukcuoglu's unsup library: https://github.com/koraykv/unsup.git
]]
require 'dp'
local DataProvider = require 'DataProvider'
-- 'opt' is expected to be set by the calling script (Main_BinaryNet_*.lua);
-- fall back to an empty table so every option below uses its default.
local opt = opt or {}
local Dataset = opt.dataset or 'Cifar10' -- one of: Cifar10, Cifar100, MNIST, SVHN
local PreProcDir = opt.preProcDir or './PreProcData/' -- cache dir for preprocessed tensors
local Whiten = opt.whiten or false -- apply ZCA whitening after loading
local NormelizeWhiten = opt.NormelizeWhiten or false
local DataPath = opt.datapath or '/home/itayh/Datasets/'
local normalization = opt.normalization or 'simple' -- 'simple', 'channel' or 'image'
local format = opt.format or 'rgb' -- 'rgb' or 'yuv'
-- Raw split tables ({data=Tensor, label=ByteTensor}) filled by the dataset branch below.
local TestData
local TrainData
local ValidData
local Classes
-- Build or load the requested dataset. Every branch first looks for cached,
-- preprocessed .t7 tensors under PreProcDir and only rebuilds (and re-caches)
-- them via the dp library when the cache is missing.
if Dataset =='Cifar100' then
    local file_valid = paths.concat(PreProcDir, format .. 'whiten_valid.t7')
    local file_train = paths.concat(PreProcDir, format .. 'whiten_train.t7')
    local file_test = paths.concat(PreProcDir, format .. 'whiten_test.t7')
    if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
        ValidData=torch.load(file_valid)
        TrainData=torch.load(file_train)
        TestData=torch.load(file_test)
    else
        if paths.dirp(PreProcDir)==false then
            -- Fix: create the configured directory (the old code hard-coded
            -- 'PreProcData/Cifar100' and ignored a custom opt.preProcDir, and
            -- lacked -p so nested paths failed).
            sys.execute('mkdir -p ' .. PreProcDir)
        end
        input_preprocess = {}
        table.insert(input_preprocess, dp.ZCA())
        ds = dp.Cifar100{scale={0,1}, valid_ratio=0.1,input_preprocess = input_preprocess}
        ValidData = {data=ds:validSet():inputs():input():clone():float(), label=ds:validSet():targets():input():clone():byte() }
        TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
        TestData = {data=ds:testSet():inputs():input():float() , label=ds:testSet():targets():input():byte() }
        collectgarbage()
        torch.save(file_valid,ValidData)
        torch.save(file_train,TrainData)
        torch.save(file_test,TestData)
    end
    -- Fix: Classes was never set for Cifar100, so downstream code
    -- (optim.ConfusionMatrix(classes), #classes) received nil. Use the
    -- numeric labels 1..100 produced by dp.Cifar100.
    Classes = torch.totable(torch.range(1, 100))
elseif Dataset == 'Cifar10' then
    local file_valid = paths.concat(PreProcDir, format .. 'whiten_valid.t7')
    local file_train = paths.concat(PreProcDir, format .. 'whiten_train.t7')
    local file_test = paths.concat(PreProcDir, format .. 'whiten_test.t7')
    if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
        ValidData=torch.load(file_valid)
        TrainData=torch.load(file_train)
        TestData=torch.load(file_test)
    else
        if paths.dirp(PreProcDir)==false then
            -- Fix: honor the configured PreProcDir (see Cifar100 branch).
            sys.execute('mkdir -p ' .. PreProcDir)
        end
        input_preprocess = {}
        table.insert(input_preprocess, dp.ZCA())
        ds = dp.Cifar10{scale={0,1},valid_ratio=0.1,input_preprocess = input_preprocess}
        ValidData = {data=ds:validSet():inputs():input():float(), label=ds:validSet():targets():input():clone():byte() }
        TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
        TestData = {data=ds:testSet():inputs():input():float(), label=ds:testSet():targets():input():byte() }
        collectgarbage()
        torch.save(file_valid,ValidData)
        torch.save(file_train,TrainData)
        torch.save(file_test,TestData)
    end
    Classes = {'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}
elseif Dataset == 'MNIST' then
    local file_valid = paths.concat(PreProcDir, format .. '_valid.t7')
    local file_train = paths.concat(PreProcDir, format .. '_train.t7')
    local file_test = paths.concat(PreProcDir, format .. '_test.t7')
    if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
        ValidData=torch.load(file_valid)
        TrainData=torch.load(file_train)
        TestData=torch.load(file_test)
    else
        if paths.dirp(PreProcDir)==false then
            -- Fix: honor the configured PreProcDir (see Cifar100 branch).
            sys.execute('mkdir -p ' .. PreProcDir)
        end
        ds = dp.Mnist{scale={0,1}}
        ValidData = {data=ds:validSet():inputs():input():clone():float(), label=ds:validSet():targets():input():clone():byte() }
        TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
        TestData = {data=ds:testSet():inputs():input():float() , label=ds:testSet():targets():input():byte() }
        collectgarbage()
        torch.save(file_valid,ValidData)
        torch.save(file_train,TrainData)
        torch.save(file_test,TestData)
    end
    Classes = {1,2,3,4,5,6,7,8,9,0}
elseif Dataset == 'SVHN' then
    -- SVHN is preprocessed with global contrast normalization (GCN) followed
    -- by LeCun local contrast normalization (LCN), hence the cache file names.
    local LCNfile_valid = paths.concat(PreProcDir, format .. 'GCN_LCN_valid.t7')
    local LCNfile_train = paths.concat(PreProcDir, format .. 'GCN_LCN_train.t7')
    local LCNfile_test = paths.concat(PreProcDir, format .. 'GCN_LCN_test.t7')
    print(LCNfile_valid)
    if (paths.filep(LCNfile_valid) and paths.filep(LCNfile_train) and paths.filep(LCNfile_test)) then
        ValidData=torch.load(LCNfile_valid)
        TrainData=torch.load(LCNfile_train)
        TestData=torch.load(LCNfile_test)
    else
        if paths.dirp(PreProcDir)==false then
            -- Fix: honor the configured PreProcDir (see Cifar100 branch).
            sys.execute('mkdir -p ' .. PreProcDir)
        end
        local input_preprocess = {}
        table.insert(input_preprocess, dp.GCN{batch_size=5000,use_std=true,sqrt_bias=10})
        table.insert(input_preprocess, dp.LeCunLCN{kernel_size=9,divide_by_std=true,batch_size=5000,progress=true})
        ds = dp.Svhn{scale={0,1}, input_preprocess = input_preprocess}
        -- Rescale each split to [-1,1]-ish range by its own maximum.
        ValidData = {data=ds:validSet():inputs():input():float(), label=ds:validSet():targets():input():byte() }; ValidData.data:div( ValidData.data:max())
        TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }; TrainData.data:div( TrainData.data:max())
        TestData = {data=ds:testSet():inputs():input():float(), label=ds:testSet():targets():input():byte() }; TestData.data:div( TestData.data:max())
        collectgarbage()
        torch.save(LCNfile_valid,ValidData)
        torch.save(LCNfile_train,TrainData)
        torch.save(LCNfile_test,TestData)
    end
    Classes = {1,2,3,4,5,6,7,8,9,0}
end
-- Ensure both splits are FloatTensors before wrapping them in providers.
TrainData.data = TrainData.data:float()
TestData.data = TestData.data:float()
-- Wrap each split in a DataProvider.Container, which handles batching and
-- shuffling for the main training scripts.
local TrainDataProvider = DataProvider.Container{
Name = 'TrainingData',
CachePrefix = nil,
CacheFiles = false,
Source = {TrainData.data,TrainData.label},
MaxNumItems = 1e6,
CopyData = false,
TensorType = 'torch.FloatTensor',
}
local TestDataProvider = DataProvider.Container{
Name = 'TestData',
CachePrefix = nil,
CacheFiles = false,
Source = {TestData.data, TestData.label},
MaxNumItems = 1e6,
CopyData = false,
TensorType = 'torch.FloatTensor',
}
local ValidDataProvider = DataProvider.Container{
Name = 'ValidData',
CachePrefix = nil,
CacheFiles = false,
Source = {ValidData.data, ValidData.label},
MaxNumItems = 1e6,
CopyData = false,
TensorType = 'torch.FloatTensor',
}
--Preprocess: optional color conversion, then whitening or mean/std
--normalization. Transforms are estimated on the training split and re-applied
--to validation and test; the fitted statistics are cached under PreProcDir.
if format == 'yuv' then
    require 'image'
    TrainDataProvider:apply(image.rgb2yuv)
    TestDataProvider:apply(image.rgb2yuv)
    -- Fix: the validation set must receive the same color conversion as
    -- train/test, otherwise validation runs on RGB while the net saw YUV.
    ValidDataProvider:apply(image.rgb2yuv)
end
if Whiten then
    require 'unsup'
    -- ZCA whitening: (mean, P, invP) are fit once on the training data and
    -- cached, then re-applied to all three splits.
    local meanfile = paths.concat(PreProcDir, format .. 'imageMean.t7')
    local mean, P, invP
    local Pfile = paths.concat(PreProcDir,format .. 'P.t7')
    local invPfile = paths.concat(PreProcDir,format .. 'invP.t7')
    if (paths.filep(Pfile) and paths.filep(invPfile) and paths.filep(meanfile)) then
        P = torch.load(Pfile)
        invP = torch.load(invPfile)
        mean = torch.load(meanfile)
        TrainDataProvider.Data = unsup.zca_whiten(TrainDataProvider.Data, mean, P, invP)
    else
        TrainDataProvider.Data, mean, P, invP = unsup.zca_whiten(TrainDataProvider.Data)
        torch.save(Pfile,P)
        torch.save(invPfile,invP)
        torch.save(meanfile,mean)
    end
    TestDataProvider.Data = unsup.zca_whiten(TestDataProvider.Data, mean, P, invP)
    ValidDataProvider.Data = unsup.zca_whiten(ValidDataProvider.Data, mean, P, invP)
elseif opt.dp_prepro then
    -- Fix: this flag is passed as opt.dp_prepro (-dp_prepro command line
    -- option); the old code read a never-defined global `dp_prepro`, making
    -- this branch unreachable and double-normalizing SVHN.
    -- Do nothing here since the dp lib already applied GCN and LCN.
else
    -- Mean/std normalization ('simple', 'channel' or 'image' mode) estimated
    -- on the training split and cached.
    local meanfile = paths.concat(PreProcDir, format .. normalization .. 'Mean.t7')
    local stdfile = paths.concat(PreProcDir,format .. normalization .. 'Std.t7')
    local mean, std
    local loaded = false
    if paths.filep(meanfile) and paths.filep(stdfile) then
        mean = torch.load(meanfile)
        std = torch.load(stdfile)
        loaded = true
    end
    mean, std = TrainDataProvider:normalize(normalization, mean, std)
    TestDataProvider:normalize(normalization, mean, std)
    ValidDataProvider:normalize(normalization, mean, std)
    if not loaded then
        torch.save(meanfile,mean)
        torch.save(stdfile,std)
    end
end
-- Module result consumed by the Main_BinaryNet_* scripts.
return{
    TrainData = TrainDataProvider,
    TestData = TestDataProvider,
    ValidData = ValidDataProvider,
    Classes = Classes
}
================================================
FILE: Dockerfile/binarynet-torch-gpu-cuda-8.0
================================================
# GPU image for BinaryNet: CUDA 8.0 + cuDNN 5 + Torch7 with the Lua deps
# (DataProvider, cudnn, dp, unsup) required by the training scripts.
FROM nvidia/cuda:8.0-cudnn5-devel
WORKDIR /workspace
# Install dependencies
RUN apt-get update \
&& apt-get install -y \
build-essential git gfortran \
python3 python3-setuptools python3-dev \
cmake curl wget unzip libreadline-dev libjpeg-dev libpng-dev ncurses-dev \
imagemagick gnuplot gnuplot-x11 libssl-dev libzmq3-dev graphviz vim sudo tmux
# Install OpenBLAS
RUN apt-get -y install libopenblas-dev
# Install Torch commit no: 0219027e6c4644a0ba5c5bf137c989a0a8c9e01b
RUN git clone https://github.com/torch/distro.git torch --recursive
RUN cd torch \
&& /bin/bash install-deps \
&& ./install.sh
# get torch tutorials. comment out this line if no need
RUN git clone https://github.com/torch/tutorials.git
# Install dependency for [BinaryNet](https://github.com/itayhubara/BinaryNet)
RUN /workspace/torch/install/bin/luarocks install https://raw.githubusercontent.com/eladhoffer/DataProvider.torch/master/dataprovider-scm-1.rockspec
RUN /workspace/torch/install/bin/luarocks install cudnn
RUN /workspace/torch/install/bin/luarocks install dp
RUN /workspace/torch/install/bin/luarocks install unsup
# copy BinaryNet into the image
ADD . BinaryNet
================================================
FILE: Main_BinaryNet_Cifar10.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'adam_binary_clip_b'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------------------------------
-- Command-line options for BinaryNet training on Cifar10.
cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')
cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder', './Models/', 'Models Folder')
cmd:option('-network', 'Model.lua', 'Model file - must return valid network.')
cmd:option('-LR', 2^-6, 'learning rate')
cmd:option('-LRDecay', 0, 'learning rate decay (in # samples)')
cmd:option('-weightDecay', 0.0, 'L2 penalty on the weights')
cmd:option('-momentum', 0.0, 'momentum')
cmd:option('-batchSize', 200, 'batch size')
cmd:option('-stcNeurons', true, 'use stochastic binarization for the neurons')
cmd:option('-stcWeights', false, 'use stochastic binarization for the weights')
cmd:option('-optimization', 'adam', 'optimization method')
cmd:option('-SBN', true, 'shift based batch-normalization')
cmd:option('-runningVal', false, 'use running mean and std')
cmd:option('-epoch', -1, 'number of epochs to train, -1 for unbounded')
cmd:text('===>Platform Optimization')
cmd:option('-threads', 8, 'number of threads')
cmd:option('-type', 'cuda', 'float or cuda')
cmd:option('-devid', 1, 'device ID (if using CUDA)')
cmd:option('-nGPU', 1, 'num of gpu devices used')
cmd:option('-constBatchSize', false, 'do not allow varying batch sizes - e.g for ccn2 kernel')
cmd:text('===>Save/Load Options')
cmd:option('-load', '', 'load existing net weights')
cmd:option('-save', os.date():gsub(' ',''), 'save directory')
cmd:text('===>Data Options')
cmd:option('-dataset', 'Cifar10', 'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization', 'simple', 'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format', 'rgb', 'rgb or yuv')
cmd:option('-whiten', true, 'whiten data')
cmd:option('-dp_prepro', false, 'preprocessing using dp lib')
cmd:option('-augment', false, 'Augment training data')
cmd:option('-preProcDir', './PreProcData/', 'Data for pre-processing (means,P,invP)')
cmd:text('===>Misc')
cmd:option('-visualize', 0, 'visualizing results')
torch.manualSeed(432)
-- Derived paths: model file under modelsFolder, results under ./Results/<save>,
-- preprocessing cache under <preProcDir>/<dataset>/.
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')
-- If you choose to use an exponentially decaying learning rate, uncomment this line
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)
torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
require 'image'
end
----------------------------------------------------------------------
-- Model + Loss:
-- model/GLRvec/clipV are intentionally global: the optimizer state below and
-- the Forward closure rely on them. SqrtHingeEmbeddingCriterion is the global
-- class defined by require 'SqrHingeEmbeddingCriterion' above.
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV
local loss = SqrtHingeEmbeddingCriterion(1)
local data = require 'Data'
local classes = data.Classes
----------------------------------------------------------------------
-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)
local AllowVarBatch = not opt.constBatchSize
----------------------------------------------------------------------
-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------
local TensorType = 'torch.FloatTensor'
-- Resume from a checkpoint before any CUDA conversion so the loaded model is
-- converted below as well.
if paths.filep(opt.load) then
model = torch.load(opt.load)
print('==>Loaded model from: ' .. opt.load)
print(model)
end
if opt.type =='cuda' then
require 'cutorch'
cutorch.setDevice(opt.devid)
cutorch.setHeapTracking(true)
model:cuda()
GLRvec=GLRvec:cuda()
clipV=clipV:cuda()
loss = loss:cuda()
TensorType = 'torch.CudaTensor'
end
---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
local net = model
model = nn.DataParallelTable(1)
for i = 1, opt.nGPU do
cutorch.setDevice(i)
model:add(net:clone():cuda(), i) -- Use the ith GPU
end
cutorch.setDevice(opt.devid)
end
-- Optimization configuration
local Weights,Gradients = model:getParameters()
----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() .. ' Parameters')
print '==> Loss'
print(loss)
------------------Optimization Configuration--------------------------
-- GLRvec (per-parameter learning-rate scale) and clipV (clip mask) are
-- consumed by adaMax_binary_clip_shift.
local optimState = {
learningRate = opt.LR,
momentum = opt.momentum,
weightDecay = opt.weightDecay,
learningRateDecay = opt.LRDecay,
GLRvec=GLRvec,
clipV=clipV
}
----------------------------------------------------------------------
-- Data augmentation hook for the mini-batch extractor. With -augment enabled,
-- each image is randomly cropped (up to 8 pixels), possibly flipped
-- horizontally, and rescaled back to 32x32; otherwise the batch passes
-- through untouched.
-- NOTE(review): assumes 32x32 inputs -- confirm for this dataset.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    end
    local augmented = images:clone()
    for idx = 1, images:size(1) do
        local crop = math.random(9) - 1          -- pixels to crop away (0..8)
        local flip = (math.random(2) == 1)       -- fair coin for horizontal flip
        local offx = math.random(crop)
        local offy = math.random(crop)
        local patch = images[idx]:narrow(2, offy, 32 - crop):narrow(3, offx, 32 - crop)
        if flip then
            patch = image.hflip(patch)
        end
        augmented[idx]:copy(image.scale(patch, 32, 32))
    end
    return augmented, labels
end
------------------------------
-- Run one full pass over `Data` in mini-batches of opt.batchSize.
-- When `train` is true the weights are updated via adaMax_binary_clip_shift;
-- otherwise only loss and confusion statistics are accumulated.
-- Returns the average loss per batch.
local function Forward(Data, train)
local MiniBatch = DataProvider.Container{
Name = 'GPU_Batch',
MaxNumItems = opt.batchSize,
Source = Data,
ExtractFunction = SampleImages,
TensorType = TensorType
}
-- x and yt alias the batch buffers that getNextBatch() refills in place.
local yt = MiniBatch.Labels
local x = MiniBatch.Data
local SizeData = Data:size()
if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end
local NumSamples = 0
local NumBatches = 0
local lossVal = 0
while NumSamples < SizeData do
MiniBatch:getNextBatch()
local y, currLoss
NumSamples = NumSamples + x:size(1)
NumBatches = NumBatches + 1
if opt.nGPU > 1 then
model:syncParameters()
end
y = model:forward(x)
-- Encode targets as +-1 one-hot vectors for the squared hinge loss.
-- (one_hot_yt is written as a global; kept as-is.)
one_hot_yt=torch.zeros(yt:size(1),10)
one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
one_hot_yt=one_hot_yt:mul(2):float():add(-1)
if opt.type == 'cuda' then
one_hot_yt=one_hot_yt:cuda()
end
currLoss = loss:forward(y,one_hot_yt)
if train then
function feval()
model:zeroGradParameters()
local dE_dy = loss:backward(y, one_hot_yt)
model:backward(x, dE_dy)
return currLoss, Gradients
end
--_G.optim[opt.optimization](feval, Weights, optimState) -- If you choose to use different optimization remember to clip the weights
adaMax_binary_clip_shift(feval, Weights, optimState)
end
lossVal = currLoss + lossVal
if type(y) == 'table' then --table results - always take first prediction
y = y[1]
end
confusion:batchAdd(y,one_hot_yt)
xlua.progress(NumSamples, SizeData)
-- Periodic GC to bound memory growth of per-batch allocations.
if math.fmod(NumBatches,100)==0 then
collectgarbage()
end
end
return(lossVal/math.ceil(SizeData/opt.batchSize))
end
------------------------------
--- One training epoch over `dataset`: training mode, weights updated.
local function Train(dataset)
    model:training()
    return Forward(dataset, true)
end
--- Evaluation pass over `dataset`: eval mode, no weight updates.
local function Test(dataset)
    model:evaluate()
    return Forward(dataset, false)
end
------------------------------
local epoch = 1
print '\n==> Starting Training\n'
-- Main loop: each iteration trains on the full training set, then evaluates
-- on validation and test. Runs until epoch == opt.epoch (forever for -1).
while epoch ~= opt.epoch do
    data.TrainData:shuffleItems()
    print('Epoch ' .. epoch)
    --Train
    confusion:zero()
    local LossTrain = Train(data.TrainData)
    -- Checkpoint the network every 10 epochs.
    if epoch%10==0 then
        torch.save(netFilename, model)
    end
    confusion:updateValids()
    local ErrTrain = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Training Error = ' .. ErrTrain)
    print('Training Loss = ' .. LossTrain)
    --validation
    confusion:zero()
    local LossValid = Test(data.ValidData)
    confusion:updateValids()
    local ErrValid = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Valid Error = ' .. ErrValid)
    print('Valid Loss = ' .. LossValid)
    --Test
    confusion:zero()
    local LossTest = Test(data.TestData)
    confusion:updateValids()
    local ErrTest = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Test Error = ' .. ErrTest)
    print('Test Loss = ' .. LossTest)
    Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
    -- Plot the error curves when -visualize 1 is given.
    if opt.visualize == 1 then
        -- Fix: the style keys must match the keys used in Log:add above
        -- ('Valid Error', not 'Validation Error'), otherwise the validation
        -- curve never receives its line style.
        Log:style{['Training Error'] = '-',['Valid Error'] = '-', ['Test Error'] = '-'}
        Log:plot()
    end
    --optimState.learningRate=optimState.learningRate*opt.LRDecay
    -- Halve the learning rate every 50 epochs.
    if epoch%50==0 then
        optimState.learningRate=optimState.learningRate*0.5
    end
    print('-------------------LR-------------------')
    print(optimState.learningRate)
    epoch = epoch + 1
end
================================================
FILE: Main_BinaryNet_MNIST.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------
-- Command-line options for BinaryNet training on MNIST.
cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')
cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder', './Models/', 'Models Folder')
cmd:option('-network', 'Model.lua', 'Model file - must return valid network.')
cmd:option('-LR', 2^-6, 'learning rate')
cmd:option('-LRDecay', 0, 'learning rate decay (in # samples)')
cmd:option('-weightDecay', 0.0, 'L2 penalty on the weights')
cmd:option('-momentum', 0.0, 'momentum')
cmd:option('-batchSize', 100, 'batch size')
-- Fix: the help strings for the two options below said 'batch size'
-- (copy/paste slip); use the descriptions from Main_BinaryNet_Cifar10.lua.
cmd:option('-stcNeurons', true, 'use stochastic binarization for the neurons')
cmd:option('-stcWeights', false, 'use stochastic binarization for the weights')
cmd:option('-optimization', 'adam', 'optimization method')
cmd:option('-SBN', true, 'shift based batch-normalization')
cmd:option('-runningVal', true, 'use running mean and std')
cmd:option('-epoch', -1, 'number of epochs to train, -1 for unbounded')
cmd:text('===>Platform Optimization')
cmd:option('-threads', 8, 'number of threads')
cmd:option('-type', 'cuda', 'float or cuda')
cmd:option('-devid', 1, 'device ID (if using CUDA)')
cmd:option('-nGPU', 1, 'num of gpu devices used')
cmd:option('-constBatchSize', false, 'do not allow varying batch sizes - e.g for ccn2 kernel')
cmd:text('===>Save/Load Options')
cmd:option('-load', '', 'load existing net weights')
cmd:option('-save', os.date():gsub(' ',''), 'save directory')
cmd:text('===>Data Options')
cmd:option('-dataset', 'MNIST', 'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization', 'simple', 'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format', 'rgb', 'rgb or yuv')
cmd:option('-whiten', false, 'whiten data')
cmd:option('-dp_prepro', false, 'preprocessing using dp lib')
cmd:option('-augment', false, 'Augment training data')
cmd:option('-preProcDir', './PreProcData/', 'Data for pre-processing (means,P,invP)')
cmd:text('===>Misc')
cmd:option('-visualize', 1, 'visualizing results')
torch.manualSeed(432)
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')
-- If you choose to use an exponentially decaying learning rate, uncomment this line
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--
-- Fix: 'mk1ir' was a typo for 'mkdir'; without it the preprocessing cache
-- directory was never created and Data.lua failed saving its .t7 files.
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)
torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
require 'image'
end
----------------------------------------------------------------------
-- Model + Loss: model/GLRvec/clipV are intentionally global (used by the
-- optimizer state and Forward below). SqrtHingeEmbeddingCriterion is the
-- global class defined by require 'SqrHingeEmbeddingCriterion' above.
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV
local loss = SqrtHingeEmbeddingCriterion(1)
local data = require 'Data'
local classes = data.Classes
----------------------------------------------------------------------
-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)
local AllowVarBatch = not opt.constBatchSize
----------------------------------------------------------------------
-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------
local TensorType = 'torch.FloatTensor'
-- Resume from a checkpoint before any CUDA conversion so the loaded model is
-- converted below as well.
if paths.filep(opt.load) then
model = torch.load(opt.load)
print('==>Loaded model from: ' .. opt.load)
print(model)
end
if opt.type =='cuda' then
require 'cutorch'
cutorch.setDevice(opt.devid)
cutorch.setHeapTracking(true)
model:cuda()
GLRvec=GLRvec:cuda()
clipV=clipV:cuda()
loss = loss:cuda()
TensorType = 'torch.CudaTensor'
end
---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
local net = model
model = nn.DataParallelTable(1)
for i = 1, opt.nGPU do
cutorch.setDevice(i)
model:add(net:clone():cuda(), i) -- Use the ith GPU
end
cutorch.setDevice(opt.devid)
end
-- Optimization configuration
local Weights,Gradients = model:getParameters()
----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() .. ' Parameters')
print '==> Loss'
print(loss)
------------------Optimization Configuration--------------------------
-- GLRvec (per-parameter learning-rate scale) and clipV (clip mask) are
-- consumed by adaMax_binary_clip_shift.
local optimState = {
learningRate = opt.LR,
momentum = opt.momentum,
weightDecay = opt.weightDecay,
learningRateDecay = opt.LRDecay,
GLRvec=GLRvec,
clipV=clipV
}
----------------------------------------------------------------------
-- Data augmentation hook for the mini-batch extractor. With -augment enabled,
-- each image is randomly cropped (up to 8 pixels), possibly flipped
-- horizontally, and rescaled back to 32x32; otherwise the batch passes
-- through untouched.
-- NOTE(review): the crop math assumes 32x32 images -- MNIST is 28x28, so
-- confirm before enabling -augment here.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    end
    local augmented = images:clone()
    for idx = 1, images:size(1) do
        local crop = math.random(9) - 1          -- pixels to crop away (0..8)
        local flip = (math.random(2) == 1)       -- fair coin for horizontal flip
        local offx = math.random(crop)
        local offy = math.random(crop)
        local patch = images[idx]:narrow(2, offy, 32 - crop):narrow(3, offx, 32 - crop)
        if flip then
            patch = image.hflip(patch)
        end
        augmented[idx]:copy(image.scale(patch, 32, 32))
    end
    return augmented, labels
end
------------------------------
-- Run one full pass over `Data` in mini-batches of opt.batchSize.
-- When `train` is true the weights are updated via adaMax_binary_clip_shift
-- and the real-valued weights of the binary layers are clamped to [-1,1].
-- Returns the average loss per batch.
local function Forward(Data, train)
    local MiniBatch = DataProvider.Container{
        Name = 'GPU_Batch',
        MaxNumItems = opt.batchSize,
        Source = Data,
        ExtractFunction = SampleImages,
        TensorType = TensorType
    }
    -- x and yt alias the batch buffers that getNextBatch() refills in place.
    local yt = MiniBatch.Labels
    local x = MiniBatch.Data
    local SizeData = Data:size()
    if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end
    local NumSamples = 0
    local NumBatches = 0
    local lossVal = 0
    while NumSamples < SizeData do
        MiniBatch:getNextBatch()
        local y, currLoss
        NumSamples = NumSamples + x:size(1)
        NumBatches = NumBatches + 1
        if opt.nGPU > 1 then
            model:syncParameters()
        end
        y = model:forward(x)
        -- Encode targets as +-1 one-hot vectors for the squared hinge loss.
        one_hot_yt=torch.zeros(yt:size(1),10)
        one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
        one_hot_yt=one_hot_yt:mul(2):float():add(-1)
        -- Fix: only move the targets to the GPU in cuda mode; the previous
        -- unconditional :cuda() crashed with -type float (the Cifar10 script
        -- already guards this).
        if opt.type == 'cuda' then
            one_hot_yt=one_hot_yt:cuda()
        end
        currLoss = loss:forward(y,one_hot_yt)
        if train then
            function feval()
                model:zeroGradParameters()
                local dE_dy = loss:backward(y, one_hot_yt)
                model:backward(x, dE_dy)
                return currLoss, Gradients
            end
            adaMax_binary_clip_shift(feval, Weights, optimState)
            -- Keep real-valued weights of the binary layers inside [-1,1];
            -- clamp operates in place on the weight tensor. (The redundant
            -- indLayer counter duplicating `i` was removed.)
            for i, layer in ipairs(model.modules) do
                if layer.__typename == 'cudnnBinarySpatialConvolution'
                        or layer.__typename == 'BinaryLinear' then
                    model.modules[i].weight:clamp(-1,1)
                end
            end
        end
        lossVal = currLoss + lossVal
        if type(y) == 'table' then --table results - always take first prediction
            y = y[1]
        end
        confusion:batchAdd(y,one_hot_yt)
        xlua.progress(NumSamples, SizeData)
        -- Periodic GC to bound memory growth of per-batch allocations.
        if math.fmod(NumBatches,100)==0 then
            collectgarbage()
        end
    end
    return(lossVal/math.ceil(SizeData/opt.batchSize))
end
------------------------------
--- One training epoch over `dataset`: training mode, weights updated.
local function Train(dataset)
    model:training()
    return Forward(dataset, true)
end
--- Evaluation pass over `dataset`: eval mode, no weight updates.
local function Test(dataset)
    model:evaluate()
    return Forward(dataset, false)
end
------------------------------
-- Fix: the epoch counter was initialized (and the banner printed) twice.
local epoch = 1
print '\n==> Starting Training\n'
-- Main loop: each iteration trains on the full training set, then evaluates
-- on validation and test. Runs until epoch == opt.epoch (forever for -1).
while epoch ~= opt.epoch do
    data.TrainData:shuffleItems()
    print('Epoch ' .. epoch)
    --Train
    confusion:zero()
    local LossTrain = Train(data.TrainData)
    -- Checkpoint the network every 10 epochs.
    if epoch%10==0 then
        torch.save(netFilename, model)
    end
    confusion:updateValids()
    local ErrTrain = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Training Error = ' .. ErrTrain)
    print('Training Loss = ' .. LossTrain)
    --validation
    confusion:zero()
    local LossValid = Test(data.ValidData)
    confusion:updateValids()
    local ErrValid = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Valid Error = ' .. ErrValid)
    print('Valid Loss = ' .. LossValid)
    --Test
    confusion:zero()
    local LossTest = Test(data.TestData)
    confusion:updateValids()
    local ErrTest = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Test Error = ' .. ErrTest)
    print('Test Loss = ' .. LossTest)
    Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
    if opt.visualize == 1 then
        -- Fix: the style keys must match the keys used in Log:add above
        -- ('Valid Error', not 'Validation Error').
        Log:style{['Training Error'] = '-',['Valid Error'] = '-', ['Test Error'] = '-'}
        Log:plot()
    end
    -- Halve the learning rate every 20 epochs.
    if epoch%20==0 then
        optimState.learningRate=optimState.learningRate*0.5
    end
    print('-------------------LR-------------------')
    print(optimState.learningRate)
    epoch = epoch + 1
end
================================================
FILE: Main_BinaryNet_SVHN.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------------------------------
-- Command-line options for BinaryNet training on SVHN.
cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')
cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder', './Models/', 'Models Folder')
cmd:option('-network', 'Model.lua', 'Model file - must return valid network.')
cmd:option('-LR', 2^-7, 'learning rate')
cmd:option('-LRDecay', 0, 'learning rate decay (in # samples)')
cmd:option('-weightDecay', 0.0, 'L2 penalty on the weights')
cmd:option('-momentum', 0.0, 'momentum')
cmd:option('-batchSize', 200, 'batch size')
-- Fix: the help strings for the two options below said 'batch size'
-- (copy/paste slip); use the descriptions from Main_BinaryNet_Cifar10.lua.
cmd:option('-stcNeurons', true, 'use stochastic binarization for the neurons')
cmd:option('-stcWeights', false, 'use stochastic binarization for the weights')
cmd:option('-optimization', 'adam', 'optimization method')
cmd:option('-SBN', true, 'shift based batch-normalization')
cmd:option('-runningVal', true, 'use running mean and std')
cmd:option('-epoch', -1, 'number of epochs to train, -1 for unbounded')
cmd:text('===>Platform Optimization')
cmd:option('-threads', 8, 'number of threads')
cmd:option('-type', 'cuda', 'float or cuda')
cmd:option('-devid', 1, 'device ID (if using CUDA)')
cmd:option('-nGPU', 1, 'num of gpu devices used')
cmd:option('-constBatchSize', false, 'do not allow varying batch sizes - e.g for ccn2 kernel')
cmd:text('===>Save/Load Options')
cmd:option('-load', '', 'load existing net weights')
cmd:option('-save', os.date():gsub(' ',''), 'save directory')
cmd:text('===>Data Options')
cmd:option('-dataset', 'SVHN', 'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization', 'simple', 'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format', 'rgb', 'rgb or yuv')
cmd:option('-whiten', false, 'whiten data')
cmd:option('-dp_prepro', true, 'preprocessing using dp lib')
cmd:option('-augment', false, 'Augment training data')
cmd:option('-preProcDir', './PreProcData/', 'Data for pre-processing (means,P,invP)')
cmd:text('===>Misc')
cmd:option('-visualize', 1, 'visualizing results')
torch.manualSeed(432)
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')
-- If you choose to use an exponentially decaying learning rate, uncomment this line
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--
-- Fix: 'mk1ir' was a typo for 'mkdir'; without it the preprocessing cache
-- directory was never created and Data.lua failed saving its .t7 files.
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)
torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
require 'image'
end
----------------------------------------------------------------------
-- Model + Loss: model/GLRvec/clipV are intentionally global (used by the
-- optimizer state and Forward below).
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV
local loss = SqrtHingeEmbeddingCriterion(1) --nn.ClassNLLCriterion()
local data = require 'Data'
local classes = data.Classes
----------------------------------------------------------------------
-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)
local AllowVarBatch = not opt.constBatchSize
----------------------------------------------------------------------
-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------
local TensorType = 'torch.FloatTensor'
-- Fix: load a checkpoint BEFORE the CUDA conversion, as the other Main_*
-- scripts do. Previously a model loaded with -load replaced the already
-- cuda()'d model and was left on the CPU.
if paths.filep(opt.load) then
model = torch.load(opt.load)
print('==>Loaded model from: ' .. opt.load)
print(model)
end
if opt.type =='cuda' then
require 'cutorch'
cutorch.setDevice(opt.devid)
cutorch.setHeapTracking(true)
model:cuda()
GLRvec=GLRvec:cuda()
clipV=clipV:cuda()
loss = loss:cuda()
TensorType = 'torch.CudaTensor'
end
---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
local net = model
model = nn.DataParallelTable(1)
for i = 1, opt.nGPU do
cutorch.setDevice(i)
model:add(net:clone():cuda(), i) -- Use the ith GPU
end
cutorch.setDevice(opt.devid)
end
-- Optimization configuration
local Weights,Gradients = model:getParameters()
----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() .. ' Parameters')
print '==> Loss'
print(loss)
------------------Optimization Configuration--------------------------
-- GLRvec (per-parameter learning-rate scale) and clipV (clip mask) are
-- consumed by adaMax_binary_clip_shift.
local optimState = {
learningRate = opt.LR,
momentum = opt.momentum,
weightDecay = opt.weightDecay,
learningRateDecay = opt.LRDecay,
GLRvec=GLRvec,
clipV=clipV
}
----------------------------------------------------------------------
-- Data augmentation hook for the mini-batch extractor. With -augment enabled,
-- each image is randomly cropped (up to 8 pixels), possibly flipped
-- horizontally, and rescaled back to 32x32; otherwise the batch passes
-- through untouched.
-- NOTE(review): assumes 32x32 inputs -- confirm for this dataset.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    end
    local augmented = images:clone()
    for idx = 1, images:size(1) do
        local crop = math.random(9) - 1          -- pixels to crop away (0..8)
        local flip = (math.random(2) == 1)       -- fair coin for horizontal flip
        local offx = math.random(crop)
        local offy = math.random(crop)
        local patch = images[idx]:narrow(2, offy, 32 - crop):narrow(3, offx, 32 - crop)
        if flip then
            patch = image.hflip(patch)
        end
        augmented[idx]:copy(image.scale(patch, 32, 32))
    end
    return augmented, labels
end
------------------------------
-- Run one full pass over `Data` in mini-batches of opt.batchSize.
-- When `train` is true the weights are updated via adaMax_binary_clip_shift
-- and the real-valued weights of the binary layers are clamped to [-1,1].
-- Returns the average loss per batch.
local function Forward(Data, train)
    local MiniBatch = DataProvider.Container{
        Name = 'GPU_Batch',
        MaxNumItems = opt.batchSize,
        Source = Data,
        ExtractFunction = SampleImages,
        TensorType = TensorType
    }
    -- x and yt alias the batch buffers that getNextBatch() refills in place.
    local yt = MiniBatch.Labels
    local x = MiniBatch.Data
    local SizeData = Data:size()
    if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end
    local NumSamples = 0
    local NumBatches = 0
    local lossVal = 0
    while NumSamples < SizeData do
        MiniBatch:getNextBatch()
        local y, currLoss
        NumSamples = NumSamples + x:size(1)
        NumBatches = NumBatches + 1
        if opt.nGPU > 1 then
            model:syncParameters()
        end
        y = model:forward(x)
        -- Encode targets as +-1 one-hot vectors for the squared hinge loss.
        one_hot_yt=torch.zeros(yt:size(1),10)
        one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
        one_hot_yt=one_hot_yt:mul(2):float():add(-1)
        -- Fix: only move the targets to the GPU in cuda mode; the previous
        -- unconditional :cuda() crashed with -type float (the Cifar10 script
        -- already guards this).
        if opt.type == 'cuda' then
            one_hot_yt=one_hot_yt:cuda()
        end
        currLoss = loss:forward(y,one_hot_yt)
        if train then
            function feval()
                model:zeroGradParameters()
                local dE_dy = loss:backward(y, one_hot_yt)
                model:backward(x, dE_dy)
                return currLoss, Gradients
            end
            adaMax_binary_clip_shift(feval, Weights, optimState)
            -- Keep real-valued weights of the binary layers inside [-1,1].
            -- clamp operates in place, so the former weight:copy(weight:clamp(..))
            -- was redundant; the indLayer counter duplicating `i` was removed.
            for i, layer in ipairs(model.modules) do
                if layer.__typename == 'cudnnBinarySpatialConvolution'
                        or layer.__typename == 'BinaryLinear' then
                    model.modules[i].weight:clamp(-1,1)
                end
            end
        end
        lossVal = currLoss + lossVal
        if type(y) == 'table' then --table results - always take first prediction
            y = y[1]
        end
        confusion:batchAdd(y,one_hot_yt)
        xlua.progress(NumSamples, SizeData)
        -- Periodic GC to bound memory growth of per-batch allocations.
        if math.fmod(NumBatches,100)==0 then
            collectgarbage()
        end
    end
    return(lossVal/math.ceil(SizeData/opt.batchSize))
end
------------------------------
--- Run a training pass: enables training mode (stochastic binarization,
-- batch-norm statistic updates) and forwards with optimization on.
local function Train(Data)
  model:training()
  local avgLoss = Forward(Data, true)
  return avgLoss
end
--- Run an evaluation pass: deterministic binarization, no optimizer step.
local function Test(Data)
  model:evaluate()
  local avgLoss = Forward(Data, false)
  return avgLoss
end
------------------------------
local epoch = 1
print '\n==> Starting Training\n'
-- Main loop: one train / valid / test pass per epoch until epoch reaches
-- opt.epoch (compared with ~=, so e.g. opt.epoch == -1 runs forever).
while epoch ~= opt.epoch do
  data.TrainData:shuffleItems()
  print('Epoch ' .. epoch)
  -- Training pass
  confusion:zero()
  local LossTrain = Train(data.TrainData)
  if epoch % 10 == 0 then
    torch.save(netFilename, model)  -- periodic checkpoint
  end
  confusion:updateValids()
  local ErrTrain = (1 - confusion.totalValid)
  if #classes <= 10 then
    print(confusion)
  end
  print('Training Error = ' .. ErrTrain)
  print('Training Loss = ' .. LossTrain)
  -- Validation pass
  confusion:zero()
  local LossValid = Test(data.ValidData)
  confusion:updateValids()
  local ErrValid = (1 - confusion.totalValid)
  if #classes <= 10 then
    print(confusion)
  end
  print('Valid Error = ' .. ErrValid)
  print('Valid Loss = ' .. LossValid)
  -- Test pass
  confusion:zero()
  local LossTest = Test(data.TestData)
  confusion:updateValids()
  local ErrTest = (1 - confusion.totalValid)
  if #classes <= 10 then
    print(confusion)
  end
  print('Test Error = ' .. ErrTest)
  print('Test Loss = ' .. LossTest)
  Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
  if opt.visualize == 1 then
    -- Bug fix: the style key must match the series key used in Log:add
    -- ('Valid Error'); the old 'Validation Error' styled a series that
    -- does not exist.
    Log:style{['Training Error'] = '-', ['Valid Error'] = '-', ['Test Error'] = '-'}
    Log:plot()
  end
  -- Halve the learning rate every 20 epochs (the no-op else branch that
  -- reassigned the same value was removed).
  if epoch % 20 == 0 then
    optimState.learningRate = optimState.learningRate * 0.5
  end
  print('-------------------LR-------------------')
  print(optimState.learningRate)
  epoch = epoch + 1
end
================================================
FILE: Models/BatchNormalizationShiftPow2.lua
================================================
--[[
This file implements shift-based batch normalization, a variant of vanilla BN, as described in the paper:
"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1", Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio
The code is based on the nn library.
--]]
local BatchNormalizationShiftPow2,parent = torch.class('BatchNormalizationShiftPow2', 'nn.Module')
--- Shift-based batch normalization over 2D mini-batch input.
-- @param nOutput    number of features (required)
-- @param runningVal use running statistics at evaluation time (default true)
-- @param eps        numerical-stability constant (default 1e-5)
-- @param momentum   running-statistics momentum (default 0.125)
-- @param affine     learn gamma/beta (default true)
function BatchNormalizationShiftPow2:__init(nOutput, runningVal, eps, momentum, affine)
  parent.__init(self)
  assert(nOutput and type(nOutput) == 'number',
    'Missing argument #1: dimensionality of input. ')
  assert(nOutput ~= 0, 'To set affine=false call BatchNormalization'
    .. '(nOutput, eps, momentum, false) ')
  if affine ~= nil then
    assert(type(affine) == 'boolean', 'affine has to be true/false')
    self.affine = affine
  else
    self.affine = true
  end
  self.eps = eps or 1e-5
  self.train = true
  self.momentum = momentum or 0.125
  -- Bug fix: `runningVal or true` always evaluated to true, so an
  -- explicit `false` argument was silently ignored. Default to true
  -- only when the argument is nil.
  if runningVal == nil then
    self.runningVal = true
  else
    self.runningVal = runningVal
  end
  self.running_mean = torch.zeros(nOutput)
  self.running_std = torch.ones(nOutput)
  self.running_std_ap2 = torch.ones(nOutput) -- running 1/std rounded to a power of 2
  if self.affine then
    self.weight = torch.Tensor(nOutput)      -- gamma
    self.weightSign = torch.Tensor(nOutput)
    self.weight_ap2 = torch.Tensor(nOutput)  -- gamma rounded to a signed power of 2
    self.bias = torch.Tensor(nOutput)        -- beta
    self.gradWeight = torch.Tensor(nOutput)
    self.gradBias = torch.Tensor(nOutput)
    self:reset()
  end
end
--- Reset running statistics and (affine) learnable parameters to their
-- initial state: zero mean / unit std, gamma = 1, beta = 0.
function BatchNormalizationShiftPow2:reset()
  self.running_mean:zero()
  self.running_std:fill(1)
  self.weight:fill(1)
  self.bias:zero()
end
--- Forward pass of shift-based batch normalization (2D mini-batch only).
-- Multiplicative factors (centered input, inverse std, gamma) are rounded
-- to signed powers of two (2^round(log2|x|)) so a hardware implementation
-- can replace multiplications with bit shifts.
function BatchNormalizationShiftPow2:updateOutput(input)
assert(input:dim() == 2, 'only mini-batch supported (2D tensor), got '
.. input:dim() .. 'D tensor instead')
local nBatch = input:size(1)
-- buffers that are reused across calls (lazily created with input's type)
self.buffer = self.buffer or input.new()
self.buffer2 = self.buffer2 or input.new()
self.centered = self.centered or input.new()
self.centered:resizeAs(input)
self.centerSign = self.centerSign or input.new()
self.centerSign:resizeAs(input)
self.centeredOrg = self.centeredOrg or input.new()
self.centeredOrg:resizeAs(input)
self.std = self.std or input.new()
self.normalized = self.normalized or input.new()
self.normalized:resizeAs(input)
self.normalizedSign = self.normalizedSign or input.new()
self.normalizedSign:resizeAs(input)
self.output:resizeAs(input)
self.gradInput:resizeAs(input)
if self.train == false and self.runningVal == true then
-- evaluation mode: normalize with the running statistics; the running
-- inverse std is rounded to a power of 2 before use
self.output:copy(input)
self.buffer:repeatTensor(self.running_mean, nBatch, 1)
self.output:add(-1, self.buffer)
self.running_std_ap2:copy(torch.pow(2,torch.round(torch.log(self.running_std):div(math.log(2)))))
self.buffer:repeatTensor(self.running_std_ap2, nBatch, 1)
self.output:cmul(self.buffer)
else -- training mode
-- calculate mean over mini-batch
self.buffer:mean(input, 1) -- E(x) = expectation of x.
self.running_mean:mul(1 - self.momentum):add(self.momentum, self.buffer) -- add to running mean
self.buffer:repeatTensor(self.buffer, nBatch, 1)
-- subtract mean
self.centered:add(input, -1, self.buffer) -- x - E(x)
self.centeredOrg:copy(self.centered)
-- round |x - E(x)| to the nearest power of 2, preserving its sign
self.centerSign:copy(self.centered)
self.centerSign:sign()
self.centered:copy(torch.pow(2,torch.round(torch.log(self.centered:abs()):div(math.log(2))))):cmul(self.centerSign)
-- calculate standard deviation over mini-batch
self.buffer:copy(self.centered):cmul(self.centeredOrg) -- [x - E(x)]^2
-- 1 / E([x - E(x)]^2)
self.std:mean(self.buffer, 1):add(self.eps):sqrt():pow(-1)
self.running_std:mul(1 - self.momentum):add(self.momentum, self.std) -- add to running stdv
-- round the inverse std to a power of 2 as well
self.std:copy(torch.pow(2,torch.round(torch.log(self.std):div(math.log(2)))))
self.buffer:repeatTensor(self.std, nBatch, 1)
-- divide standard-deviation + eps
self.output:cmul(self.centeredOrg, self.buffer)
-- keep a power-of-2-rounded copy of the normalized input for the
-- gamma gradient in accGradParameters
self.normalized:copy(self.output)
self.normalizedSign:copy(self.normalized)
self.normalizedSign:sign()
self.normalized:copy(torch.pow(2,torch.round(torch.log(self.normalized:abs()):div(math.log(2)))):cmul(self.normalizedSign))
--self.normalized[self.normalized:lt(0)]=1; -- Can improve results
end
if self.affine then
-- multiply with gamma and add beta; gamma is rounded to a signed
-- power of 2 before use
self.weightSign:copy(self.weight)
self.weightSign:sign()
self.weight_ap2:copy(torch.pow(2,torch.round(torch.log(self.weight:clone():abs()):div(math.log(2))))):cmul(self.weightSign)
--self.weight:fill(1) --Almost similar results
self.buffer:repeatTensor(self.weight_ap2, nBatch, 1)
self.output:cmul(self.buffer)
self.buffer:repeatTensor(self.bias, nBatch, 1)
self.output:add(self.buffer)
end
return self.output
end
--- Backward pass: gradient w.r.t. the input, using the per-batch buffers
-- (centered values, rounded inverse std, rounded gamma) saved by
-- updateOutput. Only valid in training mode.
function BatchNormalizationShiftPow2:updateGradInput(input, gradOutput)
assert(input:dim() == 2, 'only mini-batch supported')
assert(gradOutput:dim() == 2, 'only mini-batch supported')
assert(self.train == true, 'should be in training mode when self.train is true')
local nBatch = input:size(1)
-- variance term: -(x - E(x)) * mean((x - E(x)) .* gradOutput) * std^2
self.gradInput:cmul(self.centered, gradOutput)
self.buffer:mean(self.gradInput, 1)
self.gradInput:repeatTensor(self.buffer, nBatch, 1)
self.gradInput:cmul(self.centered):mul(-1)
self.buffer:repeatTensor(self.std, nBatch, 1)
self.gradInput:cmul(self.buffer):cmul(self.buffer)
-- mean term: gradOutput minus its batch mean
self.buffer:mean(gradOutput, 1)
self.buffer:repeatTensor(self.buffer, nBatch, 1)
self.gradInput:add(gradOutput):add(-1, self.buffer)
-- scale by the (power-of-2 rounded) inverse std
self.buffer:repeatTensor(self.std, nBatch, 1)
self.gradInput:cmul(self.buffer)
if self.affine then
-- chain through gamma (its power-of-2 rounded copy)
self.buffer:repeatTensor(self.weight_ap2, nBatch, 1)
self.gradInput:cmul(self.buffer)
end
return self.gradInput
end
--- Accumulate parameter gradients (only meaningful when affine).
-- dL/dgamma = sum over batch of normalized .* gradOutput;
-- dL/dbeta  = sum over batch of gradOutput.
function BatchNormalizationShiftPow2:accGradParameters(input, gradOutput, scale)
  if not self.affine then return end
  scale = scale or 1.0
  self.buffer2:resizeAs(self.normalized):copy(self.normalized):cmul(gradOutput)
  self.buffer:sum(self.buffer2, 1) -- sum over mini-batch
  self.gradWeight:add(scale, self.buffer)
  self.buffer:sum(gradOutput, 1)   -- sum over mini-batch
  self.gradBias:add(scale, self.buffer)
end
================================================
FILE: Models/BinarizedNeurons.lua
================================================
local BinarizedNeurons,parent = torch.class('BinarizedNeurons', 'nn.Module')
--- Sign-binarization activation layer.
-- @param stcFlag when true, binarize stochastically during training
function BinarizedNeurons:__init(stcFlag)
  parent.__init(self)
  self.stcFlag = stcFlag
  self.outputR = torch.Tensor() -- input rescaled to [0,1]
  self.randmat = torch.Tensor() -- uniform noise for the stochastic path
end
--- Binarize the input to {-1,+1}.
-- Deterministic: sign(x). Stochastic (train + stcFlag): +1 with
-- probability p = (x+1)/2, else -1.
function BinarizedNeurons:updateOutput(input)
  self.randmat:resizeAs(input)
  self.outputR:resizeAs(input)
  self.output:resizeAs(input)
  -- p = (x + 1) / 2 maps [-1,1] onto [0,1]
  self.outputR:copy(input):add(1):div(2)
  if self.train and self.stcFlag then
    self.output = (self.outputR - self.randmat:rand(self.randmat:size())):sign()
  else
    self.output:copy(self.outputR):add(-0.5):sign()
  end
  return self.output
end
--- Straight-through estimator: pass the gradient through unchanged.
function BinarizedNeurons:updateGradInput(input, gradOutput)
  self.gradInput:resizeAs(gradOutput):copy(gradOutput)
  return self.gradInput
end
================================================
FILE: Models/BinaryLinear.lua
================================================
--require 'randomkit'
local BinaryLinear, parent = torch.class('BinaryLinear', 'nn.Linear')
--- Fully-connected layer with weights binarized on the fly.
-- @param stcWeights when true, binarize weights stochastically in training
function BinaryLinear:__init(inputSize, outputSize, stcWeights)
  -- Temporarily disable reset() so parent.__init does not initialize
  -- weights before our buffers exist.
  local delayedReset = self.reset
  self.reset = function() end
  parent.__init(self, inputSize, outputSize)
  self.reset = delayedReset
  self.weight    = torch.Tensor(outputSize, inputSize)
  self.weightB   = torch.Tensor(outputSize, inputSize) -- binarized copy
  self.weightOrg = torch.Tensor(outputSize, inputSize) -- stash of real weights
  self.maskStc   = torch.Tensor(outputSize, inputSize) -- stochastic mask
  self.randmat   = torch.Tensor(outputSize, inputSize) -- uniform noise
  self.bias       = torch.Tensor(outputSize)
  self.gradWeight = torch.Tensor(outputSize, inputSize)
  self.gradBias   = torch.Tensor(outputSize)
  self.stcWeights = stcWeights
  self:reset()
  -- drop the override for clean serialization; the metatable reset still works
  self.reset = nil
end
--- Initialize parameters: weights uniform in [-1,1], biases uniform with
-- the usual 1/sqrt(fanIn) scaling (or a caller-supplied stdv).
function BinaryLinear:reset(stdv)
  if stdv then
    stdv = stdv * math.sqrt(3)
  else
    stdv = 1. / math.sqrt(self.weight:size(2))
  end
  if nn.oldSeed then
    -- legacy RNG path: element-wise draws for reproducible old seeds
    for row = 1, self.weight:size(1) do
      self.weight:select(1, row):apply(function() return torch.uniform(-1, 1) end)
      self.bias[row] = torch.uniform(-stdv, stdv)
    end
  else
    self.weight:uniform(-1, 1)
    self.bias:uniform(-stdv, stdv)
  end
  return self
end
--- Compute binarized weights from the real-valued ones; the real weights
-- are stashed in weightOrg so updateOutput can restore them afterwards.
-- @param trainFlag with stcWeights set, enables the stochastic path
-- @return self.weightB
function BinaryLinear:binarized(trainFlag)
  self.weightOrg:copy(self.weight)
  self.binaryFlag = true
  -- The old `if not self.binaryFlag` branch was unreachable (the flag is
  -- assigned true on the line above) and has been removed.
  -- Map weights from [-1,1] to probabilities in [0,1].
  self.weightB:copy(self.weight):add(1):div(2):clamp(0,1)
  if not self.stcWeights or not trainFlag then
    -- Deterministic: round to {0,1}, then rescale to {-1,+1}.
    self.weightB:round():mul(2):add(-1)
  else
    -- Stochastic: probability minus uniform noise.
    -- NOTE(review): unlike BinarizedNeurons, no sign() is applied here,
    -- so weightB holds real-valued differences — confirm this is intended.
    self.maskStc = self.weightB - self.randmat:rand(self.randmat:size())
    self.weightB:copy(self.maskStc)
  end
  return self.weightB
end
--- Forward with binarized weights, then restore the real-valued ones.
function BinaryLinear:updateOutput(input)
  self.weightB = self:binarized(self.train)
  self.weight:copy(self.weightB)
  parent.updateOutput(self, input)
  self.weight:copy(self.weightOrg)
  return self.output
end
--- Backward through the binarized weights, then restore the real ones.
function BinaryLinear:updateGradInput(input, gradOutput)
  if not self.gradInput then return end
  self.weight:copy(self.weightB)
  parent.updateGradInput(self, input, gradOutput)
  self.weight:copy(self.weightOrg)
  return self.gradInput
end
--- Delegate gradient accumulation to nn.Linear.
function BinaryLinear:accGradParameters(input, gradOutput, scale)
  parent.accGradParameters(self, input, gradOutput, scale)
end
-- we do not need to accumulate parameters when sharing
BinaryLinear.sharedAccUpdateGradParameters = BinaryLinear.accUpdateGradParameters
--- Pretty-print, e.g. "BinaryLinear(784 -> 2048)".
function BinaryLinear:__tostring__()
  local nOut, nIn = self.weight:size(1), self.weight:size(2)
  return string.format('%s(%d -> %d)', torch.type(self), nIn, nOut)
end
================================================
FILE: Models/BinaryNet_Cifar10_Model.lua
================================================
--[[This code specifies the model for the CIFAR-10 dataset. The model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning parameter and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'
require './BinarizedNeurons'
-- Select CUDA (cudnn) or CPU implementations of the binary convolution
-- and max pooling according to opt.type.
local SpatialConvolution
local SpatialMaxPooling
if opt.type =='cuda' then
require 'cunn'
require 'cudnn'
require './cudnnBinarySpatialConvolution.lua'
SpatialConvolution = cudnnBinarySpatialConvolution
SpatialMaxPooling = cudnn.SpatialMaxPooling
else
require './BinarySpatialConvolution.lua'
SpatialConvolution = BinarySpatialConvolution
SpatialMaxPooling = nn.SpatialMaxPooling
end
-- Select shift-based (power-of-two) batch norm or the vanilla nn version.
-- NOTE(review): BatchNormalization / SpatialBatchNormalization are
-- assigned without `local`, so they become globals shared across model
-- files — confirm this is intentional.
if opt.SBN == true then
require './BatchNormalizationShiftPow2.lua'
require './SpatialBatchNormalizationShiftPow2.lua'
BatchNormalization = BatchNormalizationShiftPow2
SpatialBatchNormalization = SpatialBatchNormalizationShiftPow2
else
BatchNormalization = nn.BatchNormalization
SpatialBatchNormalization = nn.SpatialBatchNormalization
end
numHid=1024;
local model = nn.Sequential()
-- Convolution Layers
-- VGG-style stack: each binary conv is followed by (shift-based) batch
-- norm, HardTanh and sign binarization; max pooling after every 2nd conv.
model:add(SpatialConvolution(3, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(128, 128, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(128, 256, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(256, 256, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(256, 512, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(512, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(512, 512, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(512, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
-- Classifier: flatten 512x4x4 feature maps into binary FC layers.
model:add(nn.View(512*4*4))
model:add(BinaryLinear(512*4*4,numHid,opt.stcWeights))
-- NOTE(review): opt.runningVal is omitted here although it is passed to
-- the BatchNormalization below — confirm whether that is intentional.
model:add(BatchNormalization(numHid))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,10,opt.stcWeights))
-- Final vanilla BN over the 10 class scores.
model:add(nn.BatchNormalization(10))
-- Build per-parameter learning-rate (Glorot-scaled, rounded to powers of
-- two for binary layers) and clip-mask vectors, laid out in the same
-- order as the flattened parameter vector from model:getParameters().
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(1)
local counter = 0
for i, layer in ipairs(model.modules) do
-- Binary FC layers: Glorot LR rounded to a power of 2; clip weights only.
if layer.__typename == 'BinaryLinear' then
local weight_size = layer.weight:size(1)*layer.weight:size(2)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]+size_w[2]))
GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(1)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
-- Batch-norm variants: unit LR, never clipped.
elseif layer.__typename == 'BatchNormalizationShiftPow2' then
local weight_size = layer.weight:size(1)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
learningRates[{{counter+1, counter+weight_size}}]:fill(1)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
elseif layer.__typename == 'nn.BatchNormalization' then
local weight_size = layer.weight:size(1)
learningRates[{{counter+1, counter+weight_size}}]:fill(1)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
elseif layer.__typename == 'SpatialBatchNormalizationShiftPow2' then
local weight_size = layer.weight:size(1)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
learningRates[{{counter+1, counter+weight_size}}]:fill(1)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
elseif layer.__typename == 'nn.SpatialBatchNormalization' then
local weight_size = layer.weight:size(1)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
learningRates[{{counter+1, counter+weight_size}}]:fill(1)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
-- Binary conv layers: Glorot LR from fan-in/fan-out of the filters,
-- rounded to a power of 2; clip weights only.
elseif layer.__typename == 'cudnnBinarySpatialConvolution' then
local size_w=layer.weight:size();
local weight_size = size_w[1]*size_w[2]*size_w[3]*size_w[4]
local filter_size=size_w[3]*size_w[4]
GLR=1/torch.sqrt(1.5/(size_w[1]*filter_size+size_w[2]*filter_size))
GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(1)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
elseif layer.__typename == 'BinarySpatialConvolution' then
local size_w=layer.weight:size();
local weight_size = size_w[1]*size_w[2]*size_w[3]*size_w[4]
local filter_size=size_w[3]*size_w[4]
GLR=1/torch.sqrt(1.5/(size_w[1]*filter_size+size_w[2]*filter_size))
GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(1)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
end
end
-- clip all parameter
-- NOTE(review): this overrides the per-layer clip mask built above so
-- every parameter is clipped — confirm this is intentional.
clipvector:fill(1)
--
-- Sanity prints: zero/non-zero LR entries, clip count, parameters covered.
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(clipvector:ne(0):sum())
print(counter)
return {
model = model,
lrs = learningRates,
clipV =clipvector,
}
================================================
FILE: Models/BinaryNet_MNIST_Model.lua
================================================
--[[This code specifies the model for the MNIST dataset. The model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning parameter and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'
require './BinarizedNeurons'
if opt.type=='cuda' then
require 'cunn'
require 'cudnn'
end
-- Use shift-based (power-of-two) batch norm when opt.SBN is set,
-- otherwise the standard nn implementation.
local BatchNormalization;
if opt.SBN == true then
require './BatchNormalizationShiftPow2'
BatchNormalization = BatchNormalizationShiftPow2
else
BatchNormalization = nn.BatchNormalization
end
-- MLP: 784 -> three binary hidden layers of 2048 units -> 10 scores,
-- with (shift-based) batch norm, HardTanh and sign binarization between
-- linear layers, and a vanilla BN over the final class scores.
local model = nn.Sequential()
local numHid = 2048
model:add(nn.View(-1,784))
-- Bug fix: the first BinaryLinear previously omitted opt.stcWeights,
-- unlike every other binary layer in this and the other model files.
model:add(BinaryLinear(784,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,10,opt.stcWeights))
model:add(nn.BatchNormalization(10))
-- Build per-parameter learning-rate and clip-mask vectors aligned with
-- the flattened parameter vector from model:getParameters().
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(0)
local counter = 0
for i, layer in ipairs(model.modules) do
-- Binary FC layers: Glorot LR; clip weights, not biases.
if layer.__typename == 'BinaryLinear' then
local weight_size = layer.weight:size(1)*layer.weight:size(2)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]+size_w[2]))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(1)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
-- Batch-norm variants: Glorot LR on gamma, unit LR on beta, no clipping.
elseif layer.__typename == 'BatchNormalizationShiftPow2' then
local weight_size = layer.weight:size(1)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
elseif layer.__typename == 'nn.BatchNormalization' then
local weight_size = layer.weight:size(1)
local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
clipvector[{{counter+1, counter+weight_size}}]:fill(0)
counter = counter+weight_size
local bias_size = layer.bias:size(1)
learningRates[{{counter+1, counter+bias_size}}]:fill(1)
clipvector[{{counter+1, counter+bias_size}}]:fill(0)
counter = counter+bias_size
end
end
-- Sanity prints: zero/non-zero LR entries and parameters covered.
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(counter)
return {
model = model,
lrs = learningRates,
clipV =clipvector,
}
================================================
FILE: Models/BinaryNet_SVHN_Model.lua
================================================
--[[This code specifies the model for the SVHN dataset. The model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning parameter and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'
require './BinarizedNeurons'
-- Select CUDA (cudnn) or CPU implementations of the binary convolution
-- and max pooling according to opt.type.
local SpatialConvolution
local SpatialMaxPooling
if opt.type =='cuda' then
  require 'cunn'
  require 'cudnn'
  require './cudnnBinarySpatialConvolution.lua'
  SpatialConvolution = cudnnBinarySpatialConvolution
  SpatialMaxPooling = cudnn.SpatialMaxPooling
else
  require './BinarySpatialConvolution.lua'
  SpatialConvolution = BinarySpatialConvolution
  SpatialMaxPooling = nn.SpatialMaxPooling
end
-- Select shift-based (power-of-two) batch norm or the vanilla nn version.
if opt.SBN == true then
  require './BatchNormalizationShiftPow2.lua'
  require './SpatialBatchNormalizationShiftPow2.lua'
  BatchNormalization = BatchNormalizationShiftPow2
  SpatialBatchNormalization = SpatialBatchNormalizationShiftPow2
else
  BatchNormalization = nn.BatchNormalization
  SpatialBatchNormalization = nn.SpatialBatchNormalization
end
numHid=1024;
local model = nn.Sequential()
-- Convolution layers (VGG-style 3x3 stacks, pooling after every 2nd conv).
-- Bug fix: pooling previously called cudnn.SpatialMaxPooling directly,
-- which crashed on the CPU path (opt.type ~= 'cuda'); it now uses the
-- SpatialMaxPooling alias selected above (identical under CUDA).
model:add(SpatialConvolution(3, 64, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(64, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(64, 64, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(64, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(64, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(128, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(128, 256, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(SpatialConvolution(256, 256, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
-- Classifier: flatten 256x4x4 feature maps into binary FC layers.
model:add(nn.View(256*4*4))
model:add(BinaryLinear(256*4*4,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,10,opt.stcWeights))
-- Final vanilla BN over the 10 class scores.
model:add(nn.BatchNormalization(10))
-- Build per-parameter learning-rate (Glorot-scaled, rounded to powers of
-- two for binary layers) and clip-mask vectors, laid out in the same
-- order as the flattened parameter vector from model:getParameters().
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(0)
local counter = 0
for i, layer in ipairs(model.modules) do
  -- Binary FC layers: Glorot LR rounded to a power of 2; clip weights only.
  if layer.__typename == 'BinaryLinear' then
    local weight_size = layer.weight:size(1)*layer.weight:size(2)
    local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]+size_w[2]))
    GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
    learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+weight_size}}]:fill(1)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  -- Batch-norm variants: unit LR, never clipped.
  elseif layer.__typename == 'BatchNormalizationShiftPow2' then
    local weight_size = layer.weight:size(1)
    local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
    learningRates[{{counter+1, counter+weight_size}}]:fill(1)
    clipvector[{{counter+1, counter+weight_size}}]:fill(0)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(1)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  elseif layer.__typename == 'nn.BatchNormalization' then
    local weight_size = layer.weight:size(1)
    local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
    learningRates[{{counter+1, counter+weight_size}}]:fill(1)
    clipvector[{{counter+1, counter+weight_size}}]:fill(0)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(1)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  -- Bug fix: the original file had this exact elseif condition twice;
  -- the second copy was unreachable and has been removed.
  elseif layer.__typename == 'nn.SpatialBatchNormalization' then
    local weight_size = layer.weight:size(1)
    local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
    learningRates[{{counter+1, counter+weight_size}}]:fill(1)
    clipvector[{{counter+1, counter+weight_size}}]:fill(0)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(1)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  elseif layer.__typename == 'SpatialBatchNormalizationShiftPow2' then
    local weight_size = layer.weight:size(1)
    local size_w=layer.weight:size(); GLR=1/torch.sqrt(1.5/(size_w[1]))
    learningRates[{{counter+1, counter+weight_size}}]:fill(1)
    clipvector[{{counter+1, counter+weight_size}}]:fill(0)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(1)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  -- Binary conv layers: Glorot LR from filter fan-in/fan-out, rounded to
  -- a power of 2; clip weights only.
  elseif layer.__typename == 'cudnnBinarySpatialConvolution' then
    local size_w=layer.weight:size();
    local weight_size = size_w[1]*size_w[2]*size_w[3]*size_w[4]
    local filter_size=size_w[3]*size_w[4]
    GLR=1/torch.sqrt(1.5/(size_w[1]*filter_size+size_w[2]*filter_size))
    GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
    learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+weight_size}}]:fill(1)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  elseif layer.__typename == 'BinarySpatialConvolution' then
    local size_w=layer.weight:size();
    local weight_size = size_w[1]*size_w[2]*size_w[3]*size_w[4]
    local filter_size=size_w[3]*size_w[4]
    GLR=1/torch.sqrt(1.5/(size_w[1]*filter_size+size_w[2]*filter_size))
    GLR=(math.pow(2,torch.round(math.log(GLR)/(math.log(2)))))
    learningRates[{{counter+1, counter+weight_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+weight_size}}]:fill(1)
    counter = counter+weight_size
    local bias_size = layer.bias:size(1)
    learningRates[{{counter+1, counter+bias_size}}]:fill(GLR)
    clipvector[{{counter+1, counter+bias_size}}]:fill(0)
    counter = counter+bias_size
  end
end
-- Sanity prints: zero/non-zero LR entries, clip count, parameters covered.
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(clipvector:ne(0):sum())
print(counter)
return {
  model = model,
  lrs = learningRates,
  clipV =clipvector,
}
================================================
FILE: Models/BinarySpatialConvolution.lua
================================================
local BinarySpatialConvolution, parent = torch.class('BinarySpatialConvolution', 'nn.SpatialConvolution')
--- Binary 2D convolution (CPU path); real-valued weights are kept and
-- binarized on the fly in updateOutput.
-- Bug fix: `stcWeights` and `groups` were read from (nil) globals even
-- though callers pass stcWeights as the 9th argument; they are now real
-- parameters with the same effective defaults (false / 1), so existing
-- call sites keep working.
function BinarySpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, stcWeights, groups)
  -- Temporarily disable reset() so parent.__init does not initialize
  -- weights before our buffers exist.
  local delayedReset = self.reset
  self.reset = function() end
  parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH)
  self.reset = delayedReset
  self.padW = padW or 0
  self.padH = padH or 0
  self.stcWeights = stcWeights or false
  self.groups = groups or 1
  assert(nInputPlane % self.groups == 0,
    'nInputPlane should be divisible by nGroups')
  assert(nOutputPlane % self.groups == 0,
    'nOutputPlane should be divisible by nGroups')
  self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)
  self.weightB = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)    -- binarized copy
  self.weightOrg = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)  -- stash of real weights
  self.randmat = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)    -- uniform noise
  self.maskStc = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)    -- stochastic mask
  self:reset()
  -- should nil for serialization, the reset will still work
  self.reset = nil
  self.iSize = torch.LongStorage(4):fill(0)
end
--- Initialize parameters: weights uniform in [-1,1], bias uniform with
-- the usual 1/sqrt(fanIn) scaling (or a caller-supplied stdv).
function BinarySpatialConvolution:reset(stdv)
  if stdv then
    stdv = stdv * math.sqrt(3)
  else
    stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
  end
  if nn.oldSeed then
    -- legacy RNG path: element-wise draws for reproducible old seeds
    self.weight:apply(function() return torch.uniform(-1, 1) end)
    if self.bias then
      self.bias:apply(function() return torch.uniform(-stdv, stdv) end)
    end
  else
    self.weight:uniform(-1, 1)
    if self.bias then
      self.bias:uniform(-stdv, stdv)
    end
  end
end
--- Return the binarized weights and snapshot the real-valued weights into
--- self.weightOrg. Deterministic mode rounds via a hard sigmoid to {-1,+1};
--- stochastic mode (stcWeights, training only) compares the hard-sigmoid
--- probability (w+1)/2 against uniform noise.
-- BUGFIX: the original contained an `if not self.binaryFlag` branch that was
-- unreachable because binaryFlag is unconditionally set true on the previous
-- line; the dead branch has been removed (behavior unchanged).
function BinarySpatialConvolution:binarized(trainFlag)
   self.weightOrg:copy(self.weight)
   self.binaryFlag = true
   -- map weights to [0, 1] via hard sigmoid (w + 1) / 2
   self.weightB:copy(self.weight):add(1):div(2):clamp(0, 1)
   if not self.stcWeights or not trainFlag then
      -- deterministic: round to {0,1}, then rescale to {-1,+1}
      self.weightB:round():mul(2):add(-1)
   else
      -- stochastic: probability minus uniform noise
      -- NOTE(review): this stores p - u rather than sign(p - u); confirm that
      -- the non-sign output is intended (matches the original code).
      self.maskStc = self.weightB - self.randmat:rand(self.randmat:size())
      self.weightB:copy(self.maskStc)
   end
   return self.weightB
end
-- Upgrade modules serialized with older nn versions: ensure the finput /
-- fgradInput scratch buffers exist, migrate the legacy single `padding`
-- field into padW/padH, and restore 4D weight/gradWeight views if they
-- were saved flattened (2D).
local function backCompatibility(self)
   self.finput = self.finput or self.weight.new()
   self.fgradInput = self.fgradInput or self.weight.new()
   if self.padding then
      -- old format stored one symmetric padding value
      self.padW = self.padding
      self.padH = self.padding
      self.padding = nil
   else
      self.padW = self.padW or 0
      self.padH = self.padH or 0
   end
   if self.weight:dim() == 2 then
      self.weight = self.weight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   end
   if self.gradWeight and self.gradWeight:dim() == 2 then
      self.gradWeight = self.gradWeight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   end
end
-- Return contiguous versions of input and (optionally) gradOutput, copying
-- into per-module scratch tensors only when a copy is actually needed.
local function makeContiguous(self, input, gradOutput)
   if not input:isContiguous() then
      self._input = self._input or input.new()
      self._input:resizeAs(input):copy(input)
      input = self._input
   end
   if gradOutput and not gradOutput:isContiguous() then
      self._gradOutput = self._gradOutput or gradOutput.new()
      self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
      gradOutput = self._gradOutput
   end
   return input, gradOutput
end
-- function to re-view the weight layout in a way that would make the MM ops happy
-- Flatten weight (and gradWeight) to 2D so the MM-based THNN kernels can
-- consume them.
local function viewWeight(self)
   local flat = self.nInputPlane * self.kH * self.kW
   self.weight = self.weight:view(self.nOutputPlane, flat)
   if self.gradWeight and self.gradWeight:dim() > 0 then
      self.gradWeight = self.gradWeight:view(self.nOutputPlane, flat)
   end
end
-- Restore the canonical 4D layout of weight (and gradWeight) after the
-- MM-based kernels have run.
local function unviewWeight(self)
   self.weight = self.weight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   if self.gradWeight and self.gradWeight:dim() > 0 then
      self.gradWeight = self.gradWeight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   end
end
-- Forward pass: swap the binarized weights into self.weight, run the
-- standard THNN MM convolution, then restore the real-valued weights so the
-- optimizer keeps updating the full-precision copy.
function BinarySpatialConvolution:updateOutput(input)
   backCompatibility(self)
   viewWeight(self)
   input = makeContiguous(self, input)
   -- binarize (stochastically when self.train and stcWeights are set)
   self.weightB = self:binarized(self.train)
   self.weight:copy(self.weightB)
   input.THNN.SpatialConvolutionMM_updateOutput(
      input:cdata(),
      self.output:cdata(),
      self.weight:cdata(),
      self.bias:cdata(),
      self.finput:cdata(),
      self.fgradInput:cdata(),
      self.kW, self.kH,
      self.dW, self.dH,
      self.padW, self.padH
   )
   -- restore real-valued weights
   self.weight:copy(self.weightOrg)
   unviewWeight(self)
   return self.output
end
-- Backward pass w.r.t. the input: must see the same binarized weights that
-- produced the forward output, so weightB is swapped in and weightOrg
-- restored afterwards.
function BinarySpatialConvolution:updateGradInput(input, gradOutput)
   if self.gradInput then
      backCompatibility(self)
      viewWeight(self)
      input, gradOutput = makeContiguous(self, input, gradOutput)
      self.weight:copy(self.weightB)
      input.THNN.SpatialConvolutionMM_updateGradInput(
         input:cdata(),
         gradOutput:cdata(),
         self.gradInput:cdata(),
         self.weight:cdata(),
         -- self.bias:cdata(), -- removed from this commit https://github.com/torch/nn/commit/651103f3aabc2dd154d6bd95ad565d14009255e6
         self.finput:cdata(),
         self.fgradInput:cdata(),
         self.kW, self.kH,
         self.dW, self.dH,
         self.padW, self.padH
      )
      self.weight:copy(self.weightOrg)
      unviewWeight(self)
      return self.gradInput
   end
end
-- Accumulate weight/bias gradients (scaled by `scale`, default 1). The
-- gradient computation uses only input and gradOutput, so no weight swap is
-- needed here; the straight-through estimator applies the gradients to the
-- real-valued weights held in self.weight.
function BinarySpatialConvolution:accGradParameters(input, gradOutput, scale)
   scale = scale or 1
   backCompatibility(self)
   input, gradOutput = makeContiguous(self, input, gradOutput)
   viewWeight(self)
   input.THNN.SpatialConvolutionMM_accGradParameters(
      input:cdata(),
      gradOutput:cdata(),
      self.gradWeight:cdata(),
      self.gradBias:cdata(),
      self.finput:cdata(),
      self.fgradInput:cdata(),
      self.kW, self.kH,
      self.dW, self.dH,
      self.padW, self.padH,
      scale
   )
   unviewWeight(self)
end
-- Convert the module to the given tensor type. The finput/fgradInput scratch
-- buffers are replaced with fresh CPU tensors first so that parent.type can
-- cast them cleanly.
function BinarySpatialConvolution:type(type, tensorCache)
   if self.finput then self.finput = torch.Tensor() end
   if self.fgradInput then self.fgradInput = torch.Tensor() end
   return parent.type(self, type, tensorCache)
end
-- Pretty-printing delegates entirely to nn.SpatialConvolution.
function BinarySpatialConvolution:__tostring__()
   return parent.__tostring__(self)
end
-- Drop scratch buffers before serialization; parent clears the rest
-- (output, gradInput, ...).
function BinarySpatialConvolution:clearState()
   nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
   return parent.clearState(self)
end
================================================
FILE: Models/SpatialBatchNormalizationShiftPow2.lua
================================================
--[[
This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper:
"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio'
The code is based on nn library
--]]
local SpatialBatchNormalizationShiftPow2,parent = torch.class('SpatialBatchNormalizationShiftPow2', 'nn.Module')
--- Shift-based spatial batch normalization: scale factors are rounded to
--- powers of two (AP2) so multiplications become shifts.
-- @param nFeature   number of feature planes (0 disables the affine transform)
-- @param runningVal use running mean/std at eval time (default true)
-- @param eps        numerical-stability constant (default 1e-5)
-- @param momentum   running-statistics momentum (default 0.125)
function SpatialBatchNormalizationShiftPow2:__init(nFeature, runningVal, eps, momentum)
   parent.__init(self)
   assert(nFeature and type(nFeature) == 'number',
          'Missing argument #1: Number of feature planes. ' ..
          'Give 0 for no affine transform')
   self.eps = eps or 1e-5
   self.train = true
   self.momentum = momentum or 0.125
   -- BUGFIX: `runningVal or true` always evaluated to true (false or true ==
   -- true), so passing runningVal=false was silently ignored. Default to true
   -- only when the argument is omitted.
   if runningVal == nil then
      self.runningVal = true
   else
      self.runningVal = runningVal
   end
   self.running_mean = torch.Tensor()
   self.running_std = torch.Tensor()
   self.running_std_ap2 = torch.Tensor()  -- running std rounded to powers of two
   if nFeature > 0 then self.affine = true end
   if self.affine then
      self.weight = torch.Tensor(nFeature)
      self.weightSign = torch.Tensor(nFeature)
      self.weight_ap2 = torch.Tensor(nFeature)  -- gamma rounded to powers of two
      self.bias = torch.Tensor(nFeature)
      self.gradWeight = torch.Tensor(nFeature)
      self.gradBias = torch.Tensor(nFeature)
      self:reset()
   end
end
-- Reset the affine parameters: gamma = 1, beta = 0.
function SpatialBatchNormalizationShiftPow2:reset()
   self.weight:fill(1)
   self.bias:zero()
end
-- Forward pass of shift-based BN. All scale factors (1/std and gamma) are
-- rounded to the nearest power of two via 2^round(log2(.)) so the multiplies
-- can be implemented as shifts. Buffer reuse is order-critical: several
-- tensors computed here (centered, std, normalized, weight_ap2) are consumed
-- by updateGradInput/accGradParameters.
function SpatialBatchNormalizationShiftPow2:updateOutput(input)
   assert(input:dim() == 4, 'only mini-batch supported (4D tensor), got '
             .. input:dim() .. 'D tensor instead')
   local nBatch = input:size(1)
   local nFeature = input:size(2)
   local iH = input:size(3)
   local iW = input:size(4)
   -- buffers that are reused
   self.buffer = self.buffer or input.new()
   self.buffer2 = self.buffer2 or input.new()
   self.centered = self.centered or input.new()
   self.centered:resizeAs(input)
   self.centeredOrg = self.centeredOrg or input.new()
   self.centeredOrg:resizeAs(input)
   self.centeredSign = self.centeredSign or input.new()
   self.centeredSign:resizeAs(input)
   self.std = self.std or input.new()
   self.normalized = self.normalized or input.new()
   self.normalized:resizeAs(input)
   self.normalizedSign = self.normalizedSign or input.new()
   self.normalizedSign:resizeAs(input)
   self.output:resizeAs(input)
   self.gradInput:resizeAs(input)
   if self.train == false and self.runningVal == true then
      -- evaluation mode: normalize with the running statistics
      assert(self.running_mean:nDimension() ~= 0,
             'Module never run on training data. First run on some training data before evaluating.')
      self.output:copy(input)
      self.buffer:repeatTensor(self.running_mean:view(1, nFeature, 1, 1), nBatch, 1, iH, iW)
      self.output:add(-1, self.buffer)
      -- round running 1/std to the nearest power of two (AP2)
      self.running_std_ap2:copy(torch.pow(2,torch.round(torch.log(self.running_std):div(math.log(2)))))
      self.buffer:repeatTensor(self.running_std_ap2:view(1, nFeature, 1, 1), nBatch, 1, iH, iW)
      self.output:cmul(self.buffer)
   else -- training mode
      if self.running_mean:nDimension() == 0 then
         self.running_mean:resize(nFeature):zero()
      end
      if self.running_std:nDimension() == 0 then
         self.running_std:resize(nFeature):zero()
         self.running_std_ap2:resize(nFeature):zero()
      end
      -- calculate mean over mini-batch, over feature-maps
      local in_folded = input:view(nBatch, nFeature, iH * iW)
      self.buffer:mean(in_folded, 1)
      self.buffer2:mean(self.buffer, 3)
      self.running_mean:mul(1 - self.momentum):add(self.momentum, self.buffer2) -- add to running mean
      self.buffer:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      -- subtract mean
      self.centered:add(input, -1, self.buffer) -- x - E(x)
      self.centeredOrg:copy(self.centered)
      self.centeredSign:copy(self.centered)
      self.centeredSign:sign()
      -- AP2-quantize the centered input, keeping its sign
      self.centered:copy(torch.pow(2,torch.round(torch.log(self.centered:abs()):div(math.log(2))))):cmul(self.centeredSign)
      -- calculate standard deviation over mini-batch
      self.buffer:copy(self.centered):cmul(self.centeredOrg) --:abs()
      -- calculate standard deviation over mini-batch
      local buf_folded = self.buffer:view(nBatch,nFeature,iH*iW)
      self.std:mean(self.buffer2:mean(buf_folded, 1), 3)
      self.std:add(self.eps):sqrt():pow(-1) -- 1 / E([x - E(x)]^2)
      self.running_std:mul(1 - self.momentum):add(self.momentum, self.std) -- add to running stdv
      -- AP2-quantize 1/std so the division becomes a shift
      self.std:copy(torch.pow(2,torch.round(torch.log(self.std):div(math.log(2)))))
      self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      -- divide standard-deviation + eps
      self.output:cmul(self.centeredOrg, self.buffer)
      self.normalized:copy(self.output)
      self.normalizedSign:copy(self.normalized)
      self.normalizedSign:sign()
      -- AP2-quantize the normalized activations (used by accGradParameters)
      self.normalized:copy(torch.pow(2,torch.round(torch.log(self.normalized:abs()):div(math.log(2)))):cmul(self.normalizedSign))
      -- self.normalized[self.normalized:lt(0)]=1; -- Can improve results
   end
   if self.affine then
      -- multiply with gamma and add beta
      self.weight_ap2:copy(self.weight)
      self.weightSign:copy(self.weight):sign()
      -- AP2-quantize gamma, keeping its sign
      self.weight_ap2:copy(torch.pow(2,torch.round(torch.log(self.weight:clone():abs()):div(math.log(2))))):cmul(self.weightSign)
      --self.weight:fill(1) --Almost similar results
      self.buffer:repeatTensor(self.weight_ap2:view(1, nFeature, 1, 1),nBatch, 1, iH, iW)
      self.output:cmul(self.buffer)
      self.buffer:repeatTensor(self.bias:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      self.output:add(self.buffer)
   end
   return self.output
end
-- Backward pass. Consumes self.centered, self.std and self.weight_ap2 that
-- were produced by updateOutput, so updateOutput must be called first.
-- Training-mode only (batch statistics are differentiated through).
function SpatialBatchNormalizationShiftPow2:updateGradInput(input, gradOutput)
   assert(input:dim() == 4, 'only mini-batch supported')
   assert(gradOutput:dim() == 4, 'only mini-batch supported')
   assert(self.train == true, 'should be in training mode when self.train is true')
   local nBatch = input:size(1)
   local nFeature = input:size(2)
   local iH = input:size(3)
   local iW = input:size(4)
   -- term from differentiating through the batch variance
   self.gradInput:cmul(self.centered, gradOutput)
   local gi_folded = self.gradInput:view(nBatch, nFeature, iH * iW)
   self.buffer2:mean(self.buffer:mean(gi_folded, 1), 3)
   self.gradInput:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
   self.gradInput:cmul(self.centered):mul(-1)
   self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:cmul(self.buffer):cmul(self.buffer)
   -- term from differentiating through the batch mean
   self.buffer:mean(gradOutput:view(nBatch, nFeature, iH*iW), 1)
   self.buffer2:mean(self.buffer, 3)
   self.buffer:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:add(gradOutput):add(-1, self.buffer)
   -- scale by the (AP2-quantized) 1/std
   self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:cmul(self.buffer)
   if self.affine then
      -- scale by the (AP2-quantized) gamma
      self.buffer:repeatTensor(self.weight_ap2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      self.gradInput:cmul(self.buffer)
   end
   return self.gradInput
end
-- Accumulate gradients for gamma (gradWeight) and beta (gradBias), scaled by
-- `scale` (default 1). Uses self.normalized from updateOutput.
function SpatialBatchNormalizationShiftPow2:accGradParameters(input, gradOutput, scale)
   if self.affine then
      scale = scale or 1.0
      local nBatch = input:size(1)
      local nFeature = input:size(2)
      local iH = input:size(3)
      local iW = input:size(4)
      -- dL/dgamma = sum over batch and pixels of (normalized * gradOutput)
      self.buffer2:resizeAs(self.normalized):copy(self.normalized)
      self.buffer2 = self.buffer2:cmul(gradOutput):view(nBatch, nFeature, iH*iW)
      self.buffer:sum(self.buffer2, 1) -- sum over mini-batch
      self.buffer2:sum(self.buffer, 3) -- sum over pixels
      self.gradWeight:add(scale, self.buffer2)
      -- dL/dbeta = sum over batch and pixels of gradOutput
      self.buffer:sum(gradOutput:view(nBatch, nFeature, iH*iW), 1)
      self.buffer2:sum(self.buffer, 3)
      self.gradBias:add(scale, self.buffer2) -- sum over mini-batch
   end
end
================================================
FILE: Models/cudnnBinarySpatialConvolution.lua
================================================
-- cudnn-backed binary-weight 2D convolution (GPU counterpart of
-- BinarySpatialConvolution).
local cudnnBinarySpatialConvolution, parent =
   torch.class('cudnnBinarySpatialConvolution', 'cudnn.SpatialConvolution')
local ffi = require 'ffi'
local errcheck = cudnn.errcheck
-- caches of autotuned cudnn algorithm choices, keyed by pass type
local autotunerCache = {}
autotunerCache[1] = {} -- forward
autotunerCache[2] = {} -- backwardFilter
autotunerCache[3] = {} -- backwardData
--- Constructor. Allocates the real-valued weight plus the shadow tensors
--- used for binarization (weightB/weightOrg) and stochastic rounding
--- (randmat/maskStc).
-- BUGFIX: the shadow tensors were allocated as (nOut, nIn/groups, kW, kH)
-- while weight and gradWeight use (nOut, nIn/groups, kH, kW); for non-square
-- kernels the copies between weight and weightB/weightOrg would fail. All
-- tensors are now (…, kH, kW), the layout cudnn expects.
function cudnnBinarySpatialConvolution:__init(nInputPlane, nOutputPlane,
                                              kW, kH, dW, dH, padW, padH, stcWeights, groups)
   -- temporarily disable reset so parent.__init does not initialize weights
   -- before the shadow tensors below exist
   local delayedReset = self.reset
   self.reset = function() end
   parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH)
   self.reset = delayedReset
   self.padW = padW or 0
   self.padH = padH or 0
   self.groups = groups or 1
   self.stcWeights = stcWeights or false  -- stochastic binarization flag
   assert(nInputPlane % self.groups == 0,
          'nInputPlane should be divisible by nGroups')
   assert(nOutputPlane % self.groups == 0,
          'nOutputPlane should be divisible by nGroups')
   self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
   self.weightB = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)   -- binarized weights
   self.weightOrg = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW) -- real-valued backup
   self.randmat = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)   -- noise for stochastic rounding
   self.maskStc = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
   self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
   self:reset()
   -- should nil for serialization, the reset will still work
   self.reset = nil
end
--- Return the binarized weights and snapshot the real-valued weights into
--- self.weightOrg (same scheme as BinarySpatialConvolution:binarized).
-- BUGFIX: the original contained an `if not self.binaryFlag` branch that was
-- unreachable because binaryFlag is unconditionally set true on the previous
-- line; the dead branch has been removed (behavior unchanged).
function cudnnBinarySpatialConvolution:binarized(trainFlag)
   self.weightOrg:copy(self.weight)
   self.binaryFlag = true
   -- map weights to [0, 1] via hard sigmoid (w + 1) / 2
   self.weightB:copy(self.weight):add(1):div(2):clamp(0, 1)
   if not self.stcWeights or not trainFlag then
      -- deterministic: round to {0,1}, then rescale to {-1,+1}
      self.weightB:round():mul(2):add(-1)
   else
      -- stochastic: probability minus uniform noise
      -- NOTE(review): this stores p - u rather than sign(p - u); confirm that
      -- the non-sign output is intended (matches the original code).
      self.maskStc = self.weightB - self.randmat:rand(self.randmat:size())
      self.weightB:copy(self.maskStc)
   end
   return self.weightB
end
-- if you change the configuration of the module manually, call this
-- Rebuild the cudnn filter descriptor (and bias descriptor) for the current
-- weight layout. Call this after changing the module configuration manually.
function cudnnBinarySpatialConvolution:resetWeightDescriptors()
   assert(torch.typename(self.weight) == 'torch.CudaTensor',
          'Only Cuda supported duh!')
   assert(torch.typename(self.bias) == 'torch.CudaTensor' or not self.bias,
          'Only Cuda supported duh!')
   -- for compatibility
   self.groups = self.groups or 1
   -- create filterDescriptor for weight
   self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
   errcheck('cudnnCreateFilterDescriptor', self.weightDesc)
   -- per-group filter shape in NCHW order
   local desc = torch.IntTensor({self.nOutputPlane/self.groups,
                                 self.nInputPlane/self.groups,
                                 self.kH, self.kW})
   errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
            'CUDNN_DATA_FLOAT', 'CUDNN_TENSOR_NCHW', 4,
            desc:data());
   -- free the descriptor when the handle is garbage-collected
   local function destroyWDesc(d)
      errcheck('cudnnDestroyFilterDescriptor', d[0]);
   end
   ffi.gc(self.weightDesc, destroyWDesc)
   -- create descriptor for bias
   if self.bias then
      self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
   end
end
-- Enable (default) or disable cudnn "fastest" algorithm selection; clears the
-- cached input size so algorithms are re-chosen on the next forward.
function cudnnBinarySpatialConvolution:fastest(mode)
   self.fastest_mode = (mode == nil) and true or mode
   self.iSize = self.iSize or torch.LongStorage(4)
   self.iSize:fill(0)
   return self
end
-- Manually pin cudnn algorithms: fmode (forward), bdmode (backward-data),
-- bwmode (backward-filter). A nil argument leaves that mode untouched.
-- Clears the cached input size so descriptors are rebuilt.
function cudnnBinarySpatialConvolution:setMode(fmode, bdmode, bwmode)
   -- nil values never enter the table, which reproduces the ~= nil guards
   local requested = { fmode = fmode, bdmode = bdmode, bwmode = bwmode }
   for slot, algo in pairs(requested) do
      self[slot] = algo
   end
   self.iSize = self.iSize or torch.LongStorage(4)
   self.iSize:fill(0)
   return self
end
-- Clear any manually pinned cudnn algorithm modes (see setMode).
function cudnnBinarySpatialConvolution:resetMode()
   self.fmode, self.bdmode, self.bwmode = nil, nil, nil
   return self
end
-- Remove the bias term (and its gradient accumulator) from this layer.
function cudnnBinarySpatialConvolution:noBias()
   self.bias, self.gradBias = nil, nil
   return self
end
-- Descriptor creation is delegated unchanged to cudnn.SpatialConvolution.
function cudnnBinarySpatialConvolution:createIODescriptors(input)
   parent.createIODescriptors(self,input)
end
-- Scalar constants handed to cudnn calls.
local one = torch.FloatTensor({1});
local zero = torch.FloatTensor({0});
-- Ensure input/gradOutput are contiguous, copying into reusable per-module
-- scratch tensors only when needed.
local function makeContiguous(self, input, gradOutput)
   local function ensure(t, cacheKey)
      if t and not t:isContiguous() then
         self[cacheKey] = self[cacheKey] or t.new()
         self[cacheKey]:typeAs(t):resizeAs(t):copy(t)
         return self[cacheKey]
      end
      return t
   end
   input = ensure(input, '_input')
   gradOutput = ensure(gradOutput, '_gradOutput')
   return input, gradOutput
end
-- Forward pass: swap binarized weights in, run the cudnn forward, restore
-- the real-valued weights.
function cudnnBinarySpatialConvolution:updateOutput(input)
   -- NOTE(review): this copy is repeated as the first statement of
   -- binarized(); presumably redundant but kept for safety — confirm.
   self.weightOrg:copy(self.weight)
   self.weightB = self:binarized(self.train)
   self.weight:copy(self.weightB)
   parent.updateOutput(self,input)
   -- restore real-valued weights for the optimizer
   self.weight:copy(self.weightOrg)
   return self.output
end
--- Backward pass w.r.t. the input: must see the same binarized weights that
--- produced the forward output, so weightB is swapped in and weightOrg
--- restored afterwards.
-- BUGFIX: the original passed a stray `scale` argument that was an undefined
-- global (always nil); parent.updateGradInput takes only (input, gradOutput).
function cudnnBinarySpatialConvolution:updateGradInput(input, gradOutput)
   if not self.gradInput then return end
   self.weight:copy(self.weightB)
   parent.updateGradInput(self, input, gradOutput:contiguous())
   self.weight:copy(self.weightOrg)
   return self.gradInput
end
-- Accumulate weight/bias gradients via cudnn; the filter gradient depends
-- only on input and gradOutput, so no weight swap is needed here.
function cudnnBinarySpatialConvolution:accGradParameters(input, gradOutput, scale)
   parent.accGradParameters(self, input, gradOutput:contiguous(), scale)
end
-- Drop all cached cudnn descriptors, algorithm choices and workspace buffers
-- so they are lazily rebuilt (and stay out of serialized checkpoints).
function cudnnBinarySpatialConvolution:clearDesc()
   local cached = {
      'weightDesc', 'biasDesc', 'convDesc', 'iDesc', 'oDesc', 'oDescForBias',
      'algType', 'fwdAlgType', 'bwdDataAlgType', 'bwdFilterAlgType',
      'extraBuffer', 'extraBufferSizeInBytes', 'scaleT',
   }
   for _, field in ipairs(cached) do
      self[field] = nil
   end
end
-- Serialization hook: strip non-serializable cudnn handles first, then write
-- a shallow copy of the remaining fields.
function cudnnBinarySpatialConvolution:write(f)
   self:clearDesc()
   local serializable = {}
   for key, value in pairs(self) do
      serializable[key] = value
   end
   f:writeObject(serializable)
end
-- Clear descriptors plus the standard nn.Module state (output, gradInput).
function cudnnBinarySpatialConvolution:clearState()
   self:clearDesc()
   return nn.Module.clearState(self)
end
================================================
FILE: README.md
================================================
Deep Networks on classification tasks using Torch
=================================================
This is a complete training example for BinaryNets using Binary-Backpropagation algorithm as explained in
"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio'
on following datasets: Cifar10/100, SVHN, MNIST
## Data
We use the dp library to extract all the data; please see the installation section below.
## Dependencies
* Torch (http://torch.ch)
* "DataProvider.torch" (https://github.com/eladhoffer/DataProvider.torch) for DataProvider class.
* "cudnn.torch" (https://github.com/soumith/cudnn.torch) for faster training. Can be avoided by changing "cudnn" to "nn" in models.
* "dp" (https://github.com/nicholas-leonard/dp.git) for data extraction
* "unsup" (https://github.com/koraykv/unsup.git) for data pre-processing
To install all dependencies (assuming torch is installed) use:
```bash
luarocks install https://raw.githubusercontent.com/eladhoffer/DataProvider.torch/master/dataprovider-scm-1.rockspec
luarocks install cudnn
luarocks install dp
luarocks install unsup
```
## Training
Create pre-processing folder:
```lua
cd BinaryNet
mkdir PreProcData
```
Start training using:
```lua
th Main_BinaryNet_Cifar10.lua -network BinaryNet_Cifar10_Model
```
or,
```lua
th Main_BinaryNet_MNIST.lua -network BinaryNet_MNIST_Model
```
## Run with Docker
The Docker is built from `nvidia/cuda:8.0-cudnn5-devel` with Torch commit `0219027e6c4644a0ba5c5bf137c989a0a8c9e01b`
- To build image, run: `docker build -t binarynet:torch-gpu-cuda-8.0 -f Dockerfile/binarynet-torch-gpu-cuda-8.0 .` or to pull docker image: `docker pull hychiang/binarynet:torch-gpu-cuda-8.0`
- To launch image with gpu, run: `docker run -it --gpus all binarynet:torch-gpu-cuda-8.0`
- To train BNN with Cifar10: `th Main_BinaryNet_Cifar10.lua -network BinaryNet_Cifar10_Model`
## Additional flags
|Flag | Default Value |Description
|:----------------|:--------------------:|:----------------------------------------------
|modelsFolder | ./Models/ | Models Folder
|network | Model.lua | Model file - must return valid network.
|LR | 0.1 | learning rate
|LRDecay          | 0                    | learning rate decay (in # samples)
|weightDecay | 1e-4 | L2 penalty on the weights
|momentum | 0.9 | momentum
|batchSize | 128 | batch size
|stcNeurons | true | using stochastic binarization for the neurons or not
|stcWeights | false | using stochastic binarization for the weights or not
|optimization | adam | optimization method
|SBN | true | use shift based batch-normalization or not
|runningVal | true | use running mean and std or not
|epoch | -1 | number of epochs to train (-1 for unbounded)
|threads | 8 | number of threads
|type | cuda | float or cuda
|devid | 1 | device ID (if using CUDA)
|load | none | load existing net weights
|save | time-identifier | save directory
|dataset | Cifar10 | Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST
|dp_prepro | false | preprocessing using dp lib
|whiten | false | whiten data
|augment | false | Augment training data
|preProcDir | ./PreProcData/ | Data for pre-processing (means,Pinv,P)
================================================
FILE: SqrHingeEmbeddingCriterion.lua
================================================
--[[
This Function implement the squared hinge loss criterion
]]
-- Squared hinge loss: sum(max(0, margin - y*x)^2), optionally averaged over
-- elements (sizeAverage, default true).
local SqrtHingeEmbeddingCriterion, parent = torch.class('SqrtHingeEmbeddingCriterion', 'nn.Criterion')
-- @param margin hinge margin (default 1)
function SqrtHingeEmbeddingCriterion:__init(margin)
   parent.__init(self)
   self.margin = margin or 1
   self.sizeAverage = true
end
-- Forward pass: loss = sum(max(0, margin - y*x)^2), divided by the number of
-- elements when sizeAverage is enabled. The clamped hinge values are kept in
-- self.buffer for reuse by updateGradInput.
function SqrtHingeEmbeddingCriterion:updateOutput(input,y)
   self.buffer = self.buffer or input.new()
   -- promote a scalar target to a 1-element tensor
   if not torch.isTensor(y) then
      self.ty = self.ty or input.new():resize(1)
      self.ty[1] = y
      y = self.ty
   end
   -- hinge = max(0, margin - y*x), elementwise
   local hinge = self.buffer
   hinge:resizeAs(input):copy(input)
   hinge:cmul(y):mul(-1):add(self.margin)
   hinge[torch.le(hinge, 0)] = 0
   self.output = hinge:clone():pow(2):sum()
   if self.sizeAverage == nil or self.sizeAverage == true then
      self.output = self.output / input:nElement()
   end
   return self.output
end
-- Gradient: d/dx sum(max(0, margin - y*x)^2) = -2*y*max(0, margin - y*x).
-- Relies on self.buffer holding the clamped hinge values from updateOutput,
-- so updateOutput must be called first with the same (input, y).
function SqrtHingeEmbeddingCriterion:updateGradInput(input, y)
   -- NOTE(review): if y is a scalar here but updateOutput was never called
   -- with a scalar target, self.ty is nil — confirm callers pair the calls.
   if not torch.isTensor(y) then self.ty[1]=y; y=self.ty end
   self.gradInput:resizeAs(input):copy(y):mul(-2):cmul(self.buffer)
   -- zero the gradient where the hinge is inactive (y*x > margin)
   self.gradInput[torch.cmul(y,input):gt(self.margin)] = 0
   if (self.sizeAverage == nil or self.sizeAverage == true) then
      self.gradInput:mul(1 / input:nElement())
   end
   return self.gradInput
end
================================================
FILE: adaMax_binary_clip_shift.lua
================================================
--[[ An implementation of Shift based AdaMax based on http://arxiv.org/pdf/1412.6980.pdf as described the paper:
"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio'
Note that this function perform the weight cliping as well
ARGS:
- 'opfunc' : a function that takes a single input (X), the point
of a evaluation, and returns f(X) and df/dX
- 'x' : the initial point
- 'config` : a table with configuration parameters for the optimizer
- 'config.learningRate' : learning rate
- 'config.beta1' : first moment coefficient
- 'config.beta2' : second moment coefficient
- 'config.epsilon' : for numerical stability
- 'state' : a table describing the state of the optimizer; after each
call the state is modified
RETURN:
- `x` : the new x vector
- `f(x)` : the function, evaluated before the update
]]
-- Shift-based AdaMax step: the step size and the infinity-norm estimate are
-- rounded to powers of two (AP2) so the multiplies/divides become shifts.
-- Parameters flagged by clipV are clamped to [-1, 1] after the update.
function adaMax_binary_clip_shift(opfunc, x, config, state)
   -- (0) get/update state
   local config = config or {}
   local state = state or config
   local lr = config.learningRate or 0.002
   local GLRvec = config.GLRvec or 1    -- per-parameter (Glorot) LR scaling vector
   local clipV = config.clipV or 0      -- mask: 1 where parameters are clipped to [-1,1]
   local beta1 = config.beta1 or 0.9
   local beta2 = config.beta2 or 0.999
   local epsilon = config.epsilon or 2^-27
   -- (1) evaluate f(x) and df/dx
   local fx, dfdx = opfunc(x)
   -- Initialization
   state.t = state.t or 0
   -- Exponential moving average of gradient values
   state.m = state.m or x.new(dfdx:size()):zero()
   -- Exponential moving average of squared gradient values
   state.v = state.v or x.new(dfdx:size()):zero()
   -- A tmp tensor to hold the sqrt(v) + epsilon
   state.denom = state.denom or x.new(dfdx:size()):zero()
   state.t = state.t + 1
   -- Decay the first and second moment running average coefficient
   state.m:mul(beta1):add(1-beta1, dfdx)
   -- AdaMax: v is the exponentially-weighted infinity norm of the gradients.
   -- NOTE(review): dfdx:abs() mutates the caller's gradient tensor in place;
   -- dfdx is not reused below, but confirm callers do not rely on it.
   state.v:copy( torch.cmax(state.v:mul(beta2),dfdx:abs()) )
   local biasCorrection1 = 1 - beta1^state.t
   local stepSize = lr/biasCorrection1 --math.sqrt(biasCorrection2)/biasCorrection1
   -- round the step size to the nearest power of two (shift-based update)
   stepSize=math.pow(2,torch.round(math.log(stepSize)/(math.log(2))))
   -- (2) update x
   local tmp=torch.zeros(x:size())
   if opt.type == 'cuda' then
      tmp=tmp:cuda()
   end
   -- AP2-quantize the infinity-norm estimate before dividing
   state.v:copy(torch.pow(2,torch.round(torch.log(state.v):div(math.log(2)))))
   state.v:add(epsilon)
   tmp:addcdiv(1, state.m, state.v)
   -- Multiply by Glorot learning rate vector
   x:addcmul(-stepSize, tmp, GLRvec)
   -- Clip to [-1,1]
   x[clipV:eq(1)]=x[clipV:eq(1)]:clamp(-1,1)
   -- return x*, f(x) before optimization
   return x, {fx}
end
================================================
FILE: adam_binary_clip_b.lua
================================================
--[[ An implementation of Adam http://arxiv.org/pdf/1412.6980.pdf
Note that this function perform the weight cliping as well
ARGS:
- 'opfunc' : a function that takes a single input (X), the point
of a evaluation, and returns f(X) and df/dX
- 'x' : the initial point
- 'config` : a table with configuration parameters for the optimizer
- 'config.learningRate' : learning rate
- 'config.beta1' : first moment coefficient
- 'config.beta2' : second moment coefficient
- 'config.epsilon' : for numerical stability
- 'state' : a table describing the state of the optimizer; after each
call the state is modified
RETURN:
- `x` : the new x vector
- `f(x)` : the function, evaluated before the update
]]
--- Adam step with per-parameter learning-rate scaling (config.GLRvec) and
--- binary-weight clipping: parameters flagged by config.clipV are clamped to
--- [-1, 1] after the update. Interface matches adaMax_binary_clip_shift.
-- BUGFIX: `clipV` was previously read as an undefined global at the clamp
-- step (a runtime error unless a global happened to exist); it is now taken
-- from config.clipV, consistent with adaMax_binary_clip_shift.
function adam_binary_clip_b(opfunc, x, config, state)
   -- (0) get/update state
   local config = config or {}
   local state = state or config
   local lr = config.learningRate or 0.001
   local GLRvec = config.GLRvec or 1  -- per-parameter (Glorot) LR scaling vector
   -- mask: 1 where parameters are clipped to [-1,1]
   -- NOTE(review): callers are expected to pass a tensor; the numeric default
   -- mirrors adaMax_binary_clip_shift and would not support :eq() — confirm.
   local clipV = config.clipV or 0
   local beta1 = config.beta1 or 0.9
   local beta2 = config.beta2 or 0.999
   local epsilon = config.epsilon or 1e-8
   -- (1) evaluate f(x) and df/dx
   local fx, dfdx = opfunc(x)
   -- lazy state initialization
   state.t = state.t or 0
   state.m = state.m or x.new(dfdx:size()):zero()          -- 1st-moment EMA
   state.v = state.v or x.new(dfdx:size()):zero()          -- 2nd-moment EMA
   state.denom = state.denom or x.new(dfdx:size()):zero()  -- sqrt(v) + eps buffer
   state.t = state.t + 1
   -- update biased moment estimates
   state.m:mul(beta1):add(1-beta1, dfdx)
   state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)
   state.denom:copy(state.v):sqrt():add(epsilon)
   local biasCorrection1 = 1 - beta1^state.t
   local biasCorrection2 = 1 - beta2^state.t
   local stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1
   -- (2) update x, scaled elementwise by GLRvec
   local tmp = torch.zeros(x:size())
   if opt.type == 'cuda' then
      tmp = tmp:cuda()
   end
   tmp:addcdiv(1, state.m, state.denom)
   x:addcmul(-stepSize, tmp, GLRvec)
   -- clip binary-layer weights to [-1, 1]
   x[clipV:eq(1)] = x[clipV:eq(1)]:clamp(-1,1)
   return x, {fx}
end
gitextract_kj09dah_/ ├── Data.lua ├── Dockerfile/ │ └── binarynet-torch-gpu-cuda-8.0 ├── Main_BinaryNet_Cifar10.lua ├── Main_BinaryNet_MNIST.lua ├── Main_BinaryNet_SVHN.lua ├── Models/ │ ├── BatchNormalizationShiftPow2.lua │ ├── BinarizedNeurons.lua │ ├── BinaryLinear.lua │ ├── BinaryNet_Cifar10_Model.lua │ ├── BinaryNet_MNIST_Model.lua │ ├── BinaryNet_SVHN_Model.lua │ ├── BinarySpatialConvolution.lua │ ├── SpatialBatchNormalizationShiftPow2.lua │ └── cudnnBinarySpatialConvolution.lua ├── README.md ├── SqrHingeEmbeddingCriterion.lua ├── adaMax_binary_clip_shift.lua └── adam_binary_clip_b.lua
Condensed preview — 18 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (105K chars).
[
{
"path": "Data.lua",
"chars": 9001,
"preview": "--[[\nThis code create the training test and validation datasets and preform diffrent kinds of preprocessing\nThis code is"
},
{
"path": "Dockerfile/binarynet-torch-gpu-cuda-8.0",
"chars": 1173,
"preview": "FROM nvidia/cuda:8.0-cudnn5-devel\nWORKDIR /workspace\n\n# Install dependencies\nRUN apt-get update \\\n && apt-get install -y"
},
{
"path": "Main_BinaryNet_Cifar10.lua",
"chars": 10343,
"preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
},
{
"path": "Main_BinaryNet_MNIST.lua",
"chars": 10322,
"preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
},
{
"path": "Main_BinaryNet_SVHN.lua",
"chars": 10360,
"preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
},
{
"path": "Models/BatchNormalizationShiftPow2.lua",
"chars": 6454,
"preview": "--[[\n This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper"
},
{
"path": "Models/BinarizedNeurons.lua",
"chars": 858,
"preview": "local BinarizedNeurons,parent = torch.class('BinarizedNeurons', 'nn.Module')\n\n\nfunction BinarizedNeurons:__init(stcFlag)"
},
{
"path": "Models/BinaryLinear.lua",
"chars": 2700,
"preview": "--require 'randomkit'\n\nlocal BinaryLinear, parent = torch.class('BinaryLinear', 'nn.Linear')\n\nfunction BinaryLinear:__in"
},
{
"path": "Models/BinaryNet_Cifar10_Model.lua",
"chars": 7655,
"preview": "--[[This code specify the model for CIFAR 10 dataset. This model uses the Shift based batch-normalization algorithm.\nIn "
},
{
"path": "Models/BinaryNet_MNIST_Model.lua",
"chars": 3339,
"preview": "--[[This code specify the model for MNIST dataset. This model uses the Shift based batch-normalization algorithm.\nIn thi"
},
{
"path": "Models/BinaryNet_SVHN_Model.lua",
"chars": 8202,
"preview": "--[[This code specify the model for SVHN dataset. This model uses the Shift based batch-normalization algorithm.\nIn this"
},
{
"path": "Models/BinarySpatialConvolution.lua",
"chars": 6381,
"preview": "local BinarySpatialConvolution, parent = torch.class('BinarySpatialConvolution', 'nn.SpatialConvolution')\n\nfunction Bina"
},
{
"path": "Models/SpatialBatchNormalizationShiftPow2.lua",
"chars": 8179,
"preview": "--[[\n This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper"
},
{
"path": "Models/cudnnBinarySpatialConvolution.lua",
"chars": 6199,
"preview": "local cudnnBinarySpatialConvolution, parent =\n torch.class('cudnnBinarySpatialConvolution', 'cudnn.SpatialConvolution"
},
{
"path": "README.md",
"chars": 3776,
"preview": "Deep Networks on classification tasks using Torch\n=================================================\nThis is a complete t"
},
{
"path": "SqrHingeEmbeddingCriterion.lua",
"chars": 1266,
"preview": "--[[\nThis Function implement the squared hinge loss criterion\n]]\nlocal SqrtHingeEmbeddingCriterion, parent = torch.class"
},
{
"path": "adaMax_binary_clip_shift.lua",
"chars": 2774,
"preview": "--[[ An implementation of Shift based AdaMax based on http://arxiv.org/pdf/1412.6980.pdf as described the paper:\n \"Bi"
},
{
"path": "adam_binary_clip_b.lua",
"chars": 2303,
"preview": "--[[ An implementation of Adam http://arxiv.org/pdf/1412.6980.pdf\n\nNote that this function perform the weight cliping as"
}
]
About this extraction
This page contains the full source code of the itayhubara/BinaryNet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 18 files (98.9 KB), approximately 27.4k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.