Full Code of itayhubara/BinaryNet for AI

master c23b86285cd1 cached
18 files
98.9 KB
27.4k tokens
1 request
Download .txt
Repository: itayhubara/BinaryNet
Branch: master
Commit: c23b86285cd1
Files: 18
Total size: 98.9 KB

Directory structure:
gitextract_kj09dah_/

├── Data.lua
├── Dockerfile/
│   └── binarynet-torch-gpu-cuda-8.0
├── Main_BinaryNet_Cifar10.lua
├── Main_BinaryNet_MNIST.lua
├── Main_BinaryNet_SVHN.lua
├── Models/
│   ├── BatchNormalizationShiftPow2.lua
│   ├── BinarizedNeurons.lua
│   ├── BinaryLinear.lua
│   ├── BinaryNet_Cifar10_Model.lua
│   ├── BinaryNet_MNIST_Model.lua
│   ├── BinaryNet_SVHN_Model.lua
│   ├── BinarySpatialConvolution.lua
│   ├── SpatialBatchNormalizationShiftPow2.lua
│   └── cudnnBinarySpatialConvolution.lua
├── README.md
├── SqrHingeEmbeddingCriterion.lua
├── adaMax_binary_clip_shift.lua
└── adam_binary_clip_b.lua

================================================
FILE CONTENTS
================================================

================================================
FILE: Data.lua
================================================
--[[
Data.lua -- builds the training, test and validation datasets and performs
different kinds of preprocessing (ZCA whitening, GCN/LCN, mean/std normalization).
This code is based on Elad Hoffer's Data.lua file from the ConvNet-torch library
(https://github.com/eladhoffer/ConvNet-torch.git) and uses:
  - Elad Hoffer's DataProvider.torch library: https://github.com/eladhoffer/DataProvider.torch.git
  - Nicholas Leonard's dp library: https://github.com/nicholas-leonard/dp.git
  - Koray Kavukcuoglu's unsup library: https://github.com/koraykv/unsup.git
]]
require 'dp'
local DataProvider = require 'DataProvider'
-- Configuration comes from the global `opt` table set by the Main_* scripts;
-- every option has a fallback so this file can also be loaded stand-alone.
local opt = opt or {}
local Dataset = opt.dataset or 'Cifar10'
local PreProcDir = opt.preProcDir or './PreProcData/'
local Whiten = opt.whiten or false
local NormelizeWhiten = opt.NormelizeWhiten or false -- NOTE(review): appears unused in this file
local DataPath = opt.datapath or '/home/itayh/Datasets/' -- NOTE(review): also appears unused here
local normalization = opt.normalization or 'simple'
local format = opt.format or 'rgb'
-- Raw split tables ({data=FloatTensor, label=ByteTensor}) filled per-dataset below.
local TestData
local TrainData
local ValidData
local Classes

-- Load (or build and cache) the three splits for the selected dataset.
-- Each branch first tries the cached .t7 files under PreProcDir; on a miss it
-- downloads/preprocesses via the dp library and writes the cache.
if Dataset =='Cifar100' then
  local file_valid = paths.concat(PreProcDir, format .. 'whiten_valid.t7')
  local file_train = paths.concat(PreProcDir, format .. 'whiten_train.t7')
  local file_test = paths.concat(PreProcDir, format .. 'whiten_test.t7')
  if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
    ValidData=torch.load(file_valid)
    TrainData=torch.load(file_train)
    TestData=torch.load(file_test)
  else
    -- NOTE(review): the mkdir path is hard-coded and ignores PreProcDir --
    -- confirm it matches the opt.preProcDir passed by the main scripts.
    if paths.dirp(PreProcDir)==false then
     sys.execute('mkdir PreProcData/Cifar100')
    end
    -- NOTE(review): input_preprocess and ds leak as globals here (no `local`).
    input_preprocess = {}
    table.insert(input_preprocess, dp.ZCA())
    ds = dp.Cifar100{scale={0,1}, valid_ratio=0.1,input_preprocess = input_preprocess}
    ValidData = {data=ds:validSet():inputs():input():clone():float(), label=ds:validSet():targets():input():clone():byte() }
    TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
    TestData  = {data=ds:testSet():inputs():input():float() , label=ds:testSet():targets():input():byte()  }
    collectgarbage()
    torch.save(file_valid,ValidData)
    torch.save(file_train,TrainData)
    torch.save(file_test,TestData)
  end
  -- NOTE(review): this branch leaves Classes nil; later code that reads
  -- data.Classes (e.g. optim.ConfusionMatrix) would fail for Cifar100.
elseif Dataset == 'Cifar10' then
    local file_valid = paths.concat(PreProcDir, format .. 'whiten_valid.t7')
    local file_train = paths.concat(PreProcDir, format .. 'whiten_train.t7')
    local file_test = paths.concat(PreProcDir, format .. 'whiten_test.t7')
    if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
      ValidData=torch.load(file_valid)
      TrainData=torch.load(file_train)
      TestData=torch.load(file_test)
    else
      if paths.dirp(PreProcDir)==false then
       sys.execute('mkdir PreProcData/Cifar10')
      end
      input_preprocess = {}
      table.insert(input_preprocess, dp.ZCA())
      -- 10% of the training set is held out as validation.
      ds = dp.Cifar10{scale={0,1},valid_ratio=0.1,input_preprocess = input_preprocess} --,input_preprocess = input_preprocess}  scale={0,1},
      ValidData = {data=ds:validSet():inputs():input():float(), label=ds:validSet():targets():input():clone():byte() }
      TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
      TestData  = {data=ds:testSet():inputs():input():float(), label=ds:testSet():targets():input():byte()  }
      collectgarbage()
      torch.save(file_valid,ValidData)
      torch.save(file_train,TrainData)
      torch.save(file_test,TestData)
    end
    Classes = {'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}
elseif Dataset == 'MNIST' then
  local file_valid = paths.concat(PreProcDir, format .. '_valid.t7')
  local file_train = paths.concat(PreProcDir, format .. '_train.t7')
  local file_test = paths.concat(PreProcDir, format .. '_test.t7')
  if (paths.filep(file_valid) and paths.filep(file_train) and paths.filep(file_test)) then
    ValidData=torch.load(file_valid)
    TrainData=torch.load(file_train)
    TestData=torch.load(file_test)
  else
    if paths.dirp(PreProcDir)==false then
     sys.execute('mkdir PreProcData/MNIST')
    end
    -- MNIST only gets a [0,1] scaling; no whitening preprocessor.
    ds = dp.Mnist{scale={0,1}}
    ValidData = {data=ds:validSet():inputs():input():clone():float(), label=ds:validSet():targets():input():clone():byte() }
    TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }
    TestData  = {data=ds:testSet():inputs():input():float() , label=ds:testSet():targets():input():byte()  }
    collectgarbage()
    torch.save(file_valid,ValidData)
    torch.save(file_train,TrainData)
    torch.save(file_test,TestData)
  end
  Classes = {1,2,3,4,5,6,7,8,9,0}
elseif Dataset == 'SVHN' then
    -- SVHN is preprocessed with global contrast normalization (GCN) followed
    -- by LeCun-style local contrast normalization (LCN) via the dp library.
    local LCNfile_valid = paths.concat(PreProcDir, format .. 'GCN_LCN_valid.t7')
    local LCNfile_train = paths.concat(PreProcDir, format .. 'GCN_LCN_train.t7')
    local LCNfile_test = paths.concat(PreProcDir, format .. 'GCN_LCN_test.t7')
    print(LCNfile_valid)
    if (paths.filep(LCNfile_valid) and paths.filep(LCNfile_train) and paths.filep(LCNfile_test)) then
      ValidData=torch.load(LCNfile_valid)
      TrainData=torch.load(LCNfile_train)
      TestData=torch.load(LCNfile_test)
    else
      if paths.dirp(PreProcDir)==false then
       sys.execute('mkdir PreProcData/SVHN')
      end
      local input_preprocess = {}
      table.insert(input_preprocess, dp.GCN{batch_size=5000,use_std=true,sqrt_bias=10})
      table.insert(input_preprocess, dp.LeCunLCN{kernel_size=9,divide_by_std=true,batch_size=5000,progress=true}) --,kernel_size=31,kernel_std=32})
      ds = dp.Svhn{scale={0,1}, input_preprocess = input_preprocess}
      -- Each split is additionally rescaled by its own maximum value.
      ValidData = {data=ds:validSet():inputs():input():float(), label=ds:validSet():targets():input():byte() }; ValidData.data:div( ValidData.data:max())
      TrainData = {data=ds:trainSet():inputs():input():float(), label=ds:trainSet():targets():input():byte() }; TrainData.data:div( TrainData.data:max())
      TestData  = {data=ds:testSet():inputs():input():float(), label=ds:testSet():targets():input():byte() };  TestData.data:div( TestData.data:max())

      collectgarbage()
      torch.save(LCNfile_valid,ValidData)
      torch.save(LCNfile_train,TrainData)
      torch.save(LCNfile_test,TestData)
    end
    Classes = {1,2,3,4,5,6,7,8,9,0}
end

-- Ensure float storage for the train/test tensors (cached files may hold other
-- types). NOTE(review): ValidData.data is not cast here -- the Container's
-- TensorType below presumably handles it; confirm.
TrainData.data = TrainData.data:float()
TestData.data = TestData.data:float()

-- Wrap each split in a DataProvider.Container, the batching abstraction the
-- Main_* training scripts iterate over.
local TrainDataProvider = DataProvider.Container{
  Name = 'TrainingData',
  CachePrefix = nil,
  CacheFiles = false,
  Source = {TrainData.data,TrainData.label},
  MaxNumItems = 1e6,
  CopyData = false,
  TensorType = 'torch.FloatTensor',
}
local TestDataProvider = DataProvider.Container{
  Name = 'TestData',
  CachePrefix = nil,
  CacheFiles = false,
  Source = {TestData.data, TestData.label},
  MaxNumItems = 1e6,
  CopyData = false,
  TensorType = 'torch.FloatTensor',

}
local ValidDataProvider = DataProvider.Container{
  Name = 'ValidData',
  CachePrefix = nil,
  CacheFiles = false,
  Source = {ValidData.data, ValidData.label},
  MaxNumItems = 1e6,
  CopyData = false,
  TensorType = 'torch.FloatTensor',

}

--Preprocess
-- Final preprocessing stage. Exactly one of three normalization paths runs:
-- ZCA whitening with cached statistics, the dp-preprocessed pass-through, or
-- per-split mean/std normalization (the default).
-- (fixed) The pass-through branch previously tested the undefined global
-- `dp_prepro` -- always nil -- so the '-dp_prepro' command-line flag was
-- silently ignored and GCN/LCN data got normalized a second time; it now
-- reads opt.dp_prepro.

  -- Optional color-space conversion.
  -- NOTE(review): ValidDataProvider is not converted to YUV here; this mirrors
  -- the original behavior -- confirm whether that is intentional.
  if format == 'yuv' then
    require 'image'
    TrainDataProvider:apply(image.rgb2yuv)
    TestDataProvider:apply(image.rgb2yuv)
  end
  if Whiten then
    require 'unsup'
    -- ZCA statistics (mean, projection P and its inverse) are cached on disk
    -- so repeated runs reuse the transform computed on the training set.
    local meanfile = paths.concat(PreProcDir, format .. 'imageMean.t7')
    local mean, P, invP
    local Pfile = paths.concat(PreProcDir,format .. 'P.t7')
    local invPfile = paths.concat(PreProcDir,format .. 'invP.t7')

    if (paths.filep(Pfile) and paths.filep(invPfile) and paths.filep(meanfile)) then
      P = torch.load(Pfile)
      invP = torch.load(invPfile)
      mean = torch.load(meanfile)
      TrainDataProvider.Data = unsup.zca_whiten(TrainDataProvider.Data, mean, P, invP)
    else
      TrainDataProvider.Data, mean, P, invP = unsup.zca_whiten(TrainDataProvider.Data)
      torch.save(Pfile,P)
      torch.save(invPfile,invP)
      torch.save(meanfile,mean)
    end
    -- Test/validation are whitened with the training-set statistics.
    TestDataProvider.Data = unsup.zca_whiten(TestDataProvider.Data, mean, P, invP)
    ValidDataProvider.Data = unsup.zca_whiten(ValidDataProvider.Data, mean, P, invP)
  elseif opt.dp_prepro then
    -- Do nothing since we use dp lib for GCN and LCN
  else
    -- Mean/std normalization: statistics are computed on the training set
    -- (or loaded from cache) and applied to all three splits.
    local meanfile = paths.concat(PreProcDir, format .. normalization .. 'Mean.t7')
    local stdfile = paths.concat(PreProcDir,format .. normalization .. 'Std.t7')
    local mean, std
    local loaded = false

    if paths.filep(meanfile) and paths.filep(stdfile) then
      mean = torch.load(meanfile)
      std = torch.load(stdfile)
      loaded = true
    end

    mean, std = TrainDataProvider:normalize(normalization, mean, std)
    TestDataProvider:normalize(normalization, mean, std)
    ValidDataProvider:normalize(normalization, mean, std)
    if not loaded then
      torch.save(meanfile,mean)
      torch.save(stdfile,std)
    end
  end

-- Module result: the three providers plus the class list (may be nil for
-- datasets that never set it, e.g. Cifar100 above).
return{
    TrainData = TrainDataProvider,
    TestData = TestDataProvider,
    ValidData = ValidDataProvider,
    Classes = Classes
}


================================================
FILE: Dockerfile/binarynet-torch-gpu-cuda-8.0
================================================
# Build image for training BinaryNet with Torch7 on CUDA 8.0 / cuDNN 5 GPUs.
FROM nvidia/cuda:8.0-cudnn5-devel
WORKDIR /workspace

# Install dependencies
RUN apt-get update \
 && apt-get install -y \
    build-essential git gfortran \
    python3 python3-setuptools python3-dev \
    cmake curl wget unzip libreadline-dev libjpeg-dev libpng-dev ncurses-dev \
    imagemagick gnuplot gnuplot-x11 libssl-dev libzmq3-dev graphviz vim sudo tmux

# Install OpenBLAS
RUN apt-get -y install libopenblas-dev

# Install Torch commit no: 0219027e6c4644a0ba5c5bf137c989a0a8c9e01b
# NOTE(review): the clone below fetches HEAD, not the commit named above --
# add an explicit `git checkout` if reproducibility at that commit matters.
RUN git clone https://github.com/torch/distro.git torch --recursive
RUN cd torch \
 && /bin/bash install-deps \
 && ./install.sh

# get torch tutorials. comment out this line if no need
RUN git clone https://github.com/torch/tutorials.git

# Install dependency for [BinaryNet](https://github.com/itayhubara/BinaryNet)
RUN /workspace/torch/install/bin/luarocks install https://raw.githubusercontent.com/eladhoffer/DataProvider.torch/master/dataprovider-scm-1.rockspec
RUN /workspace/torch/install/bin/luarocks install cudnn 
RUN /workspace/torch/install/bin/luarocks install dp
RUN /workspace/torch/install/bin/luarocks install unsup

# copy BinaryNet into the image
ADD . BinaryNet

================================================
FILE: Main_BinaryNet_Cifar10.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'adam_binary_clip_b'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------------------------------

-- Command-line interface: defaults reproduce the BinaryNet Cifar-10 paper
-- setup (batch 200, LR 2^-6, stochastic neuron binarization, shift-based BN).
cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')

cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder',       './Models/',            'Models Folder')
cmd:option('-network',            'Model.lua',            'Model file - must return valid network.')
cmd:option('-LR',                 2^-6,                   'learning rate')
cmd:option('-LRDecay',            0,                      'learning rate decay (in # samples)')
cmd:option('-weightDecay',        0.0,                    'L2 penalty on the weights')
cmd:option('-momentum',           0.0,                    'momentum')
cmd:option('-batchSize',          200,                    'batch size')
cmd:option('-stcNeurons',         true,                   'use stochastic binarization for the neurons')
cmd:option('-stcWeights',         false,                  'use stochastic binarization for the weights')
cmd:option('-optimization',       'adam',                 'optimization method')
cmd:option('-SBN',                true,                   'shift based batch-normalization')
cmd:option('-runningVal',         false,                  'use running mean and std')
cmd:option('-epoch',              -1,                     'number of epochs to train, -1 for unbounded')

cmd:text('===>Platform Optimization')
cmd:option('-threads',            8,                      'number of threads')
cmd:option('-type',               'cuda',                 'float or cuda')
cmd:option('-devid',              1,                      'device ID (if using CUDA)')
cmd:option('-nGPU',               1,                      'num of gpu devices used')
cmd:option('-constBatchSize',     false,                  'do not allow varying batch sizes - e.g for ccn2 kernel')


cmd:text('===>Save/Load Options')
cmd:option('-load',               '',                     'load existing net weights')
cmd:option('-save',               os.date():gsub(' ',''), 'save directory')

cmd:text('===>Data Options')
cmd:option('-dataset',            'Cifar10',              'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization',      'simple',               'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format',             'rgb',                  'rgb or yuv')
cmd:option('-whiten',             true,                   'whiten data')
cmd:option('-dp_prepro',          false,                   'preprocessing using dp lib')
cmd:option('-augment',            false,                  'Augment training data')
cmd:option('-preProcDir',         './PreProcData/',       'Data for pre-processing (means,P,invP)')
cmd:text('===>Misc')
cmd:option('-visualize',          0,                      'visualizing results')

torch.manualSeed(432)
-- Resolve relative paths: model file under modelsFolder, results under
-- ./Results/<save>, preprocessing cache under ./PreProcData/<dataset>/.
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')

-- If you choose an exponentially decaying learning rate, uncomment this line:
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)

torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
    require 'image'
end
----------------------------------------------------------------------
-- Model + Loss:
-- The model file returns {model=nn.Module, lrs=per-parameter LR scaling
-- vector, clipV=per-parameter clip mask}; model/GLRvec/clipV are deliberately
-- global so the optimizer and later code can reach them.
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV

-- NOTE(review): SqrtHingeEmbeddingCriterion is assumed to be a global defined
-- by require 'SqrHingeEmbeddingCriterion' above -- confirm.
local loss = SqrtHingeEmbeddingCriterion(1)


local data = require 'Data'
local classes = data.Classes

----------------------------------------------------------------------

-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)

local AllowVarBatch = not opt.constBatchSize


----------------------------------------------------------------------


-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------

local TensorType = 'torch.FloatTensor'
-- Optionally resume from a saved network (replaces the freshly built model).
if paths.filep(opt.load) then
    model = torch.load(opt.load)
    print('==>Loaded model from: ' .. opt.load)
    print(model)
end
-- Move model, LR/clip vectors and criterion to the selected GPU.
if opt.type =='cuda' then
    require 'cutorch'
    cutorch.setDevice(opt.devid)
    cutorch.setHeapTracking(true)
    model:cuda()
    GLRvec=GLRvec:cuda()
    clipV=clipV:cuda()
    loss = loss:cuda()
    TensorType = 'torch.CudaTensor'
end



---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
    local net = model
    model = nn.DataParallelTable(1)
    for i = 1, opt.nGPU do
        cutorch.setDevice(i)
        model:add(net:clone():cuda(), i)  -- Use the ith GPU
    end
    cutorch.setDevice(opt.devid)
end

-- Optimization configuration
-- Flattened views over all parameters/gradients; must be taken AFTER any
-- :cuda()/DataParallelTable changes above.
local Weights,Gradients = model:getParameters()


----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() ..  ' Parameters')

print '==> Loss'
print(loss)


------------------Optimization Configuration--------------------------
-- State table consumed by adaMax_binary_clip_shift; GLRvec/clipV carry the
-- per-parameter learning-rate scaling and clipping masks.
local optimState = {
    learningRate = opt.LR,
    momentum = opt.momentum,
    weightDecay = opt.weightDecay,
    learningRateDecay = opt.LRDecay,
    GLRvec=GLRvec,
    clipV=clipV
}
----------------------------------------------------------------------

-- Optional augmentation hook for the mini-batch provider.
-- With opt.augment off the batch passes through untouched; otherwise each
-- image receives a random crop (and possible horizontal flip) scaled back
-- to 32x32. Labels are returned unchanged.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    end

    local augmented = images:clone()
    for idx = 1, images:size(1) do
        -- Random parameters are drawn in the same order as before so the
        -- seeded RNG stream (torch.manualSeed) is unaffected.
        local crop = math.random(9) - 1
        local flip = (math.random(2) == 1)
        local x0 = math.random(crop)
        local y0 = math.random(crop)
        local patch = images[idx]:narrow(2, y0, 32 - crop):narrow(3, x0, 32 - crop)
        if flip then
            patch = image.hflip(patch)
        end
        augmented[idx]:copy(image.scale(patch, 32, 32))
    end
    return augmented,labels
end


------------------------------
-- Runs one full pass over `Data` in mini-batches; with train=true it also
-- backpropagates and takes an adaMax step. Returns the average loss per batch.
-- Side effects: updates the shared `confusion` matrix and (in training) the
-- flattened `Weights` via adaMax_binary_clip_shift.
local function Forward(Data, train)

  -- NOTE(review): `DataProvider` is not required in this file; it is assumed
  -- to be available as a global -- confirm the rock installs one, since
  -- Data.lua only requires it into a local.
  local MiniBatch = DataProvider.Container{
    Name = 'GPU_Batch',
    MaxNumItems = opt.batchSize,
    Source = Data,
    ExtractFunction = SampleImages,
    TensorType = TensorType
  }

  -- These tensors are re-filled in place by getNextBatch().
  local yt = MiniBatch.Labels
  local x = MiniBatch.Data
  local SizeData = Data:size()
  if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end

  -- (generalized) number of target classes for the one-hot encoding; falls
  -- back to the original hard-coded 10 when no class list is available.
  local nClasses = (classes and #classes > 0) and #classes or 10

  local NumSamples = 0
  local NumBatches = 0
  local lossVal = 0

  while NumSamples < SizeData do
    MiniBatch:getNextBatch()
    local y, currLoss
    NumSamples = NumSamples + x:size(1)
    NumBatches = NumBatches + 1
    if opt.nGPU > 1 then
      model:syncParameters()
    end
    y = model:forward(x)
    -- Build {-1,+1} one-hot targets for the squared hinge criterion.
    -- (fixed) one_hot_yt was an accidental global; it is local now.
    local one_hot_yt = torch.zeros(yt:size(1), nClasses)
    one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
    one_hot_yt = one_hot_yt:mul(2):float():add(-1)
    if opt.type == 'cuda' then
      one_hot_yt = one_hot_yt:cuda()
    end

    currLoss = loss:forward(y,one_hot_yt)
    if train then
      -- (fixed) feval was an accidental global function.
      local function feval()
        model:zeroGradParameters()
        local dE_dy = loss:backward(y, one_hot_yt)
        model:backward(x, dE_dy)
        return currLoss, Gradients
      end
       --_G.optim[opt.optimization](feval, Weights, optimState) -- If you choose to use different optimization remember to clip the weights
       adaMax_binary_clip_shift(feval, Weights, optimState)
    end

    lossVal = currLoss + lossVal

    if type(y) == 'table' then --table results - always take first prediction
      y = y[1]
    end

    confusion:batchAdd(y,one_hot_yt)
    xlua.progress(NumSamples, SizeData)
    -- Periodic GC keeps the Lua heap from accumulating tensor wrappers.
    if math.fmod(NumBatches,100)==0 then
      collectgarbage()
    end
  end
  return(lossVal/math.ceil(SizeData/opt.batchSize))
end

------------------------------
-- One training epoch: training mode (dropout/BN statistics updated),
-- with backprop and optimizer steps. Returns the average batch loss.
local function Train(Data)
  model:training()
  return Forward(Data, true)
end

-- One evaluation pass: evaluate mode, no parameter updates.
local function Test(Data)
  model:evaluate()
  return Forward(Data, false)
end
------------------------------

local epoch = 1
print '\n==> Starting Training\n'


-- Main loop: one shuffled training epoch, then validation and test passes.
-- Runs until epoch == opt.epoch (forever when opt.epoch == -1).
while epoch ~= opt.epoch do
    data.TrainData:shuffleItems()
    print('Epoch ' .. epoch)
    --Train
    confusion:zero()
    local LossTrain = Train(data.TrainData)
    -- Checkpoint the full model every 10 epochs.
    if epoch%10==0 then
      torch.save(netFilename, model)
    end
    confusion:updateValids()
    local ErrTrain = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Training Error = ' .. ErrTrain)
    print('Training Loss = ' .. LossTrain)

    --validation
    confusion:zero()
    local LossValid = Test(data.ValidData)
    confusion:updateValids()
    local ErrValid = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Valid Error = ' .. ErrValid)
    print('Valid Loss = ' .. LossValid)

    --Test
    confusion:zero()
    local LossTest = Test(data.TestData)
    confusion:updateValids()
    local ErrTest = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end

    print('Test Error = ' .. ErrTest)
    print('Test Loss = ' .. LossTest)

    Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
    -- Plot the error curves when -visualize 1 (requires a gnuplot display).
    if opt.visualize == 1 then
        Log:style{['Training Error'] = '-',['Validation Error'] = '-', ['Test Error'] = '-'}
        Log:plot()
    end
    --optimState.learningRate=optimState.learningRate*opt.LRDecay
    -- Halve the learning rate every 50 epochs; otherwise leave it unchanged.
    if epoch%50==0  then
      optimState.learningRate=optimState.learningRate*0.5
    else
      optimState.learningRate=optimState.learningRate --*opt.LRDecay
    end
    print('-------------------LR-------------------')
    print(optimState.learningRate)
    epoch = epoch + 1
end


================================================
FILE: Main_BinaryNet_MNIST.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------

-- Command-line interface: defaults reproduce the BinaryNet MNIST setup.
-- (fixed) The help strings for -stcNeurons, -stcWeights and -optimization
-- were copy-pasted as 'batch size'; they now match Main_BinaryNet_Cifar10.lua.
cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')

cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder',       './Models/',            'Models Folder')
cmd:option('-network',            'Model.lua',            'Model file - must return valid network.')
cmd:option('-LR',                 2^-6,                   'learning rate')
cmd:option('-LRDecay',            0,                      'learning rate decay (in # samples)')
cmd:option('-weightDecay',        0.0,                    'L2 penalty on the weights')
cmd:option('-momentum',           0.0,                    'momentum')
cmd:option('-batchSize',          100,                    'batch size')
cmd:option('-stcNeurons',         true,                   'use stochastic binarization for the neurons')
cmd:option('-stcWeights',         false,                  'use stochastic binarization for the weights')
cmd:option('-optimization',       'adam',                 'optimization method')
cmd:option('-SBN',                true,                   'shift based batch-normalization')
cmd:option('-runningVal',         true,                   'use running mean and std')
cmd:option('-epoch',              -1,                     'number of epochs to train, -1 for unbounded')

cmd:text('===>Platform Optimization')
cmd:option('-threads',            8,                      'number of threads')
cmd:option('-type',               'cuda',                 'float or cuda')
cmd:option('-devid',              1,                      'device ID (if using CUDA)')
cmd:option('-nGPU',               1,                      'num of gpu devices used')
cmd:option('-constBatchSize',     false,                  'do not allow varying batch sizes - e.g for ccn2 kernel')

cmd:text('===>Save/Load Options')
cmd:option('-load',               '',                     'load existing net weights')
cmd:option('-save',               os.date():gsub(' ',''), 'save directory')

cmd:text('===>Data Options')
cmd:option('-dataset',            'MNIST',                'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization',      'simple',               'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format',             'rgb',                  'rgb or yuv')
cmd:option('-whiten',             false,                  'whiten data')
cmd:option('-dp_prepro',          false,                  'preprocessing using dp lib')
cmd:option('-augment',            false,                  'Augment training data')
cmd:option('-preProcDir',         './PreProcData/',       'Data for pre-processing (means,P,invP)')

cmd:text('===>Misc')
cmd:option('-visualize',          1,                      'visualizing results')

torch.manualSeed(432)
-- Resolve relative paths: model file under modelsFolder, results under
-- ./Results/<save>, preprocessing cache under ./PreProcData/<dataset>/.
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')


-- If you choose an exponentially decaying learning rate, uncomment this line:
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--


-- (fixed) 'mk1ir' was a typo for 'mkdir', so the preprocessing cache
-- directory was never created and Data.lua could not save its .t7 files.
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)

torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
    require 'image'
end
----------------------------------------------------------------------

-- The model file returns {model=nn.Module, lrs=per-parameter LR scaling
-- vector, clipV=per-parameter clip mask}; model/GLRvec/clipV are deliberately
-- global so the optimizer and later code can reach them.
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV
-- NOTE(review): SqrtHingeEmbeddingCriterion is assumed to be a global defined
-- by require 'SqrHingeEmbeddingCriterion' above -- confirm.
local loss = SqrtHingeEmbeddingCriterion(1)

local data = require 'Data'
local classes = data.Classes

----------------------------------------------------------------------

-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)

local AllowVarBatch = not opt.constBatchSize


----------------------------------------------------------------------


-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------

local TensorType = 'torch.FloatTensor'
-- Optionally resume from a saved network (replaces the freshly built model).
if paths.filep(opt.load) then
    model = torch.load(opt.load)
    print('==>Loaded model from: ' .. opt.load)
    print(model)
end
-- Move model, LR/clip vectors and criterion to the selected GPU.
if opt.type =='cuda' then
    require 'cutorch'
    cutorch.setDevice(opt.devid)
    cutorch.setHeapTracking(true)
    model:cuda()
    GLRvec=GLRvec:cuda()
    clipV=clipV:cuda()
    loss = loss:cuda()
    TensorType = 'torch.CudaTensor'
end



---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
    local net = model
    model = nn.DataParallelTable(1)
    for i = 1, opt.nGPU do
        cutorch.setDevice(i)
        model:add(net:clone():cuda(), i)  -- Use the ith GPU
    end
    cutorch.setDevice(opt.devid)
end

-- Optimization configuration
-- Flattened views over all parameters/gradients; must be taken AFTER any
-- :cuda()/DataParallelTable changes above.
local Weights,Gradients = model:getParameters()


----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() ..  ' Parameters')

print '==> Loss'
print(loss)


------------------Optimization Configuration--------------------------
-- State table consumed by adaMax_binary_clip_shift; GLRvec/clipV carry the
-- per-parameter learning-rate scaling and clipping masks.
local optimState = {
    learningRate = opt.LR,
    momentum = opt.momentum,
    weightDecay = opt.weightDecay,
    learningRateDecay = opt.LRDecay,
    GLRvec=GLRvec,
    clipV=clipV
}
----------------------------------------------------------------------

-- Optional augmentation hook for the mini-batch provider (identical twin of
-- the one in Main_BinaryNet_Cifar10.lua).
-- With opt.augment off the batch passes through untouched; otherwise each
-- image receives a random crop (and possible horizontal flip) scaled back
-- to 32x32. Labels are returned unchanged.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    end

    local augmented = images:clone()
    for idx = 1, images:size(1) do
        -- Random parameters are drawn in the same order as before so the
        -- seeded RNG stream (torch.manualSeed) is unaffected.
        local crop = math.random(9) - 1
        local flip = (math.random(2) == 1)
        local x0 = math.random(crop)
        local y0 = math.random(crop)
        local patch = images[idx]:narrow(2, y0, 32 - crop):narrow(3, x0, 32 - crop)
        if flip then
            patch = image.hflip(patch)
        end
        augmented[idx]:copy(image.scale(patch, 32, 32))
    end
    return augmented,labels
end


------------------------------
-- Runs one full pass over `Data` in mini-batches; with train=true it also
-- backpropagates, takes an adaMax step and clamps the binary layers' real-
-- valued weights to [-1,1]. Returns the average loss per batch.
-- Side effects: updates the shared `confusion` matrix and (in training) the
-- flattened `Weights` via adaMax_binary_clip_shift.
local function Forward(Data, train)

  -- NOTE(review): `DataProvider` is not required in this file; it is assumed
  -- to be available as a global -- confirm the rock installs one.
  local MiniBatch = DataProvider.Container{
    Name = 'GPU_Batch',
    MaxNumItems = opt.batchSize,
    Source = Data,
    ExtractFunction = SampleImages,
    TensorType = TensorType
  }

  -- These tensors are re-filled in place by getNextBatch().
  local yt = MiniBatch.Labels
  local x = MiniBatch.Data
  local SizeData = Data:size()
  if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end

  -- (generalized) number of target classes for the one-hot encoding; falls
  -- back to the original hard-coded 10 when no class list is available.
  local nClasses = (classes and #classes > 0) and #classes or 10

  local NumSamples = 0
  local NumBatches = 0
  local lossVal = 0

  while NumSamples < SizeData do
    MiniBatch:getNextBatch()
    local y, currLoss
    NumSamples = NumSamples + x:size(1)
    NumBatches = NumBatches + 1
    if opt.nGPU > 1 then
      model:syncParameters()
    end

    y = model:forward(x)

    -- Build {-1,+1} one-hot targets for the squared hinge criterion.
    -- (fixed) one_hot_yt was an accidental global and was moved to CUDA
    -- unconditionally, which crashed with '-type float'; the conversion is
    -- now guarded, matching Main_BinaryNet_Cifar10.lua.
    local one_hot_yt = torch.zeros(yt:size(1), nClasses)
    one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
    one_hot_yt = one_hot_yt:mul(2):float():add(-1)
    if opt.type == 'cuda' then
      one_hot_yt = one_hot_yt:cuda()
    end

    currLoss = loss:forward(y,one_hot_yt)
    if train then
      -- (fixed) feval was an accidental global function.
      local function feval()
        model:zeroGradParameters()
        local dE_dy = loss:backward(y, one_hot_yt)
        model:backward(x, dE_dy)
        return currLoss, Gradients
      end

      adaMax_binary_clip_shift(feval, Weights, optimState)

      -- Keep the real-valued weights of the binary layers inside [-1,1],
      -- as required by the BinaryNet training scheme.
      -- (simplified) the redundant indLayer counter duplicated ipairs' index.
      for _, layer in ipairs(model.modules) do
        if layer.__typename == 'cudnnBinarySpatialConvolution'
            or layer.__typename == 'BinaryLinear' then
          layer.weight:clamp(-1,1)
        end
      end
    end

    lossVal = currLoss + lossVal

    if type(y) == 'table' then --table results - always take first prediction
      y = y[1]
    end


    confusion:batchAdd(y,one_hot_yt)
    xlua.progress(NumSamples, SizeData)
    -- Periodic GC keeps the Lua heap from accumulating tensor wrappers.
    if math.fmod(NumBatches,100)==0 then
      collectgarbage()
    end
  end
  return(lossVal/math.ceil(SizeData/opt.batchSize))
end

------------------------------
-- One training epoch: training mode (dropout/BN statistics updated),
-- with backprop and optimizer steps. Returns the average batch loss.
local function Train(Data)
  model:training()
  return Forward(Data, true)
end

-- One evaluation pass: evaluate mode, no parameter updates.
local function Test(Data)
  model:evaluate()
  return Forward(Data, false)
end
------------------------------

-- (fixed) the epoch counter initialization and the "Starting Training"
-- banner were duplicated back-to-back; the second `local epoch = 1` merely
-- shadowed the first. A single copy remains.
local epoch = 1
print '\n==> Starting Training\n'

-- Main loop: one shuffled training epoch, then validation and test passes.
-- Runs until epoch == opt.epoch (forever when opt.epoch == -1).
while epoch ~= opt.epoch do
    data.TrainData:shuffleItems()
    print('Epoch ' .. epoch)
    --Train
    confusion:zero()
    local LossTrain = Train(data.TrainData)
    -- Checkpoint the full model every 10 epochs.
    if epoch%10==0 then
      torch.save(netFilename, model)
    end
    confusion:updateValids()
    local ErrTrain = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Training Error = ' .. ErrTrain)
    print('Training Loss = ' .. LossTrain)

    --validation
    confusion:zero()
    local LossValid = Test(data.ValidData)
    confusion:updateValids()
    local ErrValid = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Valid Error = ' .. ErrValid)
    print('Valid Loss = ' .. LossValid)

    --Test
    confusion:zero()
    local LossTest = Test(data.TestData)
    confusion:updateValids()
    local ErrTest = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end

    print('Test Error = ' .. ErrTest)
    print('Test Loss = ' .. LossTest)

    Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
    -- Plot the error curves when -visualize 1 (requires a gnuplot display).
    if opt.visualize == 1 then
        Log:style{['Training Error'] = '-',['Validation Error'] = '-', ['Test Error'] = '-'}
        Log:plot()
    end
    -- Halve the learning rate every 20 epochs (the original's else branch
    -- was a no-op self-assignment and has been dropped).
    if epoch%20==0  then
      optimState.learningRate=optimState.learningRate*0.5
    end
    print('-------------------LR-------------------')
    print(optimState.learningRate)
    epoch = epoch + 1
end


================================================
FILE: Main_BinaryNet_SVHN.lua
================================================
require 'torch'
require 'xlua'
require 'optim'
require 'gnuplot'
require 'pl'
require 'trepl'
require 'adaMax_binary_clip_shift'
require 'nn'
require 'SqrHingeEmbeddingCriterion'
----------------------------------------------------------------------

cmd = torch.CmdLine()
cmd:addTime()
cmd:text()
cmd:text('Training a convolutional network for visual classification')
cmd:text()
cmd:text('==>Options')

cmd:text('===>Model And Training Regime')
cmd:option('-modelsFolder',       './Models/',            'Models Folder')
cmd:option('-network',            'Model.lua',            'Model file - must return valid network.')
cmd:option('-LR',                 2^-7,                    'learning rate')
cmd:option('-LRDecay',            0,                     'learning rate decay (in # samples)')
cmd:option('-weightDecay',        0.0,                   'L2 penalty on the weights')
cmd:option('-momentum',           0.0,                    'momentum')
cmd:option('-batchSize',          200,                    'batch size')
-- fixed copy-pasted 'batch size' help text on the two stc* flags
cmd:option('-stcNeurons',         true,                    'use stochastic binarization for the neurons')
cmd:option('-stcWeights',         false,                   'use stochastic binarization for the weights')
cmd:option('-optimization',       'adam',                  'optimization method')
cmd:option('-SBN',                true,                   'shift based batch-normalization')
cmd:option('-runningVal',         true,                    'use running mean and std')
cmd:option('-epoch',              -1,                     'number of epochs to train, -1 for unbounded')

cmd:text('===>Platform Optimization')
cmd:option('-threads',            8,                      'number of threads')
cmd:option('-type',               'cuda',                 'float or cuda')
cmd:option('-devid',              1,                      'device ID (if using CUDA)')
cmd:option('-nGPU',               1,                      'num of gpu devices used')
cmd:option('-constBatchSize',     false,                    'do not allow varying batch sizes - e.g for ccn2 kernel')

cmd:text('===>Save/Load Options')
cmd:option('-load',               '',                  'load existing net weights')
cmd:option('-save',               os.date():gsub(' ',''), 'save directory')

cmd:text('===>Data Options')
cmd:option('-dataset',            'SVHN',              'Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST')
cmd:option('-normalization',      'simple',               'simple - whole sample, channel - by image channel, image - mean and std images')
cmd:option('-format',             'rgb',                  'rgb or yuv')
cmd:option('-whiten',             false,                  'whiten data')
cmd:option('-dp_prepro',          true,                   'preprocessing using dp lib')
cmd:option('-augment',            false,                  'Augment training data')
cmd:option('-preProcDir',         './PreProcData/',       'Data for pre-processing (means,P,invP)')

cmd:text('===>Misc')
cmd:option('-visualize',          1,                      'visualizing results')

torch.manualSeed(432)
opt = cmd:parse(arg or {})
opt.network = opt.modelsFolder .. paths.basename(opt.network, '.lua')
opt.save = paths.concat('./Results', opt.save)
opt.preProcDir = paths.concat(opt.preProcDir, opt.dataset .. '/')


-- If you choose to use an exponentially decaying learning rate, uncomment this line
--opt.LRDecay=torch.pow((2e-6/opt.LR),(1./500));
--

-- fixed typo 'mk1ir' -> 'mkdir': the preprocessing directory was never created
os.execute('mkdir -p ' .. opt.preProcDir)
torch.setnumthreads(opt.threads)

torch.setdefaulttensortype('torch.FloatTensor')
if opt.augment then
    require 'image'
end
----------------------------------------------------------------------
-- Model + Loss:
-- The model file returns the network plus per-parameter Glorot learning
-- rates (lrs) and a clip mask (clipV) that the optimizer consumes.
-- NOTE(review): model, GLRvec and clipV are assigned without 'local' and
-- become globals - they are referenced throughout this script; verify
-- nothing else depends on them leaking.
local modelAll = require(opt.network)
model=modelAll.model
GLRvec=modelAll.lrs
clipV=modelAll.clipV

-- squared hinge loss over +/-1 one-hot targets (margin = 1)
local loss = SqrtHingeEmbeddingCriterion(1) --nn.ClassNLLCriterion()
local data = require 'Data'
local classes = data.Classes

----------------------------------------------------------------------

-- This matrix records the current confusion across classes
local confusion = optim.ConfusionMatrix(classes)

local AllowVarBatch = not opt.constBatchSize


----------------------------------------------------------------------


-- Output files configuration
os.execute('mkdir -p ' .. opt.save)
cmd:log(opt.save .. '/Log.txt', opt)
local netFilename = paths.concat(opt.save, 'Net')
local logFilename = paths.concat(opt.save,'ErrorRate.log')
local optStateFilename = paths.concat(opt.save,'optState')
local Log = optim.Logger(logFilename)
----------------------------------------------------------------------

local TensorType = 'torch.FloatTensor'

-- Move the model, loss and per-parameter LR/clip vectors to the GPU.
if opt.type =='cuda' then
    require 'cutorch'
    cutorch.setDevice(opt.devid)
    cutorch.setHeapTracking(true)
    model:cuda()
    GLRvec=GLRvec:cuda()
    clipV=clipV:cuda()
    loss = loss:cuda()
    TensorType = 'torch.CudaTensor'
end
-- Optionally resume from a previously saved network.
if paths.filep(opt.load) then
    model = torch.load(opt.load)
    print('==>Loaded model from: ' .. opt.load)
    print(model)
end


---Support for multiple GPUs - currently data parallel scheme
if opt.nGPU > 1 then
    local net = model
    model = nn.DataParallelTable(1)
    for i = 1, opt.nGPU do
        cutorch.setDevice(i)
        model:add(net:clone():cuda(), i)  -- Use the ith GPU
    end
    cutorch.setDevice(opt.devid)
end

-- Optimization configuration
-- Flat views over all parameters/gradients; must be taken AFTER the model
-- reached its final device/wrapper, or the views would go stale.
local Weights,Gradients = model:getParameters()



----------------------------------------------------------------------
print '==> Network'
print(model)
print('==>' .. Weights:nElement() ..  ' Parameters')

print '==> Loss'
print(loss)


------------------Optimization Configuration--------------------------
local optimState = {
    learningRate = opt.LR,
    momentum = opt.momentum,
    weightDecay = opt.weightDecay,
    learningRateDecay = opt.LRDecay,
    GLRvec=GLRvec, -- per-parameter Glorot learning-rate scaling
    clipV=clipV -- mask of parameters to clip after each update
}
----------------------------------------------------------------------
----------------------------------------------------------------------

-- Data augmentation: random crop (margin 0..8 px), random horizontal flip,
-- then rescale back to 32x32. Returns the batch unchanged when augmentation
-- is disabled.
local function SampleImages(images,labels)
    if not opt.augment then
        return images,labels
    else

        local sampled_imgs = images:clone()
        for i=1,images:size(1) do
            local sz = math.random(9) - 1    -- crop margin: 0..8 pixels
            local hflip = math.random(2)==1  -- flip with probability 0.5

            -- A crop of width 32-sz admits start offsets 1..sz+1.
            -- (math.random(sz) raised 'interval is empty' when sz==0 and
            -- could never select the right/bottom-most crop position.)
            local startx = math.random(sz + 1)
            local starty = math.random(sz + 1)
            local img = images[i]:narrow(2,starty,32-sz):narrow(3,startx,32-sz)
            if hflip then
                img = image.hflip(img)
            end
            img = image.scale(img,32,32)
            sampled_imgs[i]:copy(img)
        end
        return sampled_imgs,labels
    end
end


------------------------------
-- Run one pass over `Data` in mini-batches. When `train` is true the weights
-- are updated with the shift-based AdaMax rule and the real-valued shadow
-- weights of the binary layers are clipped back to [-1,1]. Predictions are
-- accumulated into the global confusion matrix; returns the mean batch loss.
local function Forward(Data, train)


  local MiniBatch = DataProvider.Container{
    Name = 'GPU_Batch',
    MaxNumItems = opt.batchSize,
    Source = Data,
    ExtractFunction = SampleImages,
    TensorType = TensorType
  }

  local yt = MiniBatch.Labels
  local x = MiniBatch.Data
  local SizeData = Data:size()
  if not AllowVarBatch then SizeData = math.floor(SizeData/opt.batchSize)*opt.batchSize end

  local NumSamples = 0
  local NumBatches = 0
  local lossVal = 0

  while NumSamples < SizeData do
    MiniBatch:getNextBatch()
    local y, currLoss
    NumSamples = NumSamples + x:size(1)
    NumBatches = NumBatches + 1
    if opt.nGPU > 1 then
      model:syncParameters()
    end

    y = model:forward(x)
    -- The hinge loss expects +/-1 targets: build a one-hot matrix and map
    -- {0,1} -> {-1,+1}. Fixes: was an accidental global, hardcoded to 10
    -- classes, and hardcoded :cuda() (crashed in -type float mode).
    local one_hot_yt = torch.zeros(yt:size(1), #classes)
    one_hot_yt:scatter(2, yt:long():view(-1,1), 1)
    one_hot_yt = one_hot_yt:mul(2):float():add(-1):type(TensorType)

    currLoss = loss:forward(y,one_hot_yt)
    if train then
      -- closure evaluated by the optimizer (was an accidental global)
      local function feval()
        model:zeroGradParameters()
        local dE_dy = loss:backward(y, one_hot_yt)
        model:backward(x, dE_dy)
        return currLoss, Gradients
      end

      adaMax_binary_clip_shift(feval, Weights, optimState)

      -- clip the real-valued shadow weights of every binary layer to [-1,1]
      -- (clamp is in-place; the old self-copy wrapper was redundant)
      for _, layer in ipairs(model.modules) do
        if layer.__typename == 'cudnnBinarySpatialConvolution'
            or layer.__typename == 'BinaryLinear' then
          layer.weight:clamp(-1, 1)
        end
      end
    end

    lossVal = currLoss + lossVal

    if type(y) == 'table' then --table results - always take first prediction
      y = y[1]
    end


    confusion:batchAdd(y,one_hot_yt)
    xlua.progress(NumSamples, SizeData)
    if math.fmod(NumBatches,100)==0 then
      collectgarbage()
    end
  end
  return(lossVal/math.ceil(SizeData/opt.batchSize))
end

------------------------------
-- Run one full pass over `dataset` in training mode (weights are updated).
local function Train(dataset)
  model:training()
  local avgLoss = Forward(dataset, true)
  return avgLoss
end

-- Run one full pass over `dataset` in evaluation mode (no weight updates).
local function Test(dataset)
  model:evaluate()
  local avgLoss = Forward(dataset, false)
  return avgLoss
end
------------------------------

-- Main training loop: one train / validation / test sweep per epoch.
local epoch = 1
print '\n==> Starting Training\n'


while epoch ~= opt.epoch do
    data.TrainData:shuffleItems()
    print('Epoch ' .. epoch)
    --Train
    confusion:zero()
    local LossTrain = Train(data.TrainData)
    -- checkpoint the model every 10 epochs
    if epoch%10==0 then
      torch.save(netFilename, model)
    end
    confusion:updateValids()
    local ErrTrain = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Training Error = ' .. ErrTrain)
    print('Training Loss = ' .. LossTrain)
    --validation
    confusion:zero()
    local LossValid = Test(data.ValidData)
    confusion:updateValids()
    local ErrValid = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end
    print('Valid Error = ' .. ErrValid)
    print('Valid Loss = ' .. LossValid)
    --Test
    confusion:zero()
    local LossTest = Test(data.TestData)
    confusion:updateValids()
    local ErrTest = (1-confusion.totalValid)
    if #classes <= 10 then
        print(confusion)
    end

    print('Test Error = ' .. ErrTest)
    print('Test Loss = ' .. LossTest)

    Log:add{['Training Error']= ErrTrain, ['Valid Error'] = ErrValid, ['Test Error'] = ErrTest}
    if opt.visualize == 1 then
        -- style keys must match the keys passed to Log:add above
        -- (was 'Validation Error', which never matched the logged 'Valid Error')
        Log:style{['Training Error'] = '-',['Valid Error'] = '-', ['Test Error'] = '-'}
        Log:plot()
    end
    -- halve the learning rate every 20 epochs (the old else branch was a no-op)
    if epoch%20==0 then
      optimState.learningRate=optimState.learningRate*0.5
    end
    print('-------------------LR-------------------')
    print(optimState.learningRate)


    epoch = epoch + 1
end


================================================
FILE: Models/BatchNormalizationShiftPow2.lua
================================================
--[[
   This file implements Shift-based Batch Normalization, a variant of vanilla BN, as described in the paper:
   "Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1", Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio

   The code is based on the nn library
   --]]


local BatchNormalizationShiftPow2,parent = torch.class('BatchNormalizationShiftPow2', 'nn.Module')

--- Shift-based batch normalization over a 2D (batch x features) input.
-- @param nOutput    number of features
-- @param runningVal use running mean/std at evaluation time (default true)
-- @param eps        numerical stabilizer added to the variance (default 1e-5)
-- @param momentum   running-average momentum (default 0.125)
-- @param affine     learn a per-feature scale and shift (default true)
function BatchNormalizationShiftPow2:__init(nOutput, runningVal, eps, momentum, affine)
   parent.__init(self)
   assert(nOutput and type(nOutput) == 'number',
          'Missing argument #1: dimensionality of input. ')
   assert(nOutput ~= 0, 'To set affine=false call BatchNormalization'
     .. '(nOutput,  eps, momentum, false) ')
   if affine ~= nil then
      assert(type(affine) == 'boolean', 'affine has to be true/false')
      self.affine = affine
   else
      self.affine = true
   end
   self.eps = eps or 1e-5
   self.train = true
   self.momentum = momentum or 0.125
   -- Bug fix: 'runningVal or true' always yielded true, even when the
   -- caller passed false (false is falsy in Lua). Default only when the
   -- argument is actually omitted.
   if runningVal == nil then
      self.runningVal = true
   else
      self.runningVal = runningVal
   end
   self.running_mean = torch.zeros(nOutput)
   self.running_std = torch.ones(nOutput)
   self.running_std_ap2 = torch.ones(nOutput)
   if self.affine then
      self.weight = torch.Tensor(nOutput)
      self.weightSign = torch.Tensor(nOutput)
      self.weight_ap2 = torch.Tensor(nOutput)
      self.bias = torch.Tensor(nOutput)
      self.gradWeight = torch.Tensor(nOutput)
      self.gradBias = torch.Tensor(nOutput)
      self:reset()
   end
end

-- Reinitialize the affine parameters and running statistics so the layer
-- starts out as an identity normalization.
function BatchNormalizationShiftPow2:reset()
   self.running_mean:zero()
   self.running_std:fill(1)
   self.weight:fill(1)
   self.bias:fill(0)
end

--- Forward pass. Every multiplicative factor (the inverse std, the affine
-- weight, and one factor of the centered input inside the variance estimate)
-- is rounded to the nearest power of two ("AP2") so the multiplications can
-- be realized as bit-shifts.
function BatchNormalizationShiftPow2:updateOutput(input)
   assert(input:dim() == 2, 'only mini-batch supported (2D tensor), got '
             .. input:dim() .. 'D tensor instead')
   local nBatch = input:size(1)
   -- buffers that are reused
   self.buffer = self.buffer or input.new()
   self.buffer2 = self.buffer2 or input.new()
   self.centered = self.centered or input.new()
   self.centered:resizeAs(input)
   self.centerSign = self.centerSign or input.new()
   self.centerSign:resizeAs(input)
   self.centeredOrg = self.centeredOrg or input.new()
   self.centeredOrg:resizeAs(input)
   self.std = self.std or input.new()
   self.normalized = self.normalized or input.new()
   self.normalized:resizeAs(input)
   self.normalizedSign = self.normalizedSign or input.new()
   self.normalizedSign:resizeAs(input)
   self.output:resizeAs(input)
   self.gradInput:resizeAs(input)
   if self.train == false and self.runningVal == true then
     -- Evaluation with running statistics. running_std stores the INVERSE
     -- std (see the :pow(-1) in the training branch), hence cmul not div.
     self.output:copy(input)
     self.buffer:repeatTensor(self.running_mean, nBatch, 1)
     self.output:add(-1, self.buffer)
     -- round the running inverse-std to the nearest power of two
     self.running_std_ap2:copy(torch.pow(2,torch.round(torch.log(self.running_std):div(math.log(2)))))
     self.buffer:repeatTensor(self.running_std_ap2, nBatch, 1)
     self.output:cmul(self.buffer)
   else -- training mode
      -- calculate mean over mini-batch
      self.buffer:mean(input, 1)                        -- E(x) = expectation of x.
      self.running_mean:mul(1 - self.momentum):add(self.momentum, self.buffer) -- add to running mean
      self.buffer:repeatTensor(self.buffer, nBatch, 1)

      -- subtract mean
      self.centered:add(input, -1, self.buffer)         -- x - E(x)
      self.centeredOrg:copy(self.centered)
      self.centerSign:copy(self.centered)
      self.centerSign:sign()
      -- round |x - E(x)| to the nearest power of two, reapplying the sign
      self.centered:copy(torch.pow(2,torch.round(torch.log(self.centered:abs()):div(math.log(2))))):cmul(self.centerSign)
      -- calculate standard deviation over mini-batch
      -- (one AP2 factor times the exact factor approximates the square)
      self.buffer:copy(self.centered):cmul(self.centeredOrg) -- [x - E(x)]^2
      -- 1 / E([x - E(x)]^2)
      self.std:mean(self.buffer, 1):add(self.eps):sqrt():pow(-1)
      self.running_std:mul(1 - self.momentum):add(self.momentum, self.std) -- add to running stdv
      -- round the inverse-std to the nearest power of two
      self.std:copy(torch.pow(2,torch.round(torch.log(self.std):div(math.log(2)))))
      self.buffer:repeatTensor(self.std, nBatch, 1)

      -- divide standard-deviation + eps

      self.output:cmul(self.centeredOrg, self.buffer)
      self.normalized:copy(self.output)
      self.normalizedSign:copy(self.normalized)
      self.normalizedSign:sign()

      -- AP2-rounded normalized activations, kept for accGradParameters
      self.normalized:copy(torch.pow(2,torch.round(torch.log(self.normalized:abs()):div(math.log(2)))):cmul(self.normalizedSign))
      --self.normalized[self.normalized:lt(0)]=1; -- Can improve results
   end

   if self.affine then
      -- multiply with gamma and add beta
      self.weightSign:copy(self.weight)
      self.weightSign:sign()
      -- gamma rounded to the nearest power of two, sign preserved
      self.weight_ap2:copy(torch.pow(2,torch.round(torch.log(self.weight:clone():abs()):div(math.log(2))))):cmul(self.weightSign)
      --self.weight:fill(1) --Almost similar results
      self.buffer:repeatTensor(self.weight_ap2, nBatch, 1)
      self.output:cmul(self.buffer)
      self.buffer:repeatTensor(self.bias, nBatch, 1)
      self.output:add(self.buffer)
   end
   return self.output
end

--- Backward pass w.r.t. the input. Training mode only: it relies on the
-- per-batch buffers (centered, std, weight_ap2) left by updateOutput.
function BatchNormalizationShiftPow2:updateGradInput(input, gradOutput)
   assert(input:dim() == 2, 'only mini-batch supported')
   assert(gradOutput:dim() == 2, 'only mini-batch supported')
   assert(self.train == true, 'should be in training mode when self.train is true')
   local nBatch = input:size(1)

   -- variance term: -(x - E(x)) * mean(centered * gradOutput) * std^2
   self.gradInput:cmul(self.centered, gradOutput)
   self.buffer:mean(self.gradInput, 1)
   self.gradInput:repeatTensor(self.buffer, nBatch, 1)
   self.gradInput:cmul(self.centered):mul(-1)
   self.buffer:repeatTensor(self.std, nBatch, 1)
   self.gradInput:cmul(self.buffer):cmul(self.buffer)

   -- mean term: (gradOutput - mean(gradOutput)) * std
   self.buffer:mean(gradOutput, 1)
   self.buffer:repeatTensor(self.buffer, nBatch, 1)
   self.gradInput:add(gradOutput):add(-1, self.buffer)
   self.buffer:repeatTensor(self.std, nBatch, 1)
   self.gradInput:cmul(self.buffer)

   if self.affine then
      -- chain through the AP2-rounded affine scale (gamma)
      self.buffer:repeatTensor(self.weight_ap2, nBatch, 1)
      self.gradInput:cmul(self.buffer)
   end

   return self.gradInput
end

--- Accumulate gradients for the affine parameters gamma (weight) and beta
-- (bias); no-op when affine is disabled.
function BatchNormalizationShiftPow2:accGradParameters(input, gradOutput, scale)
   if self.affine then
      scale = scale or 1.0
      -- dL/dgamma = sum_batch(normalized * gradOutput)
      self.buffer2:resizeAs(self.normalized):copy(self.normalized)
      self.buffer2:cmul(gradOutput)
      self.buffer:sum(self.buffer2, 1) -- sum over mini-batch
      self.gradWeight:add(scale, self.buffer)
      -- dL/dbeta = sum_batch(gradOutput)
      self.buffer:sum(gradOutput, 1) -- sum over mini-batch
      self.gradBias:add(scale, self.buffer)
   end
end


================================================
FILE: Models/BinarizedNeurons.lua
================================================
local BinarizedNeurons,parent = torch.class('BinarizedNeurons', 'nn.Module')


--- Sign-binarization layer for activations, with an optional stochastic
-- ("STC") mode that is active only during training.
-- @param stcFlag when true, binarize stochastically in training mode
function BinarizedNeurons:__init(stcFlag)
   parent.__init(self)
   self.stcFlag = stcFlag
   -- scratch tensors, reused across forward calls
   self.randmat = torch.Tensor()
   self.outputR = torch.Tensor()
end
--- Binarize the input to {-1,+1}. In training with stcFlag set, the sign is
-- drawn stochastically with p(+1) = (x+1)/2; otherwise it is sign(x).
function BinarizedNeurons:updateOutput(input)
    self.randmat:resizeAs(input);
    self.outputR:resizeAs(input);
    self.output:resizeAs(input);
    -- map input from [-1,1] to a probability in [0,1]
    self.outputR:copy(input):add(1):div(2)
     if self.train and self.stcFlag then
       -- sign(p - U[0,1]) is +1 with probability p
       local mask=self.outputR-self.randmat:rand(self.randmat:size())
       self.output=mask:sign()
     else
       -- deterministic threshold at 0.5, i.e. the sign of the original input
       self.output:copy(self.outputR):add(-0.5):sign()
     end
   return self.output
end

--- Straight-through estimator: the gradient passes through unchanged.
function BinarizedNeurons:updateGradInput(input, gradOutput)
   self.gradInput:resizeAs(gradOutput):copy(gradOutput)
   return self.gradInput
end


================================================
FILE: Models/BinaryLinear.lua
================================================
--require 'randomkit'

local BinaryLinear, parent = torch.class('BinaryLinear', 'nn.Linear')

--- Fully-connected layer with binarized weights.
-- Keeps three weight buffers: weight (real-valued, updated by the optimizer),
-- weightB (binarized copy swapped in for forward/backward) and weightOrg
-- (stash of the real weights while weightB is in place).
-- @param stcWeights when truthy, binarize the weights stochastically during
--                   training (see BinaryLinear:binarized)
function BinaryLinear:__init(inputSize, outputSize,stcWeights)
   -- temporarily disable reset() so parent.__init cannot run it before our
   -- own tensors exist; the real reset is called explicitly below
   local delayedReset = self.reset
   self.reset = function() end
   parent.__init(self, inputSize, outputSize)
   self.reset = delayedReset

   self.weight = torch.Tensor(outputSize, inputSize)
   self.weightB = torch.Tensor(outputSize, inputSize)
   self.weightOrg = torch.Tensor(outputSize, inputSize)
   self.maskStc = torch.Tensor(outputSize, inputSize)
   self.randmat = torch.Tensor(outputSize, inputSize)
   self.bias = torch.Tensor(outputSize)
   self.gradWeight = torch.Tensor(outputSize, inputSize)
   self.gradBias = torch.Tensor(outputSize)
   self.stcWeights=stcWeights
   self:reset()
   -- should nil for serialization, the reset will still work
   self.reset = nil
end

--- Initialize weights uniformly in [-1,1] (the binary regime) and biases
-- with the usual Linear fan-in scaling.
function BinaryLinear:reset(stdv)
   -- stdv, when given, is a number and therefore truthy even at 0,
   -- so the and/or form is safe here
   stdv = stdv and stdv * math.sqrt(3) or 1./math.sqrt(self.weight:size(2))
   if nn.oldSeed then
      for i=1,self.weight:size(1) do
         self.weight:select(1, i):apply(function()
            return torch.uniform(-1, 1)
         end)
         self.bias[i] = torch.uniform(-stdv, stdv)
      end
   else
      self.weight:uniform(-1, 1)
      self.bias:uniform(-stdv, stdv)
   end
   return self
end

--- Compute the binarized weights from the real-valued weights. The real
-- weights are stashed in self.weightOrg so updateOutput/updateGradInput can
-- swap them back after the binarized pass.
-- @param trainFlag deterministic rounding is used at eval time even when
--                  stochastic binarization (stcWeights) is enabled
function BinaryLinear:binarized(trainFlag)
  self.weightOrg:copy(self.weight)
  self.binaryFlag = true
  -- (The old 'if not self.binaryFlag' branch was unreachable: the flag is
  -- unconditionally set to true on the previous line.)
  -- map weights from [-1,1] to [0,1]
  self.weightB:copy(self.weight):add(1):div(2):clamp(0,1)

  if not self.stcWeights or not trainFlag then
    -- deterministic: round to {0,1}, then map back to {-1,+1}
    self.weightB:round():mul(2):add(-1)
  else
    -- stochastic: compare against uniform noise
    -- NOTE(review): the result is left as (weightB - rand) without a sign(),
    -- so it is not strictly binary in this branch - confirm intended.
    self.maskStc=self.weightB-self.randmat:rand(self.randmat:size())
    self.weightB:copy(self.maskStc)
  end

  return  self.weightB
end

--- Forward with binarized weights: swap the binary weights in, run the
-- standard nn.Linear forward, then restore the real-valued weights so the
-- optimizer keeps updating full-precision values.
function BinaryLinear:updateOutput(input)

  self.weightB = self:binarized(self.train)
  self.weight:copy(self.weightB)
   parent.updateOutput(self,input)
   self.weight:copy(self.weightOrg);
   return self.output
end

--- Backward w.r.t. the input using the SAME binarized weights as the
-- forward pass; the real-valued weights are restored afterwards.
function BinaryLinear:updateGradInput(input, gradOutput)

   if self.gradInput then
      self.weight:copy(self.weightB)
      parent.updateGradInput(self,input, gradOutput)
      self.weight:copy(self.weightOrg);
      return self.gradInput
   end

end

--- Delegate parameter-gradient accumulation to nn.Linear.
-- NOTE(review): assumes nn.Linear's accGradParameters does not read
-- self.weight (only input/gradOutput), so no binarized/real swap is
-- needed here - confirm against the nn.Linear implementation.
function BinaryLinear:accGradParameters(input, gradOutput, scale)
  parent.accGradParameters(self,input, gradOutput, scale)
end

-- we do not need to accumulate parameters when sharing
BinaryLinear.sharedAccUpdateGradParameters = BinaryLinear.accUpdateGradParameters


--- Human-readable description, e.g. 'BinaryLinear(784 -> 2048)'.
function BinaryLinear:__tostring__()
  local outSize, inSize = self.weight:size(1), self.weight:size(2)
  return string.format('%s(%d -> %d)', torch.type(self), inSize, outSize)
end


================================================
FILE: Models/BinaryNet_Cifar10_Model.lua
================================================
--[[This file specifies the model for the CIFAR-10 dataset. The model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning-rate parameters and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'
require './BinarizedNeurons'

-- Select convolution / pooling implementations: binary cuDNN kernels on the
-- GPU, the pure-Torch binary convolution on the CPU.
local SpatialConvolution
local SpatialMaxPooling
if opt.type =='cuda' then
  require 'cunn'
  require 'cudnn'
  require './cudnnBinarySpatialConvolution.lua'
  SpatialConvolution = cudnnBinarySpatialConvolution
  SpatialMaxPooling = cudnn.SpatialMaxPooling
else
  require './BinarySpatialConvolution.lua'
  SpatialConvolution = BinarySpatialConvolution
  SpatialMaxPooling = nn.SpatialMaxPooling
end
-- Select shift-based (power-of-two) or vanilla batch normalization.
-- NOTE(review): BatchNormalization/SpatialBatchNormalization are assigned
-- without 'local' and so become globals - verify nothing relies on them
-- leaking outside this model file.
if opt.SBN == true then
  require './BatchNormalizationShiftPow2.lua'
  require './SpatialBatchNormalizationShiftPow2.lua'
  BatchNormalization = BatchNormalizationShiftPow2
  SpatialBatchNormalization = SpatialBatchNormalizationShiftPow2
else
  BatchNormalization = nn.BatchNormalization
  SpatialBatchNormalization = nn.SpatialBatchNormalization
end
-- width of the fully-connected layers (was an accidental global)
local numHid = 1024
local model = nn.Sequential()

-- Convolution Layers
model:add(SpatialConvolution(3, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(128, 128, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(128, 256, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(256, 256, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(256, 512, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(512, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(512, 512, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(512, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

-- Fully-connected classifier
model:add(nn.View(512*4*4))
model:add(BinaryLinear(512*4*4,numHid,opt.stcWeights))
-- pass opt.runningVal here too, for consistency with every other BN layer
-- (it was omitted on this layer only)
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(BinaryLinear(numHid,10,opt.stcWeights))
model:add(nn.BatchNormalization(10))

-- Build the per-parameter learning-rate vector (Glorot scaling, rounded to a
-- power of two for binary layers) and the clip mask, laid out in the same
-- order in which model:getParameters() flattens the parameters.
-- (Refactored: the original duplicated near-identical branches per layer
-- type and leaked GLR as a global.)
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(1)
local counter = 0

-- Fill the next n slots of both vectors and advance the running offset.
local function fillRange(n, lr, clip)
  learningRates[{{counter + 1, counter + n}}]:fill(lr)
  clipvector[{{counter + 1, counter + n}}]:fill(clip)
  counter = counter + n
end

-- Glorot learning rate for the given fan sum, rounded to the nearest
-- power of two so it can be applied with shifts.
local function glorotLR(fanSum)
  local glr = 1 / torch.sqrt(1.5 / fanSum)
  return math.pow(2, torch.round(math.log(glr) / math.log(2)))
end

for i, layer in ipairs(model.modules) do
  local t = layer.__typename
  if t == 'BinaryLinear' then
    local size_w = layer.weight:size()
    local lr = glorotLR(size_w[1] + size_w[2])
    fillRange(size_w[1] * size_w[2], lr, 1) -- weights: clipped to [-1,1]
    fillRange(layer.bias:size(1), lr, 0)    -- biases: never clipped
  elseif t == 'BatchNormalizationShiftPow2'
      or t == 'nn.BatchNormalization'
      or t == 'SpatialBatchNormalizationShiftPow2'
      or t == 'nn.SpatialBatchNormalization' then
    -- all BN variants: unit learning rate, no clipping
    fillRange(layer.weight:size(1), 1, 0)
    fillRange(layer.bias:size(1), 1, 0)
  elseif t == 'cudnnBinarySpatialConvolution'
      or t == 'BinarySpatialConvolution' then
    local size_w = layer.weight:size()
    local filter_size = size_w[3] * size_w[4]
    local lr = glorotLR(size_w[1] * filter_size + size_w[2] * filter_size)
    fillRange(size_w[1] * size_w[2] * filter_size, lr, 1)
    fillRange(layer.bias:size(1), lr, 0)
  end
end
-- clip all parameter
clipvector:fill(1)
--
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(clipvector:ne(0):sum())
print(counter)
return {
     model = model,
     lrs = learningRates,
     clipV =clipvector,
  }


================================================
FILE: Models/BinaryNet_MNIST_Model.lua
================================================
--[[This file specifies the model for the MNIST dataset. The model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning-rate parameters and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'

require './BinarizedNeurons'
if opt.type=='cuda' then
  require 'cunn'
  require 'cudnn'
end

-- Select shift-based (power-of-two) or vanilla batch normalization.
local BatchNormalization;
if opt.SBN == true then
  require './BatchNormalizationShiftPow2'
  BatchNormalization = BatchNormalizationShiftPow2
else
  BatchNormalization = nn.BatchNormalization
end

local model = nn.Sequential()
local numHid =2048
-- Fully-connected binary network; MNIST images are flattened to 784.
model:add(nn.View(-1,784))

-- pass opt.stcWeights here too, for consistency with the other BinaryLinear
-- layers (it was omitted on the first layer only; nil and false behave the
-- same, so defaults are unchanged)
model:add(BinaryLinear(784,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))
model:add(BinaryLinear(numHid,10,opt.stcWeights))
model:add(nn.BatchNormalization(10))



-- Build the per-parameter Glorot learning-rate vector and clip mask, laid
-- out in the same order in which model:getParameters() flattens parameters.
-- (Refactored: the original duplicated the range-filling code per layer type
-- and leaked GLR as a global. Values are unchanged.)
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(0)

local counter = 0

-- Fill the next n slots of both vectors and advance the running offset.
local function fillRange(n, lr, clip)
  learningRates[{{counter + 1, counter + n}}]:fill(lr)
  clipvector[{{counter + 1, counter + n}}]:fill(clip)
  counter = counter + n
end

for i, layer in ipairs(model.modules) do
  local t = layer.__typename
  if t == 'BinaryLinear' then
    local size_w = layer.weight:size()
    local glr = 1 / torch.sqrt(1.5 / (size_w[1] + size_w[2]))  -- Glorot LR
    fillRange(size_w[1] * size_w[2], glr, 1) -- weights: clipped to [-1,1]
    fillRange(layer.bias:size(1), glr, 0)    -- biases: never clipped
  elseif t == 'BatchNormalizationShiftPow2' or t == 'nn.BatchNormalization' then
    local glr = 1 / torch.sqrt(1.5 / layer.weight:size(1))
    fillRange(layer.weight:size(1), glr, 0)
    fillRange(layer.bias:size(1), 1, 0)
  end
end
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(counter)

return {
   model = model,
   lrs = learningRates,
   clipV =clipvector,
}


================================================
FILE: Models/BinaryNet_SVHN_Model.lua
================================================
--[[This code specifies the model for the SVHN dataset. This model uses the shift-based batch-normalization algorithm.
In this file we also specify the Glorot learning parameter and which of the learnable parameters we clip. ]]
require 'nn'
require './BinaryLinear.lua'
require './BinarizedNeurons'

-- Pick the binary-convolution implementation for the configured backend.
local SpatialConvolution
if opt.type =='cuda' then
  require 'cunn'
  require 'cudnn'
  require './cudnnBinarySpatialConvolution.lua'
  SpatialConvolution = cudnnBinarySpatialConvolution
else
  require './BinarySpatialConvolution.lua'
  SpatialConvolution = BinarySpatialConvolution
end
-- Select shift-based (power-of-2) or vanilla batch normalization.
-- NOTE: BatchNormalization / SpatialBatchNormalization are left global (no
-- `local`) so the model definition below can resolve them.
if opt.SBN == true then
  require './BatchNormalizationShiftPow2.lua'
  require './SpatialBatchNormalizationShiftPow2.lua'
  BatchNormalization = BatchNormalizationShiftPow2
  SpatialBatchNormalization = SpatialBatchNormalizationShiftPow2
else
  BatchNormalization = nn.BatchNormalization
  SpatialBatchNormalization = nn.SpatialBatchNormalization
end


numHid=1024;
local model = nn.Sequential()

-- BUG FIX: pooling layers were hard-coded to cudnn.SpatialMaxPooling, which
-- crashes when opt.type ~= 'cuda' (cudnn is only required on the CUDA path
-- above).  Choose the pooling implementation to match the backend.
local SpatialMaxPooling
if opt.type == 'cuda' then
  SpatialMaxPooling = cudnn.SpatialMaxPooling
else
  SpatialMaxPooling = nn.SpatialMaxPooling
end

-- Convolution Layers: three conv-conv-pool stages (64 -> 128 -> 256 maps);
-- every conv is followed by BN, hard-tanh and activation binarization.
model:add(SpatialConvolution(3, 64, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(64, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(64, 64, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(64, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(64, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(128, 128, 3, 3 ,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(128, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(128, 256, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(SpatialConvolution(256, 256, 3, 3,1,1,1,1,opt.stcWeights ))
model:add(SpatialMaxPooling(2, 2))
model:add(SpatialBatchNormalization(256, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

-- Classifier: flatten, two binary fully-connected layers, 10-way output.
model:add(nn.View(256*4*4))

model:add(BinaryLinear(256*4*4,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(BinaryLinear(numHid,numHid,opt.stcWeights))
model:add(BatchNormalization(numHid, opt.runningVal))
model:add(nn.HardTanh())
model:add(BinarizedNeurons(opt.stcNeurons))

model:add(BinaryLinear(numHid,10,opt.stcWeights))
model:add(nn.BatchNormalization(10))

-- Build per-parameter learning-rate (lrs) and clip (clipV) vectors aligned
-- with the flattened parameter vector from model:getParameters().
-- * Binary layers (linear + conv) get a power-of-two rounded Glorot learning
--   rate and their weights are clipped to [-1, 1] (clip flag 1).
-- * All batch-normalization variants get learning rate 1, no clipping.
-- FIX: the original listed 'nn.SpatialBatchNormalization' in two identical
-- elseif branches — the second was unreachable dead code; `GLR` was also an
-- accidental global.  Behavior is unchanged.
local dE, param = model:getParameters()
local weight_size = dE:size(1)
local learningRates = torch.Tensor(weight_size):fill(0)
local clipvector = torch.Tensor(weight_size):fill(0)
local counter = 0

-- Batch-norm type names that all share the (lr = 1, no clip) policy.
local bnTypes = {
   ['BatchNormalizationShiftPow2'] = true,
   ['nn.BatchNormalization'] = true,
   ['nn.SpatialBatchNormalization'] = true,
   ['SpatialBatchNormalizationShiftPow2'] = true,
}

-- Fill the next `n` slots of both vectors and advance the cursor.
local function fillNext(n, lr, clip)
   learningRates[{{counter+1, counter+n}}]:fill(lr)
   clipvector[{{counter+1, counter+n}}]:fill(clip)
   counter = counter + n
end

for i, layer in ipairs(model.modules) do
   local t = layer.__typename
   if t == 'BinaryLinear' then
      local size_w = layer.weight:size()
      -- Glorot rate from fan-in + fan-out, rounded to the nearest power of
      -- two so it is shift-friendly
      local GLR = 1/torch.sqrt(1.5/(size_w[1]+size_w[2]))
      GLR = math.pow(2, torch.round(math.log(GLR)/math.log(2)))
      fillNext(size_w[1]*size_w[2], GLR, 1)   -- weights: clipped
      fillNext(layer.bias:size(1), GLR, 0)    -- biases: not clipped
   elseif bnTypes[t] then
      fillNext(layer.weight:size(1), 1, 0)
      fillNext(layer.bias:size(1), 1, 0)
   elseif t == 'cudnnBinarySpatialConvolution' or t == 'BinarySpatialConvolution' then
      local size_w = layer.weight:size()
      local filter_size = size_w[3]*size_w[4]
      local GLR = 1/torch.sqrt(1.5/(size_w[1]*filter_size+size_w[2]*filter_size))
      GLR = math.pow(2, torch.round(math.log(GLR)/math.log(2)))
      fillNext(size_w[1]*size_w[2]*filter_size, GLR, 1)   -- kernels: clipped
      fillNext(layer.bias:size(1), GLR, 0)                -- biases: not clipped
   end
end

-- sanity prints: #zero-lr, #nonzero-lr, #clipped, total parameters covered
print(learningRates:eq(0):sum())
print(learningRates:ne(0):sum())
print(clipvector:ne(0):sum())
print(counter)
return {
     model = model,
     lrs = learningRates,
     clipV = clipvector,
  }


================================================
FILE: Models/BinarySpatialConvolution.lua
================================================
local BinarySpatialConvolution, parent = torch.class('BinarySpatialConvolution', 'nn.SpatialConvolution')

--- Spatial convolution with binarized (+1/-1) weights (CPU/THNN path).
-- BUG FIX: the original signature stopped at padH, so the `stcWeights`
-- argument that every model passes (e.g. BinarySpatialConvolution(3, 64, 3,
-- 3, 1, 1, 1, 1, opt.stcWeights)) was silently dropped and self.stcWeights
-- read an undefined global (always false).  `stcWeights` and `groups` are
-- now real parameters, matching cudnnBinarySpatialConvolution.
-- Also fixed: auxiliary buffers were allocated (..., kW, kH) — they now use
-- the same (kH, kW) layout as the parent's weight (identical for square
-- kernels, which is the only case exercised by the models in this repo).
function BinarySpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, stcWeights, groups)
  -- suppress the parent's reset(); we reset only after our buffers exist
  local delayedReset = self.reset
  self.reset = function() end
  parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH)
  self.reset = delayedReset
  self.padW = padW or 0
  self.padH = padH or 0
  self.stcWeights = stcWeights or false
  self.groups = groups or 1
  assert(nInputPlane % self.groups == 0,
         'nInputPlane should be divisible by nGroups')
  assert(nOutputPlane % self.groups == 0,
         'nOutputPlane should be divisible by nGroups')
  self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
  self.weightB = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)    -- binarized copy
  self.weightOrg = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)  -- real-valued snapshot
  self.randmat = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)    -- U(0,1) draws for stochastic mode
  self.maskStc = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
  self:reset()
  -- should nil for serialization, the reset will still work
  self.reset = nil
  self.iSize = torch.LongStorage(4):fill(0)


end

--- Re-initialize parameters: weights uniform in [-1, 1] (binary-friendly
-- range), biases uniform in [-stdv, stdv] with the usual fan-in based stdv.
function BinarySpatialConvolution:reset(stdv)
  if stdv then
    stdv = stdv * math.sqrt(3)
  else
    stdv = 1 / math.sqrt(self.kW * self.kH * self.nInputPlane)
  end
  if nn.oldSeed then
    -- per-element draws reproduce the legacy RNG consumption order
    self.weight:apply(function() return torch.uniform(-1, 1) end)
    if self.bias then
      self.bias:apply(function() return torch.uniform(-stdv, stdv) end)
    end
  else
    self.weight:uniform(-1, 1)
    if self.bias then
      self.bias:uniform(-stdv, stdv)
    end
  end
end

--- Return the binarized copy of the current weights.
-- Snapshots the real-valued weights into self.weightOrg first (so callers
-- can restore them), then builds self.weightB:
-- * deterministic mode: hard-sigmoid(w) rounded and mapped to {-1, +1};
-- * stochastic mode (stcWeights and trainFlag): weightB is left as
--   hard-sigmoid(w) - U(0,1) — note it is NOT mapped to +/-1 here.
-- FIX: removed the dead `if not self.binaryFlag` branch that immediately
-- followed `self.binaryFlag = true` (it could never run); behavior unchanged.
function BinarySpatialConvolution:binarized(trainFlag)
  self.weightOrg:copy(self.weight)
  self.binaryFlag = true
  -- hard sigmoid: clamp((w + 1) / 2, 0, 1)
  self.weightB:copy(self.weight):add(1):div(2):clamp(0,1)
  if not self.stcWeights or not trainFlag then
    -- deterministic: round to {0,1}, then map to {-1,+1}
    self.weightB:round():mul(2):add(-1)
  else
    -- stochastic: positive iff U(0,1) < hard-sigmoid(w)
    self.maskStc = self.weightB - self.randmat:rand(self.randmat:size())
    self.weightB:copy(self.maskStc)
  end
  return  self.weightB
end

-- Upgrade state saved by older torch/nn versions: ensure the MM scratch
-- buffers exist, split the legacy single `padding` field into padW/padH, and
-- unflatten 2D weight/gradWeight back to their 4D layout.
local function backCompatibility(self)
   self.finput = self.finput or self.weight.new()
   self.fgradInput = self.fgradInput or self.weight.new()
   if self.padding then
      self.padW, self.padH = self.padding, self.padding
      self.padding = nil
   else
      self.padW = self.padW or 0
      self.padH = self.padH or 0
   end
   if self.weight:dim() == 2 then
      self.weight = self.weight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   end
   if self.gradWeight and self.gradWeight:dim() == 2 then
      self.gradWeight = self.gradWeight:view(self.nOutputPlane, self.nInputPlane, self.kH, self.kW)
   end
end

-- Return contiguous versions of input (and optionally gradOutput), copying
-- into scratch tensors cached on the module only when a copy is needed.
local function makeContiguous(self, input, gradOutput)
  if not input:isContiguous() then
    self._input = self._input or input.new()
    self._input:resizeAs(input):copy(input)
    input = self._input
  end
  if gradOutput and not gradOutput:isContiguous() then
    self._gradOutput = self._gradOutput or gradOutput.new()
    self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
    gradOutput = self._gradOutput
  end
  return input, gradOutput
end

-- function to re-view the weight layout in a way that would make the MM ops happy
-- Flatten the 4D weight (and gradWeight) to the 2D layout expected by the
-- SpatialConvolutionMM kernels.
local function viewWeight(self)
   local flat = self.nInputPlane * self.kH * self.kW
   self.weight = self.weight:view(self.nOutputPlane, flat)
   if self.gradWeight and self.gradWeight:dim() > 0 then
      self.gradWeight = self.gradWeight:view(self.nOutputPlane, flat)
   end
end

-- Restore the 4D (out, in, kH, kW) layout after an MM call (see viewWeight).
local function unviewWeight(self)
   local out, inp = self.nOutputPlane, self.nInputPlane
   self.weight = self.weight:view(out, inp, self.kH, self.kW)
   if self.gradWeight and self.gradWeight:dim() > 0 then
      self.gradWeight = self.gradWeight:view(out, inp, self.kH, self.kW)
   end
end

--- Forward pass with binarized weights.
-- Swaps self.weight to the binarized copy around the THNN call and restores
-- the real-valued weights afterwards, so the optimizer keeps updating the
-- full-precision parameters (straight-through scheme).
function BinarySpatialConvolution:updateOutput(input)
   backCompatibility(self)
   viewWeight(self)  -- flatten weight to 2D for SpatialConvolutionMM
   input = makeContiguous(self, input)
   self.weightB = self:binarized(self.train)  -- also snapshots weightOrg
   self.weight:copy(self.weightB)
   input.THNN.SpatialConvolutionMM_updateOutput(
      input:cdata(),
      self.output:cdata(),
      self.weight:cdata(),
      self.bias:cdata(),
      self.finput:cdata(),
      self.fgradInput:cdata(),
      self.kW, self.kH,
      self.dW, self.dH,
      self.padW, self.padH
   )
   self.weight:copy(self.weightOrg)  -- restore real-valued weights
   unviewWeight(self)
   return self.output
end

--- Backward pass wrt the input.
-- The kernel must see the same *binarized* weights used in the forward pass,
-- so they are swapped in for the call and the real-valued weights restored
-- afterwards.
function BinarySpatialConvolution:updateGradInput(input, gradOutput)
   if self.gradInput then
      backCompatibility(self)
      viewWeight(self)
      input, gradOutput = makeContiguous(self, input, gradOutput)
      self.weight:copy(self.weightB)
      input.THNN.SpatialConvolutionMM_updateGradInput(
         input:cdata(),
         gradOutput:cdata(),
         self.gradInput:cdata(),
         self.weight:cdata(),
         -- self.bias:cdata(), -- removed from this commit https://github.com/torch/nn/commit/651103f3aabc2dd154d6bd95ad565d14009255e6
         self.finput:cdata(),
         self.fgradInput:cdata(),
         self.kW, self.kH,
         self.dW, self.dH,
         self.padW, self.padH
      )
      self.weight:copy(self.weightOrg)
      unviewWeight(self)
      return self.gradInput
   end
end

--- Accumulate weight/bias gradients (scaled by `scale`, default 1).
-- No binarized/real weight swap is needed here: the accGrad kernel's
-- argument list does not include self.weight (gradients are computed from
-- finput, which was built during the binarized forward pass).
function BinarySpatialConvolution:accGradParameters(input, gradOutput, scale)
  scale = scale or 1
  backCompatibility(self)
  input, gradOutput = makeContiguous(self, input, gradOutput)
  viewWeight(self)
  input.THNN.SpatialConvolutionMM_accGradParameters(
     input:cdata(),
     gradOutput:cdata(),
     self.gradWeight:cdata(),
     self.gradBias:cdata(),
     self.finput:cdata(),
     self.fgradInput:cdata(),
     self.kW, self.kH,
     self.dW, self.dH,
     self.padW, self.padH,
     scale
  )
  unviewWeight(self)
end

--- Type conversion: drop the MM scratch buffers (recreated lazily in the new
-- type) before delegating to the parent implementation.
function BinarySpatialConvolution:type(type,tensorCache)
   if self.finput then self.finput = torch.Tensor() end
   if self.fgradInput then self.fgradInput = torch.Tensor() end
   return parent.type(self,type,tensorCache)
end

--- String representation is inherited unchanged from nn.SpatialConvolution.
function BinarySpatialConvolution:__tostring__()
   return parent.__tostring__(self)
end

--- Release scratch tensors kept only for speed (safe before serialization),
-- then let the parent clear its own state.
function BinarySpatialConvolution:clearState()
   nn.utils.clear(self, 'finput', 'fgradInput', '_input', '_gradOutput')
   return parent.clearState(self)
end


================================================
FILE: Models/SpatialBatchNormalizationShiftPow2.lua
================================================
--[[
   This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper:
   "Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio'

   The code is based on nn library
   --]]
local SpatialBatchNormalizationShiftPow2,parent = torch.class('SpatialBatchNormalizationShiftPow2', 'nn.Module')

--- Shift-based spatial batch normalization (BNN paper variant).
-- @param nFeature   number of feature planes; 0 disables the affine transform
-- @param runningVal use running mean/std at evaluation time (default true)
-- @param eps        numerical-stability constant (default 1e-5)
-- @param momentum   running-average momentum (default 0.125)
function SpatialBatchNormalizationShiftPow2:__init(nFeature, runningVal, eps, momentum)
   parent.__init(self)
   assert(nFeature and type(nFeature) == 'number',
          'Missing argument #1: Number of feature planes. ' ..
          'Give 0 for no affine transform')
   self.eps = eps or 1e-5
   self.train = true
   self.momentum = momentum or 0.125
   -- BUG FIX: `runningVal or true` evaluated to true even when the caller
   -- explicitly passed false (false is falsy in Lua); only nil now selects
   -- the default.
   if runningVal == nil then
      self.runningVal = true
   else
      self.runningVal = runningVal
   end
   self.running_mean = torch.Tensor()
   self.running_std = torch.Tensor()
   self.running_std_ap2 = torch.Tensor()   -- running 1/std rounded to a power of 2
   if nFeature > 0 then self.affine = true end

   if self.affine then
      self.weight = torch.Tensor(nFeature)       -- gamma
      self.weightSign = torch.Tensor(nFeature)
      self.weight_ap2 = torch.Tensor(nFeature)   -- gamma rounded to a power of 2
      self.bias = torch.Tensor(nFeature)         -- beta
      self.gradWeight = torch.Tensor(nFeature)
      self.gradBias = torch.Tensor(nFeature)
      self:reset()
   end
end

--- Initialize the affine parameters to the identity transform
-- (gamma = 1, beta = 0).
function SpatialBatchNormalizationShiftPow2:reset()
   self.bias:zero()
   self.weight:fill(1)
end

--- Forward pass of shift-based spatial batch normalization.
-- Multiplicative factors (centered values, 1/std, gamma) are rounded to the
-- nearest power of two ("AP2") so they could be applied with bit shifts.
-- Input must be 4D (batch x feature x H x W).  Running statistics are
-- updated in training mode and used at eval time when self.runningVal is set.
function SpatialBatchNormalizationShiftPow2:updateOutput(input)
   assert(input:dim() == 4, 'only mini-batch supported (4D tensor), got '
             .. input:dim() .. 'D tensor instead')
   local nBatch = input:size(1)
   local nFeature = input:size(2)
   local iH = input:size(3)
   local iW = input:size(4)

   -- buffers that are reused
   self.buffer = self.buffer or input.new()
   self.buffer2 = self.buffer2 or input.new()
   self.centered = self.centered or input.new()
   self.centered:resizeAs(input)
   self.centeredOrg = self.centeredOrg or input.new()
   self.centeredOrg:resizeAs(input)
   self.centeredSign = self.centeredSign or input.new()
   self.centeredSign:resizeAs(input)
   self.std = self.std or input.new()
   self.normalized = self.normalized or input.new()
   self.normalized:resizeAs(input)
   self.normalizedSign = self.normalizedSign or input.new()
   self.normalizedSign:resizeAs(input)
   self.output:resizeAs(input)
   self.gradInput:resizeAs(input)
   if self.train == false and self.runningVal == true then
      -- evaluation: normalize with running mean and AP2-rounded running 1/std
      assert(self.running_mean:nDimension() ~= 0,
             'Module never run on training data. First run on some training data before evaluating.')
      self.output:copy(input)
      self.buffer:repeatTensor(self.running_mean:view(1, nFeature, 1, 1), nBatch, 1, iH, iW)
      self.output:add(-1, self.buffer)
      self.running_std_ap2:copy(torch.pow(2,torch.round(torch.log(self.running_std):div(math.log(2)))))
      self.buffer:repeatTensor(self.running_std_ap2:view(1, nFeature, 1, 1), nBatch, 1, iH, iW)
      self.output:cmul(self.buffer)
   else -- training mode
      -- lazily allocate running statistics on the first training batch
      if self.running_mean:nDimension() == 0 then
         self.running_mean:resize(nFeature):zero()
      end
      if self.running_std:nDimension() == 0 then
         self.running_std:resize(nFeature):zero()
         self.running_std_ap2:resize(nFeature):zero()
      end
      -- calculate mean over mini-batch, over feature-maps
      local in_folded = input:view(nBatch, nFeature, iH * iW)
      self.buffer:mean(in_folded, 1)
      self.buffer2:mean(self.buffer, 3)
      self.running_mean:mul(1 - self.momentum):add(self.momentum, self.buffer2) -- add to running mean
      self.buffer:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)

      -- subtract mean
      self.centered:add(input, -1, self.buffer)                  -- x - E(x)
      self.centeredOrg:copy(self.centered)
      self.centeredSign:copy(self.centered)

      -- AP2-round the centered values, preserving their sign
      self.centeredSign:sign()
      self.centered:copy(torch.pow(2,torch.round(torch.log(self.centered:abs()):div(math.log(2))))):cmul(self.centeredSign)
      -- calculate standard deviation over mini-batch

      -- variance estimate via AP2(x - E(x)) * (x - E(x)): a shift-based
      -- approximation of (x - E(x))^2
      self.buffer:copy(self.centered):cmul(self.centeredOrg) --:abs()
      -- calculate standard deviation over mini-batch

      local buf_folded = self.buffer:view(nBatch,nFeature,iH*iW)
      self.std:mean(self.buffer2:mean(buf_folded, 1), 3)
      self.std:add(self.eps):sqrt():pow(-1)      -- 1 / E([x - E(x)]^2)
      self.running_std:mul(1 - self.momentum):add(self.momentum, self.std) -- add to running stdv
      -- self.std now holds the AP2-rounded reciprocal std
      self.std:copy(torch.pow(2,torch.round(torch.log(self.std):div(math.log(2)))))


      self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)

      -- divide standard-deviation + eps
      self.output:cmul(self.centeredOrg, self.buffer)
      -- keep an AP2-rounded, sign-preserving copy of the normalized output
      -- for accGradParameters
      self.normalized:copy(self.output)
      self.normalizedSign:copy(self.normalized)
      self.normalizedSign:sign()
      self.normalized:copy(torch.pow(2,torch.round(torch.log(self.normalized:abs()):div(math.log(2)))):cmul(self.normalizedSign))
    --  self.normalized[self.normalized:lt(0)]=1; -- Can improve results
   end

   if self.affine then
      -- multiply with gamma and add beta
      -- gamma is applied through its AP2-rounded, sign-preserving copy
      self.weight_ap2:copy(self.weight)
      self.weightSign:copy(self.weight):sign()
      self.weight_ap2:copy(torch.pow(2,torch.round(torch.log(self.weight:clone():abs()):div(math.log(2))))):cmul(self.weightSign)
      --self.weight:fill(1) --Almost similar results
      self.buffer:repeatTensor(self.weight_ap2:view(1, nFeature, 1, 1),nBatch, 1, iH, iW)
      self.output:cmul(self.buffer)
      self.buffer:repeatTensor(self.bias:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      self.output:add(self.buffer)
   end

   return self.output
end

--- Backward pass wrt the input, reusing buffers cached by updateOutput
-- (self.centered, self.std == AP2-rounded 1/std, self.weight_ap2).
-- Only valid in training mode.
function SpatialBatchNormalizationShiftPow2:updateGradInput(input, gradOutput)
   assert(input:dim() == 4, 'only mini-batch supported')
   assert(gradOutput:dim() == 4, 'only mini-batch supported')
   assert(self.train == true, 'should be in training mode when self.train is true')
   local nBatch = input:size(1)
   local nFeature = input:size(2)
   local iH = input:size(3)
   local iW = input:size(4)

   -- variance term: -E[g * (x-E(x))] * (x-E(x)) * (1/std)^2
   self.gradInput:cmul(self.centered, gradOutput)
   local gi_folded = self.gradInput:view(nBatch, nFeature, iH * iW)
   self.buffer2:mean(self.buffer:mean(gi_folded, 1), 3)
   self.gradInput:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
   self.gradInput:cmul(self.centered):mul(-1)
   self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:cmul(self.buffer):cmul(self.buffer)

   -- mean term: (g - E[g]) * (1/std)
   self.buffer:mean(gradOutput:view(nBatch, nFeature, iH*iW), 1)
   self.buffer2:mean(self.buffer, 3)
   self.buffer:repeatTensor(self.buffer2:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:add(gradOutput):add(-1, self.buffer)
   self.buffer:repeatTensor(self.std:view(1, nFeature, 1, 1),
                            nBatch, 1, iH, iW)
   self.gradInput:cmul(self.buffer)

   if self.affine then
      -- scale by the AP2-rounded gamma used in the forward pass
      self.buffer:repeatTensor(self.weight_ap2:view(1, nFeature, 1, 1),
                               nBatch, 1, iH, iW)
      self.gradInput:cmul(self.buffer)
   end

   return self.gradInput
end

--- Accumulate gamma/beta gradients, summing over the mini-batch and the
-- spatial positions.  Gamma's gradient uses self.normalized, the AP2-rounded
-- normalized activations cached by updateOutput.
function SpatialBatchNormalizationShiftPow2:accGradParameters(input, gradOutput, scale)
   if self.affine then
      scale = scale or 1.0
      local nBatch = input:size(1)
      local nFeature = input:size(2)
      local iH = input:size(3)
      local iW = input:size(4)
      -- gradWeight += scale * sum(normalized * gradOutput)
      self.buffer2:resizeAs(self.normalized):copy(self.normalized)
      self.buffer2 = self.buffer2:cmul(gradOutput):view(nBatch, nFeature, iH*iW)
      self.buffer:sum(self.buffer2, 1) -- sum over mini-batch
      self.buffer2:sum(self.buffer, 3) -- sum over pixels
      self.gradWeight:add(scale, self.buffer2)

      -- gradBias += scale * sum(gradOutput)
      self.buffer:sum(gradOutput:view(nBatch, nFeature, iH*iW), 1)
      self.buffer2:sum(self.buffer, 3)
      self.gradBias:add(scale, self.buffer2) -- sum over mini-batch
   end
end


================================================
FILE: Models/cudnnBinarySpatialConvolution.lua
================================================
-- cuDNN-backed spatial convolution with binarized weights; extends
-- cudnn.SpatialConvolution.
local cudnnBinarySpatialConvolution, parent =
    torch.class('cudnnBinarySpatialConvolution', 'cudnn.SpatialConvolution')
local ffi = require 'ffi'
local errcheck = cudnn.errcheck

-- Per-pass caches for cudnn autotuner results, keyed by pass type.
local autotunerCache = {}
autotunerCache[1] = {} -- forward
autotunerCache[2] = {} -- backwardFilter
autotunerCache[3] = {} -- backwardData

--- Binary-weight spatial convolution backed by cudnn.
-- Weights stay real-valued; a binarized copy is swapped in during the
-- forward/backward passes (see binarized()).
-- @param stcWeights use stochastic weight binarization during training
-- @param groups     number of convolution groups (default 1)
function cudnnBinarySpatialConvolution:__init(nInputPlane, nOutputPlane,
                            kW, kH, dW, dH, padW, padH,stcWeights, groups)
    -- suppress the parent's reset(); we reset after the binary buffers exist
    local delayedReset = self.reset
    self.reset = function() end
    parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH)
    self.reset = delayedReset
    self.padW = padW or 0
    self.padH = padH or 0
    self.groups = groups or 1
    self.stcWeights = stcWeights or false
    assert(nInputPlane % self.groups == 0,
           'nInputPlane should be divisible by nGroups')
    assert(nOutputPlane % self.groups == 0,
           'nOutputPlane should be divisible by nGroups')
    -- BUG FIX: the auxiliary buffers were allocated (..., kW, kH) while
    -- weight/gradWeight use (..., kH, kW); the shapes disagreed for
    -- non-square kernels.  All buffers now share the weight's (kH, kW)
    -- layout (identical for the square kernels used by the models here).
    self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
    self.weightB = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)    -- binarized copy
    self.weightOrg = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)  -- real-valued snapshot
    self.randmat = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)    -- U(0,1) draws for stochastic mode
    self.maskStc = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
    self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
    self:reset()
    -- should nil for serialization, the reset will still work
    self.reset = nil
end

--- Return the binarized copy of the weights; snapshots the real-valued
-- weights into self.weightOrg first so callers can restore them.
-- Deterministic mode maps hard-sigmoid(w) to exactly {-1, +1}; stochastic
-- mode (stcWeights and trainFlag) leaves weightB as
-- hard-sigmoid(w) - U(0,1), a real-valued stochastic mask (not +/-1).
-- FIX: removed the dead `if not self.binaryFlag` branch that immediately
-- followed `self.binaryFlag = true`; behavior unchanged.
function cudnnBinarySpatialConvolution:binarized(trainFlag)
  self.weightOrg:copy(self.weight)
  self.binaryFlag = true
  -- hard sigmoid: clamp((w + 1) / 2, 0, 1)
  self.weightB:copy(self.weight):add(1):div(2):clamp(0,1)
  if not self.stcWeights or not trainFlag then
    -- deterministic: round to {0,1}, then map to {-1,+1}
    self.weightB:round():mul(2):add(-1)
  else
    -- stochastic: positive iff U(0,1) < hard-sigmoid(w)
    self.maskStc = self.weightB - self.randmat:rand(self.randmat:size())
    self.weightB:copy(self.maskStc)
  end
  return  self.weightB
end

-- if you change the configuration of the module manually, call this
--- Rebuild the cudnn filter descriptor (float, NCHW, grouped) and the bias
-- descriptor from the current weight/bias tensors.  CUDA tensors only.
function cudnnBinarySpatialConvolution:resetWeightDescriptors()
    assert(torch.typename(self.weight) == 'torch.CudaTensor',
           'Only Cuda supported duh!')
    assert(torch.typename(self.bias) == 'torch.CudaTensor' or not self.bias,
           'Only Cuda supported duh!')
    -- for compatibility
    self.groups = self.groups or 1
    -- create filterDescriptor for weight
    self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
    errcheck('cudnnCreateFilterDescriptor', self.weightDesc)
    local desc = torch.IntTensor({self.nOutputPlane/self.groups,
                              self.nInputPlane/self.groups,
                              self.kH, self.kW})
    errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
             'CUDNN_DATA_FLOAT', 'CUDNN_TENSOR_NCHW', 4,
             desc:data());
    -- free the cudnn descriptor when the cdata is garbage-collected
    local function destroyWDesc(d)
        errcheck('cudnnDestroyFilterDescriptor', d[0]);
    end
    ffi.gc(self.weightDesc, destroyWDesc)

    -- create descriptor for bias
    if self.bias then
        self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
    end
end

--- Enable (default) or explicitly disable cudnn's fastest-algorithm search.
-- Clearing iSize forces descriptor/algorithm re-selection on the next forward.
function cudnnBinarySpatialConvolution:fastest(mode)
    -- nil means "enable"; false explicitly disables
    self.fastest_mode = (mode == nil) and true or mode
    self.iSize = self.iSize or torch.LongStorage(4)
    self.iSize:fill(0)
    return self
end

--- Manually pin cudnn algorithms for the forward (fmode), backward-data
-- (bdmode) and backward-filter (bwmode) passes; nil leaves a mode untouched.
function cudnnBinarySpatialConvolution:setMode(fmode, bdmode, bwmode)
    if fmode ~= nil then self.fmode = fmode end
    if bdmode ~= nil then self.bdmode = bdmode end
    if bwmode ~= nil then self.bwmode = bwmode end
    -- force descriptor/algorithm re-selection on the next forward
    self.iSize = self.iSize or torch.LongStorage(4)
    self.iSize:fill(0)
    return self
end

--- Clear any manually pinned algorithms (see setMode); cudnn chooses again.
function cudnnBinarySpatialConvolution:resetMode()
    self.fmode, self.bdmode, self.bwmode = nil, nil, nil
    return self
end

--- Remove the bias term (and its gradient buffer) from this module.
function cudnnBinarySpatialConvolution:noBias()
   self.bias, self.gradBias = nil, nil
   return self
end

--- I/O descriptor creation is inherited unchanged from the parent.
function cudnnBinarySpatialConvolution:createIODescriptors(input)
    parent.createIODescriptors(self, input)
end

-- NOTE(review): `one` and `zero` appear unused in this file — presumably
-- leftover alpha/beta scalars for raw cudnn calls; confirm before removing.
local one = torch.FloatTensor({1});
local zero = torch.FloatTensor({0});

-- Ensure the given tensors are contiguous, copying into scratch buffers
-- cached on the module only when a copy is actually required.
local function makeContiguous(self, x, gradOut)
   if not x:isContiguous() then
      self._input = self._input or x.new()
      self._input:typeAs(x):resizeAs(x):copy(x)
      x = self._input
   end
   if gradOut and not gradOut:isContiguous() then
      self._gradOutput = self._gradOutput or gradOut.new()
      self._gradOutput:typeAs(gradOut):resizeAs(gradOut):copy(gradOut)
      gradOut = self._gradOutput
   end
   return x, gradOut
end

--- Forward pass: binarize the weights, run the parent cudnn convolution,
-- then restore the real-valued weights.
-- NOTE(review): the weightOrg copy below is redundant — binarized()
-- performs the same snapshot as its first step.
function cudnnBinarySpatialConvolution:updateOutput(input)
    self.weightOrg:copy(self.weight)
    self.weightB = self:binarized(self.train)
    self.weight:copy(self.weightB)
    parent.updateOutput(self,input)
    self.weight:copy(self.weightOrg)
    return self.output
end

--- Backward pass wrt the input: run the parent pass with the *binarized*
-- weights (the ones used in the forward pass), then restore the real-valued
-- weights.
-- FIX: the original forwarded an undefined global `scale` (always nil) as a
-- stray 4th argument; parent.updateGradInput takes only (input, gradOutput),
-- so dropping it changes nothing.
function cudnnBinarySpatialConvolution:updateGradInput(input, gradOutput)
    if not self.gradInput then return end
    self.weight:copy(self.weightB)
    parent.updateGradInput(self, input, gradOutput:contiguous())
    self.weight:copy(self.weightOrg)
    return self.gradInput
end

--- Accumulate weight/bias gradients via the parent cudnn implementation,
-- making gradOutput contiguous first.
function cudnnBinarySpatialConvolution:accGradParameters(input, gradOutput, scale)
    local gradOut = gradOutput:contiguous()
    parent.accGradParameters(self, input, gradOut, scale)
end

--- Drop all cached cudnn descriptors, algorithm selections and scratch
-- buffers so they are rebuilt lazily; used before serialization.
function cudnnBinarySpatialConvolution:clearDesc()
    local cached = {
        'weightDesc', 'biasDesc', 'convDesc', 'iDesc', 'oDesc',
        'oDescForBias', 'algType', 'fwdAlgType', 'bwdDataAlgType',
        'bwdFilterAlgType', 'extraBuffer', 'extraBufferSizeInBytes', 'scaleT',
    }
    for _, field in ipairs(cached) do
        self[field] = nil
    end
end

--- Serialization hook: strip non-serializable cudnn descriptors, then write
-- a shallow copy of the remaining fields.
function cudnnBinarySpatialConvolution:write(f)
    self:clearDesc()
    local state = {}
    for key, value in pairs(self) do
        state[key] = value
    end
    f:writeObject(state)
end

--- Clear cached descriptors, then let nn.Module clear intermediate buffers.
function cudnnBinarySpatialConvolution:clearState()
   self:clearDesc()
   return nn.Module.clearState(self)
end


================================================
FILE: README.md
================================================
Deep Networks on classification tasks using Torch
=================================================
This is a complete training example for BinaryNets using Binary-Backpropagation algorithm as explained in
"Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio'
on following datasets: Cifar10/100, SVHN, MNIST

## Data
We use the dp library to extract all the data; please see the installation section below.

## Dependencies
* Torch (http://torch.ch)
* "DataProvider.torch" (https://github.com/eladhoffer/DataProvider.torch) for DataProvider class.
* "cudnn.torch" (https://github.com/soumith/cudnn.torch) for faster training. Can be avoided by changing "cudnn" to "nn" in models.
* "dp" (https://github.com/nicholas-leonard/dp.git) for data extraction
* "unsup" (https://github.com/koraykv/unsup.git) for data pre-processing

To install all dependencies (assuming torch is installed) use:
```bash
luarocks install https://raw.githubusercontent.com/eladhoffer/DataProvider.torch/master/dataprovider-scm-1.rockspec
luarocks install cudnn
luarocks install dp
luarocks install unsup
```

## Training
Create pre-processing folder:
```lua
cd BinaryNet
mkdir PreProcData
```

Start training using:
```lua
th Main_BinaryNet_Cifar10.lua -network BinaryNet_Cifar10_Model
```

or,

```lua
th Main_BinaryNet_MNIST.lua -network BinaryNet_MNIST_Model
```

## Run with Docker
The Docker is built from `nvidia/cuda:8.0-cudnn5-devel` with Torch commit `0219027e6c4644a0ba5c5bf137c989a0a8c9e01b`

- To build image, run: `docker build -t binarynet:torch-gpu-cuda-8.0 -f Dockerfile/binarynet-torch-gpu-cuda-8.0 .` or to pull docker image: `docker pull hychiang/binarynet:torch-gpu-cuda-8.0`

- To launch image with gpu, run: `docker run -it --gpus all binarynet:torch-gpu-cuda-8.0`

- To train BNN with Cifar10: `th Main_BinaryNet_Cifar10.lua -network BinaryNet_Cifar10_Model`


## Additional flags
|Flag             | Default Value        |Description
|:----------------|:--------------------:|:----------------------------------------------
|modelsFolder     |  ./Models/           | Models Folder
|network          |  Model.lua           | Model file - must return valid network.
|LR               |  0.1                 | learning rate
|LRDecay          |  0                   | learning rate decay (in # samples)
|weightDecay      |  1e-4                | L2 penalty on the weights
|momentum         |  0.9                 | momentum
|batchSize        |  128                 | batch size
|stcNeurons       |  true                | using stochastic binarization for the neurons or not
|stcWeights       |  false               | using stochastic binarization for the weights or not
|optimization     |  adam                | optimization method
|SBN              |  true                | use shift based batch-normalization or not
|runningVal       |  true                | use running mean and std or not
|epoch            |  -1                  | number of epochs to train (-1 for unbounded)
|threads          |  8                   | number of threads
|type             |  cuda                | float or cuda
|devid            |  1                   | device ID (if using CUDA)
|load             |  none                |  load existing net weights
|save             |  time-identifier     | save directory
|dataset          |  Cifar10             | Dataset - Cifar10, Cifar100, STL10, SVHN, MNIST
|dp_prepro        |  false               | preprocessing using dp lib
|whiten           |  false               | whiten data
|augment          |  false               | Augment training data
|preProcDir       |  ./PreProcData/      | Data for pre-processing (means,Pinv,P)


================================================
FILE: SqrHingeEmbeddingCriterion.lua
================================================
--[[
This function implements the squared hinge loss criterion.
]]
local SqrtHingeEmbeddingCriterion, parent = torch.class('SqrtHingeEmbeddingCriterion', 'nn.Criterion')

--- Squared hinge criterion: loss = max(0, margin - y*x)^2, averaged over all
-- elements when sizeAverage is enabled (the default).
function SqrtHingeEmbeddingCriterion:__init(margin)
   parent.__init(self)
   self.sizeAverage = true      -- divide the loss by input:nElement()
   self.margin = margin or 1    -- hinge margin
end

--- Forward pass: sum (or mean, when sizeAverage) of max(0, margin - y*x)^2.
-- Keeps the un-squared hinge terms in self.buffer for the backward pass.
-- @param input prediction tensor
-- @param y     target tensor of +1/-1 labels, or a scalar
-- @return the scalar loss
function SqrtHingeEmbeddingCriterion:updateOutput(input,y)
   self.buffer = self.buffer or input.new()
   -- promote a scalar target to a 1-element tensor of input's type
   if not torch.isTensor(y) then
      self.ty = self.ty or input.new():resize(1)
      self.ty[1] = y
      y = self.ty
   end

   -- hinge <- max(0, margin - y .* input)
   local hinge = self.buffer
   hinge:resizeAs(input):copy(input)
   hinge:cmul(y):mul(-1):add(self.margin)
   hinge[torch.le(hinge, 0)] = 0

   -- square on a clone: self.buffer itself must stay un-squared for updateGradInput
   self.output = hinge:clone():pow(2):sum()
   if self.sizeAverage == nil or self.sizeAverage == true then
      self.output = self.output / input:nElement()
   end
   return self.output
end

--- Backward pass: dL/dx = -2 * y * max(0, margin - y*x), optionally averaged.
-- Must be called after updateOutput (relies on self.buffer holding the
-- un-squared hinge terms from the forward pass).
-- @param input prediction tensor (same as passed to updateOutput)
-- @param y     target tensor of +1/-1 labels, or a scalar
-- @return self.gradInput
function SqrtHingeEmbeddingCriterion:updateGradInput(input, y)
   if not torch.isTensor(y) then
      -- create the scalar holder if missing: the original indexed self.ty
      -- unconditionally and crashed when the forward pass never saw a scalar y
      self.ty = self.ty or input.new():resize(1)
      self.ty[1] = y
      y = self.ty
   end
   self.gradInput:resizeAs(input):copy(y):mul(-2):cmul(self.buffer)
   -- inactive hinge (y*x > margin) contributes no gradient
   self.gradInput[torch.cmul(y,input):gt(self.margin)] = 0
   if (self.sizeAverage == nil or self.sizeAverage == true) then
      self.gradInput:mul(1 / input:nElement())
   end
   return self.gradInput
end


================================================
FILE: adaMax_binary_clip_shift.lua
================================================
--[[ An implementation of Shift based AdaMax based on http://arxiv.org/pdf/1412.6980.pdf as described in the paper:
   "Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1", Matthieu Courbariaux, Itay Hubara, Daniel Soudry, Ran El-Yaniv, Yoshua Bengio

Note that this function performs the weight clipping as well

ARGS:

- 'opfunc' : a function that takes a single input (X), the point
             of evaluation, and returns f(X) and df/dX
- 'x'      : the initial point
- 'config` : a table with configuration parameters for the optimizer
- 'config.learningRate'      : learning rate
- 'config.beta1'             : first moment coefficient
- 'config.beta2'             : second moment coefficient
- 'config.epsilon'           : for numerical stability
- 'state'                    : a table describing the state of the optimizer; after each
                              call the state is modified

RETURN:
- `x`     : the new x vector
- `f(x)`  : the function, evaluated before the update

]]

--- One step of Shift-based AdaMax with weight clipping for BinaryNet.
-- Step size and the infinity-norm accumulator are rounded to powers of two
-- (shift-based arithmetic); binary weights (clipV==1) are clipped to [-1,1].
-- @param opfunc function of x returning f(x), df/dx
-- @param x      flat parameter tensor, updated in place
-- @param config table: learningRate, beta1, beta2, epsilon, GLRvec, clipV
-- @param state  optimizer state (defaults to config), mutated across calls
-- @return x, {f(x)} evaluated before the update
function adaMax_binary_clip_shift(opfunc, x, config, state)
    -- (0) get/update state
    local config = config or {}
    local state = state or config
    local lr = config.learningRate or 0.002
    local GLRvec = config.GLRvec or 1    -- per-parameter (Glorot) learning-rate scaling
    local clipV = config.clipV or 0      -- 0/1 mask marking the binary weights to clip

    local beta1 = config.beta1 or 0.9
    local beta2 = config.beta2 or 0.999
    local epsilon = config.epsilon or 2^-27

    -- (1) evaluate f(x) and df/dx
    local fx, dfdx = opfunc(x)
    -- Initialization
    state.t = state.t or 0
    -- Exponential moving average of gradient values
    state.m = state.m or x.new(dfdx:size()):zero()
    -- Exponentially weighted infinity norm of the gradients (AdaMax u_t)
    state.v = state.v or x.new(dfdx:size()):zero()
    -- Reusable scratch buffer for the update direction; the original allocated
    -- a fresh torch.zeros (plus a global opt.type cuda check) every call while
    -- leaving state.denom unused
    state.denom = state.denom or x.new(dfdx:size()):zero()

    state.t = state.t + 1

    -- Decay the first moment and update the infinity norm.
    -- torch.abs(dfdx) is out-of-place: the original dfdx:abs() mutated the
    -- caller's gradient tensor in place.
    state.m:mul(beta1):add(1-beta1, dfdx)
    state.v:copy( torch.cmax(state.v:mul(beta2), torch.abs(dfdx)) )
    local biasCorrection1 = 1 - beta1^state.t

    local stepSize = lr/biasCorrection1
    -- shift-based variant: round the step size to the nearest power of two
    stepSize=math.pow(2,torch.round(math.log(stepSize)/(math.log(2))))

    -- (2) update x; state.denom is created with x.new so it already matches
    -- x's type/device — no global opt.type check needed
    local tmp = state.denom:zero()

    -- round v to a power of two as well (shift-based normalization)
    -- NOTE(review): epsilon is added into the persistent state.v every step,
    -- so it accumulates across iterations — preserved as-is; confirm intended
    state.v:copy(torch.pow(2,torch.round(torch.log(state.v):div(math.log(2)))))
    state.v:add(epsilon)
    tmp:addcdiv(1, state.m, state.v)
    -- Multiply by Glorot learning rate vector
    x:addcmul(-stepSize, tmp, GLRvec)
    -- Clip the binary weights to [-1,1]
    x[clipV:eq(1)]=x[clipV:eq(1)]:clamp(-1,1)
    -- return x*, f(x) before optimization
    return x, {fx}
end


================================================
FILE: adam_binary_clip_b.lua
================================================
--[[ An implementation of Adam http://arxiv.org/pdf/1412.6980.pdf

Note that this function performs the weight clipping as well

ARGS:

- 'opfunc' : a function that takes a single input (X), the point
             of evaluation, and returns f(X) and df/dX
- 'x'      : the initial point
- 'config` : a table with configuration parameters for the optimizer
- 'config.learningRate'      : learning rate
- 'config.beta1'             : first moment coefficient
- 'config.beta2'             : second moment coefficient
- 'config.epsilon'           : for numerical stability
- 'state'                    : a table describing the state of the optimizer; after each
                              call the state is modified

RETURN:
- `x`     : the new x vector
- `f(x)`  : the function, evaluated before the update

]]

--- One step of Adam with weight clipping for BinaryNet.
-- Binary weights (clipV==1) are clipped to [-1,1] after the update.
-- @param opfunc function of x returning f(x), df/dx
-- @param x      flat parameter tensor, updated in place
-- @param config table: learningRate, beta1, beta2, epsilon, GLRvec, clipV
-- @param state  optimizer state (defaults to config), mutated across calls
-- @return x, {f(x)} evaluated before the update
function adam_binary_clip_b(opfunc, x, config, state)
    -- (0) get/update state
    local config = config or {}
    local state = state or config
    local lr = config.learningRate or 0.001
    local GLRvec = config.GLRvec or 1   -- per-parameter (Glorot) learning-rate scaling
    -- Read the clip mask from config (consistent with adaMax_binary_clip_shift);
    -- fall back to the global `clipV` the original implicitly relied on
    local clipV = config.clipV or clipV

    local beta1 = config.beta1 or 0.9
    local beta2 = config.beta2 or 0.999
    local epsilon = config.epsilon or 1e-8

    -- (1) evaluate f(x) and df/dx
    local fx, dfdx = opfunc(x)
    -- Initialization
    state.t = state.t or 0
    -- Exponential moving average of gradient values
    state.m = state.m or x.new(dfdx:size()):zero()
    -- Exponential moving average of squared gradient values
    state.v = state.v or x.new(dfdx:size()):zero()
    -- Holds sqrt(v) + epsilon
    state.denom = state.denom or x.new(dfdx:size()):zero()
    -- Reusable scratch buffer for the update direction; the original allocated
    -- a fresh torch.zeros (plus a global opt.type cuda check) every call
    state.tmp = state.tmp or x.new(dfdx:size()):zero()

    state.t = state.t + 1

    -- Decay the first and second moment running average coefficient
    state.m:mul(beta1):add(1-beta1, dfdx)
    state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)

    state.denom:copy(state.v):sqrt():add(epsilon)

    local biasCorrection1 = 1 - beta1^state.t
    local biasCorrection2 = 1 - beta2^state.t
    local stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1

    -- (2) update x, scaled by the Glorot learning rate vector
    local tmp = state.tmp:zero()
    tmp:addcdiv(1, state.m, state.denom)
    x:addcmul(-stepSize, tmp, GLRvec)
    -- Clip the binary weights to [-1,1]
    x[clipV:eq(1)]=x[clipV:eq(1)]:clamp(-1,1)

    return x, {fx}
end
Download .txt
gitextract_kj09dah_/

├── Data.lua
├── Dockerfile/
│   └── binarynet-torch-gpu-cuda-8.0
├── Main_BinaryNet_Cifar10.lua
├── Main_BinaryNet_MNIST.lua
├── Main_BinaryNet_SVHN.lua
├── Models/
│   ├── BatchNormalizationShiftPow2.lua
│   ├── BinarizedNeurons.lua
│   ├── BinaryLinear.lua
│   ├── BinaryNet_Cifar10_Model.lua
│   ├── BinaryNet_MNIST_Model.lua
│   ├── BinaryNet_SVHN_Model.lua
│   ├── BinarySpatialConvolution.lua
│   ├── SpatialBatchNormalizationShiftPow2.lua
│   └── cudnnBinarySpatialConvolution.lua
├── README.md
├── SqrHingeEmbeddingCriterion.lua
├── adaMax_binary_clip_shift.lua
└── adam_binary_clip_b.lua
Condensed preview — 18 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (105K chars).
[
  {
    "path": "Data.lua",
    "chars": 9001,
    "preview": "--[[\nThis code create the training test and validation datasets and preform diffrent kinds of preprocessing\nThis code is"
  },
  {
    "path": "Dockerfile/binarynet-torch-gpu-cuda-8.0",
    "chars": 1173,
    "preview": "FROM nvidia/cuda:8.0-cudnn5-devel\nWORKDIR /workspace\n\n# Install dependencies\nRUN apt-get update \\\n && apt-get install -y"
  },
  {
    "path": "Main_BinaryNet_Cifar10.lua",
    "chars": 10343,
    "preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
  },
  {
    "path": "Main_BinaryNet_MNIST.lua",
    "chars": 10322,
    "preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
  },
  {
    "path": "Main_BinaryNet_SVHN.lua",
    "chars": 10360,
    "preview": "require 'torch'\nrequire 'xlua'\nrequire 'optim'\nrequire 'gnuplot'\nrequire 'pl'\nrequire 'trepl'\nrequire 'adaMax_binary_cli"
  },
  {
    "path": "Models/BatchNormalizationShiftPow2.lua",
    "chars": 6454,
    "preview": "--[[\n   This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper"
  },
  {
    "path": "Models/BinarizedNeurons.lua",
    "chars": 858,
    "preview": "local BinarizedNeurons,parent = torch.class('BinarizedNeurons', 'nn.Module')\n\n\nfunction BinarizedNeurons:__init(stcFlag)"
  },
  {
    "path": "Models/BinaryLinear.lua",
    "chars": 2700,
    "preview": "--require 'randomkit'\n\nlocal BinaryLinear, parent = torch.class('BinaryLinear', 'nn.Linear')\n\nfunction BinaryLinear:__in"
  },
  {
    "path": "Models/BinaryNet_Cifar10_Model.lua",
    "chars": 7655,
    "preview": "--[[This code specify the model for CIFAR 10 dataset. This model uses the Shift based batch-normalization algorithm.\nIn "
  },
  {
    "path": "Models/BinaryNet_MNIST_Model.lua",
    "chars": 3339,
    "preview": "--[[This code specify the model for MNIST dataset. This model uses the Shift based batch-normalization algorithm.\nIn thi"
  },
  {
    "path": "Models/BinaryNet_SVHN_Model.lua",
    "chars": 8202,
    "preview": "--[[This code specify the model for SVHN dataset. This model uses the Shift based batch-normalization algorithm.\nIn this"
  },
  {
    "path": "Models/BinarySpatialConvolution.lua",
    "chars": 6381,
    "preview": "local BinarySpatialConvolution, parent = torch.class('BinarySpatialConvolution', 'nn.SpatialConvolution')\n\nfunction Bina"
  },
  {
    "path": "Models/SpatialBatchNormalizationShiftPow2.lua",
    "chars": 8179,
    "preview": "--[[\n   This file implements Shift based Batch Normalization based a variant of the vanilla BN as described in the paper"
  },
  {
    "path": "Models/cudnnBinarySpatialConvolution.lua",
    "chars": 6199,
    "preview": "local cudnnBinarySpatialConvolution, parent =\n    torch.class('cudnnBinarySpatialConvolution', 'cudnn.SpatialConvolution"
  },
  {
    "path": "README.md",
    "chars": 3776,
    "preview": "Deep Networks on classification tasks using Torch\n=================================================\nThis is a complete t"
  },
  {
    "path": "SqrHingeEmbeddingCriterion.lua",
    "chars": 1266,
    "preview": "--[[\nThis Function implement the squared hinge loss criterion\n]]\nlocal SqrtHingeEmbeddingCriterion, parent = torch.class"
  },
  {
    "path": "adaMax_binary_clip_shift.lua",
    "chars": 2774,
    "preview": "--[[ An implementation of Shift based AdaMax based on  http://arxiv.org/pdf/1412.6980.pdf as described the paper:\n   \"Bi"
  },
  {
    "path": "adam_binary_clip_b.lua",
    "chars": 2303,
    "preview": "--[[ An implementation of Adam http://arxiv.org/pdf/1412.6980.pdf\n\nNote that this function perform the weight cliping as"
  }
]

About this extraction

This page contains the full source code of the itayhubara/BinaryNet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 18 files (98.9 KB), approximately 27.4k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!