Repository: 1adrianb/face-alignment-training
Branch: master
Commit: 1e3127d9a407
Files: 13
Total size: 32.8 KB

Directory structure:
gitextract_3ng_0kzy/

├── .gitignore
├── LICENSE
├── README.md
├── checkpoints.lua
├── dataloader.lua
├── dataset-images.lua
├── dataset-init.lua
├── main.lua
├── models/
│   ├── fan.lua
│   └── init.lua
├── opts.lua
├── train.lua
└── utils.lua

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Compiled Lua sources
luac.out

# luarocks build files
*.src.rock
*.zip
*.tar.gz

# Object files
*.o
*.os
*.ko
*.obj
*.elf

# Precompiled Headers
*.gch
*.pch

# Libraries
*.lib
*.a
*.la
*.lo
*.def
*.exp

# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib

# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex


================================================
FILE: LICENSE
================================================
This pipeline is build around the ImageNet training code avaialable at <https://github.com/facebook/fb.resnet.torch> and HourGlass(HG) code available at https://github.com/anewell/pose-hg-train
Copyright (c) 2016, Facebook, Inc. 
Copyright (c) 2016, University of Michigan

For the rest of the code and models:
Copyright (c) 2017, University of Nottingham
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1.Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

2.Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

3.Neither the name of the paper nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: README.md
================================================
# How far are we from solving the 2D \& 3D Face Alignment problem? (and a dataset of 230,000 3D facial landmarks)

This is the training code for 2D-FAN and 3D-FAN described in the "How far are we from solving the 2D \& 3D Face Alignment problem? (and a dataset of 230,000 3D facial landmarks)" paper. Please visit [our](https://www.adrianbulat.com) webpage or read below for instructions on how to run the code.

Pretrained models are available on our page.

**Demo code: <https://www.github.com/1adrianb/2D-and-3D-face-alignment>**

Note: If you are interested in a binarized version, capable of running on devices with limited resources please also check <https://github.com/1adrianb/binary-face-alignment> for a demo.

## Requirements

- Install the latest [Torch7](http://torch.ch/docs/getting-started.html) version (for Windows, please follow the instructions available [here](https://github.com/torch/distro/blob/master/win-files/README.md))

### Packages

- [cutorch](https://github.com/torch/cutorch)
- [nn](https://github.com/torch/nn)
- [nngraph](https://github.com/torch/nngraph)
- [cudnn](https://github.com/soumith/cudnn.torch)
- [xlua](https://github.com/torch/xlua)
- [image](https://github.com/torch/image)
- [paths](https://github.com/torch/paths)
- [matio](https://github.com/soumith/matio-ffi.torch)

## Setup

1. Clone the GitHub repository and install all the dependencies mentioned above.

```bash

git  clone https://github.com/1adrianb/face-alignment-training
cd face-alignment-training
```

2. Download the 300W-LP dataset from the authors' webpage. In order to train on your own data, the dataloader.lua file needs to be adapted.

3. Download the 300W-LP annotations converted to t7 format from [here](https://www.adrianbulat.com/downloads/FaceAlignment/landmarks.zip), extract it and move the ```landmarks``` folder to the root of the 300W-LP dataset.

## Usage

In order to run the demo, please download the required models available below and the associated data.

```bash
th main.lua -data path_to_300W_LP_dataset
```

In order to see all the available options please run:

```bash
th main.lua --help
```

## Citation

```
@inproceedings{bulat2017far,
  title={How far are we from solving the 2D \& 3D Face Alignment problem? (and a dataset of 230,000 3D facial landmarks)},
  author={Bulat, Adrian and Tzimiropoulos, Georgios},
  booktitle={International Conference on Computer Vision},
  year={2017}
}
```

## Acknowledgements

This pipeline is built around the ImageNet training code available at <https://github.com/facebook/fb.resnet.torch> and the HourGlass (HG) code available at <https://github.com/anewell/pose-hg-train>.


================================================
FILE: checkpoints.lua
================================================
local checkpoint = {}

--- Load the most recent checkpoint descriptor together with its optimizer state.
-- Only `opt.resume` is read: it is either the string 'none' or a directory
-- expected to contain `latest.t7`.
-- @param opt parsed options table
-- @return the table stored in `latest.t7` and the deserialized optimizer
--         state; nil when resuming is disabled or `latest.t7` is missing
function checkpoint.latest(opt)
   local resumeDir = opt.resume

   -- Only build the path when resuming was actually requested
   local latestPath = resumeDir ~= 'none' and paths.concat(resumeDir, 'latest.t7')
   if not (latestPath and paths.filep(latestPath)) then
      return nil
   end

   print('=> Loading checkpoint ' .. latestPath)
   local descriptor = torch.load(latestPath)
   -- The descriptor stores bare file names, resolved against the resume dir
   local optimPath = paths.concat(resumeDir, descriptor.optimFile)
   local optimState = torch.load(optimPath)
   return descriptor, optimState
end

--- Serialize the model, the optimizer state and a `latest.t7` descriptor.
-- The descriptor stores bare file names (not paths), matching how
-- checkpoint.latest resolves them against its resume directory.
-- @param epoch      epoch number, used to build the file names
-- @param model      network to save; a DataParallelTable is unwrapped first
-- @param optimState optimizer state table to save
-- @param saveDir    (optional) directory to write into. When it is not a
--                   string (e.g. nil) the files go to the current working
--                   directory, which is the historical behaviour.
function checkpoint.save(epoch, model, optimState, saveDir)
   -- Don't save the DataParallelTable for easier loading on other machines
   if torch.type(model) == 'nn.DataParallelTable' then
      model = model:get(1)
   end

   local modelFile = 'model_' .. epoch .. '.t7'
   local optimFile = 'optimState_' .. epoch .. '.t7'

   -- Resolve a bare file name against the optional target directory
   local function target(name)
      if type(saveDir) == 'string' then
         return paths.concat(saveDir, name)
      end
      return name
   end

   torch.save(target(modelFile), model)
   torch.save(target(optimFile), optimState)
   torch.save(target('latest.t7'), {
      epoch = epoch,
      modelFile = modelFile,
      optimFile = optimFile,
   })
end

return checkpoint


================================================
FILE: dataloader.lua
================================================
local datasets = require 'dataset-init'
local Threads = require 'threads'
Threads.serialization('threads.sharedserialize')

local M = {}
local DataLoader = torch.class('DataLoader', M)

--- Collect the annotation file names found under `<opt.data>/landmarks`.
-- Every non-hidden sub-directory is scanned for entries matching '.mat';
-- entries whose name contains "test" are skipped.
-- @param opt   parsed options table; only `opt.data` is read
-- @param split unused here, kept for interface compatibility
-- @return array of annotation file names (bare names, without directory)
function DataLoader.getDataFaces(opt, split)
    print('=> Building dataset...')
    -- paths.concat copes with `opt.data` given with or without a trailing '/'
    local baseDir = paths.concat(opt.data, 'landmarks')
    local dirs = paths.dir(baseDir)
    -- paths.dir returns nil for a missing directory; fail with a clear message
    assert(dirs, 'Landmarks directory not found: ' .. baseDir)
    local lines = {}

    for i = 1, #dirs do
        local subDir = paths.concat(baseDir, dirs[i])
        -- Skip '.', '..', hidden entries and stray regular files
        if string.sub(dirs[i], 1, 1) ~= '.' and paths.dirp(subDir) then
            for f in paths.files(subDir, '.mat') do
                if not string.find(f, "test") then
                    lines[#lines + 1] = f
                end
            end
        end
    end
    print('=> Dataset built. '..#lines..' images were found.')
    return lines
end

--- Build a DataLoader for the given split.
-- The annotation list is gathered first; no pre-built dataset object is
-- supplied, so nil is passed in that slot of the constructor.
-- @param opt   parsed options table
-- @param split split name, forwarded unchanged
-- @return a new DataLoader instance
function DataLoader.create(opt,split)
    local annotations = DataLoader.getDataFaces(opt,split)
    return M.DataLoader(nil, opt, split, annotations)
end

--- Construct the loader and its pool of `opt.nThreads` worker threads.
-- @param _dataset  not used by this constructor
-- @param opt       parsed options table (manualSeed, nThreads, batchSize are read)
-- @param split     split name, forwarded to the dataset factory
-- @param dataAnnot annotation list, forwarded to the dataset factory
function DataLoader:__init(_dataset, opt, split, dataAnnot)
    local manualSeed = opt.manualSeed

    -- First function handed to Threads(): builds the dataset object.
    -- `trainLoader` / `valLoader` are deliberately NOT local so that `main`
    -- below can read `trainLoader`; presumably each worker has its own Lua
    -- state so these globals are per-thread — see the threads package docs.
    -- dataset-init's create() returns a single value, so `valLoader` is nil.
    local function init()
        local datasets = require 'dataset-init'

        trainLoader, valLoader = datasets.create(opt,split,dataAnnot)
    end

    -- Second function handed to Threads(): seeds the RNG (offset by `idx`
    -- when a non-zero seed was requested), exposes the dataset as
    -- `_G.dataset` for the jobs queued in DataLoader:run, and reports the
    -- dataset size back to the caller.
    local function main(idx)
        if manualSeed ~= 0 then
            torch.manualSeed(manualSeed + idx)
        end
        torch.setnumthreads(1)
        _G.dataset = trainLoader
        return trainLoader:size()
    end

    local threads, sizes = Threads(opt.nThreads,init, main)
    self.nCrops = 1
    self.threads = threads
    -- Size as returned by `main`; the first entry of the first result is used
    self.__size = sizes[1][1]
    self.batchSize = math.floor(opt.batchSize / self.nCrops)
    -- Kept so that DataLoader:annot can rebuild a dataset later
    self.opt = opt
    self.split = split
    self.dataAnnot = dataAnnot
end

--- Number of mini-batches per epoch (the last batch may be partial).
-- @return ceil(sample count / batch size)
function DataLoader:size()
        local batchesPerEpoch = self.__size / self.batchSize
        return math.ceil(batchesPerEpoch)
end

--- Return the annotation list of a freshly built dataset object.
-- A new dataset is created from the options, split and annotations stored
-- at construction time; it is kept local so no global is leaked into the
-- calling Lua state.
-- @return the `annot` field of the new dataset
function DataLoader:annot()
        local dataset = datasets.create(self.opt,self.split,self.dataAnnot)
        return dataset.annot
end

--- Create an iterator over one epoch of shuffled mini-batches.
-- Batches are assembled by the worker threads and handed back one at a time.
-- @return an iterator function yielding `n, sample`, where `n` is the
--         1-based batch counter and `sample` is a table with fields
--         `input`, `label` and `indx` (the dataset indices of the batch);
--         the iterator returns nil once every batch has been consumed
function DataLoader:run()
    local threads = self.threads
    local size, batchSize = self.__size, self.batchSize
    local perm = torch.randperm(size)

    local idx, sample = 1, nil

    -- Queue as many batch-building jobs as the pool currently accepts
    local function enqueue()
        while idx <= size and threads:acceptsjob() do
            local indices = perm:narrow(1, idx, math.min(batchSize, size - idx + 1))
            threads:addjob(
                -- Job body: reads `_G.dataset` (set up in DataLoader:__init)
                -- and receives everything else through its arguments, so it
                -- captures no upvalues from this function.
                function(indices, nCrops)
                    local sz = indices:size(1)
                    local batch, imageSize
                    local target, targetSize
                    for i, sampleIdx in ipairs(indices:totable()) do
                        local rawInput, rawLabel = _G.dataset:get(false, sampleIdx)
                        local input, label = _G.dataset:preprocess(rawInput, rawLabel)
                        -- Buffers are allocated lazily from the first sample's sizes
                        if not batch then
                            imageSize = input:size():totable()
                            if nCrops > 1 then table.remove(imageSize, 1) end
                            batch = torch.FloatTensor(sz, nCrops, table.unpack(imageSize))
                        end
                        if not target then
                            targetSize = label:size():totable()
                            target = torch.FloatTensor(sz, nCrops, table.unpack(targetSize))
                        end
                        batch[i]:copy(input)
                        target[i]:copy(label)
                    end
                    collectgarbage()
                    return {
                        input = batch:view(sz * nCrops, table.unpack(imageSize)),
                        label = target:view(sz * nCrops, table.unpack(targetSize)),
                        indx = indices,
                    }
                end,
                -- End callback: stores the finished batch for loop() to return
                function(_sample_)
                    sample = _sample_
                end,
                indices,
                self.nCrops
            )
            idx = idx + batchSize
        end
    end

    local n = 0
    local function loop()
        enqueue()
        if not threads:hasjob() then
            return nil
        end
        threads:dojob()
        if threads:haserror() then
            threads:synchronize()
        end
        enqueue()
        n = n + 1
        return n, sample
    end

    return loop
end
return M.DataLoader


================================================
FILE: dataset-images.lua
================================================
local image = require('image')
require 'utils'

local M = {}
local DatasetImages = torch.class('DatasetImages', M)

--- Store the options, split name and annotation list.
-- @param opt   parsed options table
-- @param split split name; compared against 'train' in preprocess
-- @param annot array of annotation file names
function DatasetImages:__init( opt, split, annot )
    self.opt = opt
    self.typeOfData = split
    self.annot = annot
    -- Sample count and number of heatmap channels produced per sample
    self.total = #annot
    self.nParts = 68
end

--- Load one sample and render its ground-truth heatmaps.
-- The annotation name is split on '_' to find the sub-directory; the
-- landmark file is the name with its last 4 characters replaced by '.t7'
-- and the image is the name with its last 8 characters replaced by '.jpg'.
-- @param idx index into `self.annot`
-- @return cropped 256x256 input, nParts x 64 x 64 heatmaps, the landmark
--         points, and the fixed crop centre and scale that were used
function DatasetImages:generateSampleFace(idx)
    local name = self.annot[idx]
    local subset = name:split('_')[1]
    local root = self.opt.data

    local landmarkFile = root..'landmarks/'..subset..'/'..string.sub(name,1,#name-4)..'.t7'
    local main_pts = torch.load(landmarkFile)
    -- Entry 1 is used; the original note reads "2:3D", presumably meaning
    -- entry 2 holds the 3D landmarks — TODO confirm
    local pts = main_pts[1]

    -- Fixed crop centre and scale applied to every image
    local c = torch.Tensor{450/2,450/2+50}
    local s = 1.8

    local imageFile = root..subset..'/'..string.sub(name,1,#name-8)..'.jpg'
    local img = image.load(imageFile)
    local inp = crop(img, c, s, 0, 256)

    local out = torch.zeros(self.nParts, 64, 64)
    for i = 1, self.nParts do
        local pt = pts[i]
        -- A positive x coordinate is treated as "annotation present"
        if pt[1] > 0 then
            local heatmapPt = transform(torch.add(pt,1), c, s, 0, 64)
            drawGaussian(out[i], heatmapPt, 1)
        end
    end

    return inp,out,pts,c,s
end

--- Fetch sample `i` and remember its points, centre and scale on `self`.
-- @param shuffle unused
-- @param i       sample index
-- @return input image and heatmap target
function DatasetImages:get(shuffle,i)
    local inp, out
    inp, out, self.pts, self.c, self.s = self:generateSampleFace(i)
    return inp, out
end

--- Number of samples, as recorded from the annotation list at construction.
function DatasetImages:size()
    local sampleCount = self.total
    return sampleCount
end

--- Apply training-time augmentation to an input/label pair.
-- When `self.typeOfData` is not 'train' both arguments are returned as-is.
-- Augmentations applied: random scale, random rotation (kept for roughly
-- 40% of samples), random horizontal flip (50%) and per-channel colour
-- scaling of the first three channels.
-- NOTE: the result depends on the order of the RNG calls below; branches
-- disabled with `and false` still evaluate `torch.uniform()` first and
-- therefore still consume a random number.
-- @param input image tensor (channels first)
-- @param label heatmap tensor
-- @return augmented input and label
function DatasetImages:preprocess(input, label)
    if self.typeOfData == 'train'  then
        -- Scale in [1-scaleFactor, 1+scaleFactor]; rotation clamped to +/- 2*rotFactor
        local s = torch.randn(1):mul(self.opt.scaleFactor):add(1):clamp(1-self.opt.scaleFactor,1+self.opt.scaleFactor)[1]
        local r = torch.randn(1):mul(self.opt.rotFactor):clamp(-2*self.opt.rotFactor,2*self.opt.rotFactor)[1]

        -- Scale/rotation (rotation is dropped when the draw is <= .6)
        if torch.uniform() <= .6 then r = 0 end
        local inp,out = 256, 64
        local divideBy = 200

        -- Re-crop around the image centre; crop() multiplies the scale by 200
        -- internally, hence the division here
        input = crop(input, {(inp+1)/2,(inp+1)/2}, inp*s/divideBy, r, inp)
        label = crop(label, {(out+1)/2,(out+1)/2}, out*s/divideBy, r, out)

        -- Emulate low resolution (currently disabled by `and false`)
        if torch.uniform()<=.2 and false then --.35
            input = image.scale(input,96,96)
            input = image.scale(input,256,256)
        end

        -- Add jpeg artefacts
        --[[
        if torch.uniform()<=.2 and false then
            local onlyImg = input[{{1,3},{},{}}]
            onlyImg = image.compressJPG(onlyImg,30)
            onlyImg = image.decompressJPG(onlyImg)
            input[{{1,3},{},{}}] = onlyImg
        end
        ]]--

        -- Add random translation
        --[[
        wh_t = torch.Tensor(2):random(0,80)-40
        input = image.translate(input,wh_t[1],wh_t[2])
        label = image.translate(label,wh_t[1]/4.0,wh_t[2]/4.0)
        ]]--

        -- Add some gaussian blur
        --[[
        if torch.uniform()<.4 and false  then
            gauss_s = torch.Tensor(1):random(10,30):int()
            local kernel_gauss = image.gaussian(gauss_s[1])
            input = image.convolve(input, kernel_gauss, 'same')/255.0
        end
        ]]--

        -- Optional global overrides; `customFlip` / `customShuffleLR` are not
        -- defined in this file — presumably hooks for other datasets
        local flip_ = customFlip or flip

        local shuffleLR_ = customShuffleLR or shuffleLR
        -- Horizontal flip: left/right landmark channels are swapped before
        -- the heatmaps themselves are mirrored
        if torch.uniform() <= .5 then
            input = flip_(input)
            label = flip_(shuffleLR_(label))
        end

        -- Color augmentation: scale each of the first three channels
        -- independently, then clamp to [0, 1]
        input[{1, {}, {}}]:mul(torch.uniform(0.7, 1.3)):clamp(0, 1)
        input[{2, {}, {}}]:mul(torch.uniform(0.7, 1.3)):clamp(0, 1)
        input[{3, {}, {}}]:mul(torch.uniform(0.7, 1.3)):clamp(0, 1)
    end
    return input, label
end

return M.DatasetImages


================================================
FILE: dataset-init.lua
================================================
local M = {}

--- Instantiate the image dataset.
-- The dataset class is required lazily, at call time.
-- @param opt   parsed options table
-- @param split split name
-- @param annot array of annotation file names
-- @return a new dataset object
function M.create(opt, split, annot)
   local DatasetImages = require('dataset-images')
   local dataset = DatasetImages(opt, split, annot)
   return dataset
end

return M


================================================
FILE: main.lua
================================================
require 'torch'
require 'cutorch'
require 'paths'
require 'nn'
require 'nngraph'

local DataLoader = require 'dataloader'
local checkpoints = require 'checkpoints'
local models = require 'models/init'
local Trainer = require 'train'
local opts = require 'opts'

local opt = opts.parse(arg)

torch.setdefaulttensortype('torch.FloatTensor')
torch.setnumthreads(1)
cutorch.setDevice(1)
torch.setheaptracking(true)

torch.manualSeed(opt.manualSeed)
cutorch.manualSeed(opt.manualSeed)

-- Load the previous checkpoint, if it exists. checkpoints.latest already
-- returns the optimizer state read from the resume directory, so it must not
-- be reloaded here from a path relative to the current working directory.
local checkpoint, optimState = checkpoints.latest(opt)

-- Create model
local model, criterion = models.setup(opt, checkpoint)

print('=> Model size: ', model:getParameters():size(1))

-- Data loading
local trainLoader = DataLoader.create(opt,'train')

local trainer = Trainer(model, criterion, opt, optimState)

-- Continue after the checkpointed epoch when resuming
local startEpoch = checkpoint and checkpoint.epoch + 1 or opt.epochNumber

for epoch = startEpoch, opt.nEpochs do
        -- Train for a single epoch
        local trainLoss, trainAcc = trainer:train(epoch, trainLoader)
        print(string.format(' *Results loss: %6.6f acc: %6.6f ',trainLoss, trainAcc))

        -- Snapshot every opt.snapshot epochs (0 disables snapshots)
        if opt.snapshot ~= 0 and epoch % opt.snapshot == 0 then
                checkpoints.save(epoch, model:clearState(), trainer.optimState)
        end
end


================================================
FILE: models/fan.lua
================================================
-- Face Alignment Network
--
-- How far are we from solving the 2D \& 3D Face Alignment problem? (and a dataset of 230,000 3D facial landmarks)
-- Adrian Bulat and Georgios Tzimiropoulos
-- ICCV 2017
--

local cudnn = require 'cudnn'

-- Short aliases for the layer constructors used throughout this file
local conv = cudnn.SpatialConvolution
local batchnorm = nn.SpatialBatchNormalization
local relu = cudnn.ReLU
local upsample = nn.SpatialUpSamplingNearest

-- Architecture settings. These are only defaults: createModel overwrites
-- all three from opt.nModules, opt.nFeats and opt.nStacks.
local nModules = 1
local nFeats = 256
local nStack = 8


-- Multi-scale convolutional block.
-- Three bias-free 3x3 convolutions (each preceded by batch-norm + ReLU)
-- produce numOut/2, numOut/4 and numOut/4 channels; their outputs are
-- concatenated along dimension 2, giving numOut channels in total.
-- `order` is accepted but not used.
local function convBlock(numIn, numOut, order)
    local half, quarter = numOut/2, numOut/4

    -- First stage lives directly in the returned container
    local cnet = nn.Sequential()
    cnet:add(batchnorm(numIn,1e-5,false))
    cnet:add(relu(true))
    cnet:add(conv(numIn,half,3,3,1,1,1,1):noBias())

    -- Second stage: half -> quarter channels
    local second = nn.Sequential()
    second:add(batchnorm(half,1e-5,false))
    second:add(relu(true))
    second:add(conv(half,quarter,3,3,1,1,1,1):noBias())

    -- Third stage: quarter -> quarter channels
    local third = nn.Sequential()
    third:add(batchnorm(quarter,1e-5,false))
    third:add(relu(true))
    third:add(conv(quarter,quarter,3,3,1,1,1,1):noBias())

    -- Keep the second-stage output next to the third-stage output
    local innerSplit = nn.ConcatTable()
    innerSplit:add(nn.Identity())
    innerSplit:add(third)

    local lower = nn.Sequential()
    lower:add(second)
    lower:add(innerSplit)
    lower:add(nn.JoinTable(2))

    -- Keep the first-stage output next to the joined lower stages
    local outerSplit = nn.ConcatTable()
    outerSplit:add(nn.Identity())
    outerSplit:add(lower)

    cnet:add(outerSplit)
    cnet:add(nn.JoinTable(2))
    return cnet
end

-- Skip layer: identity when the channel counts already match, otherwise a
-- batch-norm + ReLU + bias-free 1x1 convolution projecting numIn -> numOut.
local function skipLayer(numIn,numOut)
    if numIn ~= numOut then
        local projection = nn.Sequential()
        projection:add(batchnorm(numIn,1e-5,false))
        projection:add(relu(true))
        projection:add(conv(numIn,numOut,1,1):noBias())
        return projection
    end
    return nn.Identity()
end

-- Residual block: the convolutional block and the skip layer see the same
-- input and their outputs are summed (in-place CAddTable).
local function Residual(numIn,numOut)
    local branches = nn.ConcatTable()
    branches:add(convBlock(numIn,numOut))
    branches:add(skipLayer(numIn,numOut))

    local block = nn.Sequential()
    block:add(branches)
    block:add(nn.CAddTable(true))
    return block
end

-- 1x1 convolution (stride 1, no padding) followed by batch-norm and ReLU,
-- attached to the nngraph node `inp`. Returns the resulting node.
local function lin(numIn,numOut,inp)
    local projected = conv(numIn,numOut,1,1,1,1,0,0)(inp)
    local normalised = batchnorm(numOut)(projected)
    return relu(true)(normalised)
end

-- Container-based (nn.Sequential) hourglass of depth `n` with `f` features.
-- NOTE: a second `local function hourglass` is declared further down in this
-- file; later code such as createModel resolves to that one, so this variant
-- is only reachable through its own recursive call.
local function hourglass(n, f)
        -- Append nModules residual blocks to a container
        local function addResiduals(seq)
                for i = 1,nModules do seq:add(Residual(f,f)) end
        end

        -- Upper branch: residual blocks at the input resolution
        local b1 = nn.Sequential()
        addResiduals(b1)

        -- Lower branch: pool, process, (recurse), process, upsample
        local b2 = nn.Sequential()
        b2:add(nn.SpatialMaxPooling(2,2,2,2))
        addResiduals(b2)

        if n>1 then
                b2:add(hourglass(n-1,f))
        else
                addResiduals(b2)
        end

        addResiduals(b2)
        b2:add(upsample(2))

        local branch = nn.ConcatTable()
        branch:add(b1):add(b2)

        local model = nn.Sequential()
        model:add(branch)

        return model:add(nn.CAddTable())
end

-- nngraph hourglass of depth `n` with `f` features, attached to node `inp`.
-- Returns the node that sums the full-resolution branch with the
-- pooled / processed / upsampled branch.
local function hourglass(n, f, inp)
    -- Chain nModules residual blocks onto a graph node
    local function residuals(node)
        for i = 1,nModules do node = Residual(f,f)(node) end
        return node
    end

    -- Upper branch
    local up1 = residuals(inp)

    -- Lower branch
    local low1 = residuals(cudnn.SpatialMaxPooling(2,2,2,2)(inp))

    -- Recurse until the innermost level, which gets plain residual blocks
    local low2
    if n > 1 then
        low2 = hourglass(n-1,f,low1)
    else
        low2 = residuals(low1)
    end

    local low3 = residuals(low2)
    local up2 = nn.SpatialUpSamplingNearest(2)(low3)

    -- Bring two branches together
    return nn.CAddTable()({up1,up2})
end


--- Build the stacked-hourglass Face Alignment Network as an nngraph module.
-- Reads opt.nModules, opt.nFeats and opt.nStacks and stores them in the
-- file-level settings used by the helper builders.
-- Note: this is defined as a global function and also returned by the file.
-- @param opt parsed options table
-- @return nn.gModule with one input and one 68-channel output per stack
function createModel(opt)
    nModules = opt.nModules
    nFeats = opt.nFeats
    nStack = opt.nStacks

    local inp = nn.Identity()()

    -- Initial processing of the image
    -- (the trailing numbers are the authors' resolution notes, presumably
    -- for a 256x256 input — TODO confirm)
    local cnv1_ = conv(3,64,7,7,2,2,3,3)(inp)           -- 128
    local cnv1 = relu(true)(batchnorm(64)(cnv1_))
    local r1 = Residual(64,128)(cnv1)
    local pool = nn.SpatialMaxPooling(2,2,2,2)(r1)                       -- 64
    local r4 = Residual(128,128)(pool)
    local r5 = Residual(128,nFeats)(r4)

    -- One heatmap output node is collected per stack
    local out = {}
    local inter = r5

    for i = 1,nStack do
        local hg = hourglass(4,nFeats,inter)

        -- Residual layers at output resolution
        local ll = hg
        for j = 1,nModules do ll = Residual(nFeats,nFeats)(ll) end
        -- Linear layer to produce first set of predictions
        ll = lin(nFeats,nFeats,ll)

        -- Predicted heatmaps (68 channels, one per landmark)
        local tmpOut = conv(nFeats,68,1,1,1,1,0,0)(ll)
        table.insert(out,tmpOut)

        -- Add predictions back: every stack but the last feeds the sum of
        -- its input, its features and its re-projected heatmaps forward
        if i < nStack then
            local ll_ = conv(nFeats,nFeats,1,1,1,1,0,0)(ll)
            local tmpOut_ = conv(68,nFeats,1,1,1,1,0,0)(tmpOut)
            inter = nn.CAddTable()({inter, ll_, tmpOut_})
        end
    end

    -- Final model
    local model = nn.gModule({inp}, out)

    return model

end

return createModel


================================================
FILE: models/init.lua
================================================
local M = {}

--- Create, resume or reload the network and build the matching criterion.
-- Model source, in order of precedence:
--   1. `checkpoint` (from checkpoints.latest): `checkpoint.modelFile`
--      resolved against `opt.resume`;
--   2. `opt.retrain` when it is not 'none': a model file to load directly;
--   3. otherwise a fresh model from `models/<opt.netType>.lua`.
-- @param opt        parsed options table
-- @param checkpoint checkpoint descriptor table or nil
-- @return the model and the criterion, both moved to the GPU
function M.setup(opt, checkpoint)
    -- cudnn is required here (not only in the model definition file) because
    -- it is needed both for the flags below and when a saved model is loaded
    -- without the definition file having been required first.
    local cudnn = require 'cudnn'

    local model
    if checkpoint then
        local modelPath = paths.concat(opt.resume, checkpoint.modelFile)
        assert(paths.filep(modelPath), 'Saved model not found: ' .. modelPath)
        print('=> Resuming model from ' .. modelPath)
        model = torch.load(modelPath)
    elseif opt.retrain ~= 'none' then
        -- `checkpoint` is nil in this branch, so the path comes from opt.retrain
        assert(paths.filep(opt.retrain), 'Saved model not found: ' .. opt.retrain)
        print('=> Loading model from file: ' .. opt.retrain)
        model = torch.load(opt.retrain)
    else
        print('=> Creating model from file: models/' .. opt.netType .. '.lua')
        model = require('models/' .. opt.netType)(opt)
    end

    -- Unwrap a saved multi-GPU container; it is rebuilt below if requested
    if torch.type(model) == 'nn.DataParallelTable' then
        model = model:get(1)
    end

    -- Set the CUDNN flags
    if opt.cudnn == 'fastest' then
        cudnn.fastest = true
        cudnn.benchmark = true
    elseif opt.cudnn == 'deterministic' then
        -- Use a deterministic convolution implementation
        model:apply(function(m)
            if m.setMode then m:setMode(1, 1, 1) end
        end)
    end

    if opt.nGPU > 1 then
        local gpus = torch.range(1, opt.nGPU):totable()
        local fastest, benchmark = cudnn.fastest, cudnn.benchmark

        local dpt = nn.DataParallelTable(1, true, true)
            :add(model, gpus)
            :threads(function()
                -- Each replica thread needs the packages the model uses and
                -- the same cudnn flags as the main thread
                local cudnn = require 'cudnn'
                require 'nngraph'
                cudnn.fastest, cudnn.benchmark = fastest, benchmark
            end)
        dpt.gradInput = nil

        model = dpt:cuda()
    end

    -- One MSE term per stack output; a single MSE when there is one stack
    local criterion
    if opt.nStacks > 1 then
        criterion = nn.ParallelCriterion()
        for i = 1, opt.nStacks do
            criterion:add(nn.MSECriterion())
        end
    else
        criterion = nn.MSECriterion()
    end

    return model:cuda(), criterion:cuda()
end

return M

================================================
FILE: opts.lua
================================================
local M = { }

--- Parse the command-line arguments into an options table.
-- Side effect: creates the checkpoint directory (`-save`) when missing and
-- aborts with an error if that fails.
-- `-data` is normalised to end with '/', because other files build dataset
-- paths by plain string concatenation onto it.
-- @param arg array of command-line arguments (may be nil)
-- @return table of option values keyed by option name
function M.parse(arg)
   local cmd = torch.CmdLine()
   cmd:text()
   cmd:text('2D-FAN and 3D-FAN Training script')
   cmd:text('Visit https://www.adrianbulat.com for more details')
   cmd:text()
   cmd:text('Options:')
    ------------ General options --------------------
   cmd:option('-data',       'dataset/300W-LP/',         'Path to dataset')
   cmd:option('-manualSeed', 0,          'Manually set RNG seed')
   cmd:option('-nGPU',       1,          'Number of GPUs to use by default')
   cmd:option('-backend',    'cudnn',    'Options: cudnn | cunn')
   cmd:option('-cudnn',      'fastest',  'Options: fastest | default | deterministic')
   cmd:option('-gen',        'gen',      'Path to save generated files')
   cmd:option('-snapshot',    3, 'save a snapshot every n epochs')
   ------------- Data options ------------------------
   cmd:option('-nThreads',        2, 'number of data loading threads')
   ------------- Training options --------------------
   cmd:option('-nEpochs',         100,       'Number of total epochs to run')
   cmd:option('-epochNumber',     1,       'Manual epoch number (useful on restarts)')
   cmd:option('-batchSize',       10,      'mini-batch size (1 = pure stochastic)')
   ------------- Checkpointing options ---------------
   cmd:option('-save',            'checkpoints', 'Directory in which to save checkpoints')
   cmd:option('-resume',          'none',        'Resume from the latest checkpoint in this directory')
   ---------- Optimization options ----------------------
   cmd:option('-LR',              0.00025,   'initial learning rate')
   cmd:option('-momentum',        0.0,   'momentum')
   cmd:option('-weightDecay',     0.0,  'weight decay')
   ---------- Model options ----------------------------------
   cmd:option('-netType',      'fan', 'Options: fan')
   cmd:option('-nModules',       1,       'Number of modues per level')
   cmd:option('-nStacks',         4,       'Number of stacked networks')
   cmd:option('-nFeats',         256,     'BLock width (# channels)')

   cmd:option('-retrain',      'none',   'Path to model to retrain with')
   cmd:option('-optimState',   'none',   'Path to an optimState to reload from')
   ---------- Augumentation options ----------------------------------
   cmd:option('-scaleFactor',        0.3,   'scaling factor')
   cmd:option('-rotFactor',        30,   'rotation factor (in degrees)')

   cmd:text()

   local opt = cmd:parse(arg or {})

   -- Accept the dataset path with or without a trailing separator
   if opt.data ~= '' and opt.data:sub(-1) ~= '/' then
      opt.data = opt.data .. '/'
   end

   if not paths.dirp(opt.save) and not paths.mkdir(opt.save) then
      cmd:error('error: unable to create checkpoint directory: ' .. opt.save .. '\n')
   end

   return opt
end

return M

================================================
FILE: train.lua
================================================
require 'cunn'
local optim = require 'optim'

-- Learning-rate schedule consulted by Trainer:learningRate.
-- Each entry is {afterEpoch, uptoEpoch, rate}: the rate applies to epochs
-- with afterEpoch < epoch <= uptoEpoch.
local lr_policy = {
    {0,50,2.5e-4},
    {50,70,1e-4},
    {70,90,5e-5},
    {90,100,1e-5},
    {100,110,5e-6}
}

local M = {}
local Trainer = torch.class('Trainer', M)

--- Set up the trainer.
-- @param model      network to train
-- @param criterion  loss applied to the network output
-- @param opt        parsed options table (LR, momentum, weightDecay are read)
-- @param optimState optional optimizer state to continue from; a fresh one
--                   is built from `opt` when it is absent
function Trainer:__init(model,criterion,opt,optimState)
        self.opt = opt
        self.model = model
        self.criterion = criterion

        if not optimState then
                optimState = {
                        learningRate = opt.LR,
                        learningRateDecay = 0.0,
                        momentum = opt.momentum,
                        epsilon = 1e-8,
                        weightDecay = opt.weightDecay,
                }
        end
        self.optimState = optimState

        -- Flattened views over all parameters and their gradients
        self.params, self.gradParams = model:getParameters()
end

--- Run one training epoch.
-- The learning rate is refreshed from the schedule, then every batch from
-- the loader goes through forward, loss, backward and an RMSprop step.
-- @param epoch      epoch number (used for the schedule and the log lines)
-- @param dataloader object providing `size()` and `run()`
-- @return mean loss over the batches, and a mean accuracy that is never
--         accumulated here (its running sum stays at 0)
function Trainer:train(epoch, dataloader)
        local lossSum, accSum = 0.0, 0.0
        self.optimState.learningRate = self:learningRate(epoch)

        local batchTimer = torch.Timer()
        local dataTimer = torch.Timer()

        -- Gradients are computed explicitly below, so the closure handed to
        -- the optimizer only reports the values that are already available
        local function feval()
                return self.criterion.output, self.gradParams
        end

        local trainSize = dataloader:size()
        local nBatches = 0

        print('=> Training epoch # '..epoch)

        self.model:training()

        for n, sample in dataloader:run() do
                local dataTime = dataTimer:time().real
                self:copyInputs(sample)

                -- Forward / backward pass
                self.model:zeroGradParameters()
                local output = self.model:forward(self.input)
                local loss = self.criterion:forward(output, self.label)
                self.criterion:backward(self.model.output, self.label)
                self.model:backward(self.input,self.criterion.gradInput)

                -- Parameter update
                optim.rmsprop(feval, self.params, self.optimState)

                lossSum = lossSum + loss
                nBatches = nBatches + 1

                local elapsed = batchTimer:time().real
                print((' | Epoch: [%d][%d/%d]    Time %.3f  Data %.3f  Err %1.4f'):format(
                        epoch, n, trainSize, elapsed, dataTime, loss))

                -- Guard against a later getParameters() call having moved the
                -- parameters to a different storage
                assert(self.params:storage() == self.model:parameters()[1]:storage())
                collectgarbage()
                batchTimer:reset()
                dataTimer:reset()
        end

        return lossSum / nBatches, accSum / nBatches
end

--- Look up the learning rate for an epoch in the `lr_policy` schedule.
-- An entry {first, last, rate} applies when first < epoch <= last.
-- Epochs outside the schedule no longer yield nil (which would leave the
-- optimizer to pick its own default rate): epochs at or before the start
-- use the first entry's rate, epochs past the end keep the last entry's.
-- @param epoch epoch number
-- @return learning rate (always a number)
function Trainer:learningRate(epoch)
        for i = 1, #lr_policy do
                local first, last, rate = lr_policy[i][1], lr_policy[i][2], lr_policy[i][3]
                if epoch > first and epoch <= last then
                        print(string.format('Using lr_rate: %f', rate))
                        return rate
                end
        end

        -- Outside the schedule: clamp to the nearest end
        local fallback
        if epoch <= lr_policy[1][1] then
                fallback = lr_policy[1][3]
        else
                fallback = lr_policy[#lr_policy][3]
        end
        print(string.format('Using lr_rate: %f', fallback))
        return fallback
end

--- Copy a batch into reusable device buffers.
-- The input goes to a CUDA tensor when using 1 GPU, or to pinned host memory
-- otherwise (DataParallelTable). The target is always copied to a CUDA
-- tensor. Both buffers are kept on `self` and reused between calls.
-- Sets `self.input` and `self.label`; with more than one stack `self.label`
-- is a table holding the same target tensor once per stack.
-- @param sample table with `input` and `label` tensors
function Trainer:copyInputs(sample)
    self.input = self.input or (self.opt.nGPU == 1
      and torch.CudaTensor()
      or cutorch.createCudaHostTensor())
    -- Separate field: self.label may be replaced by a table below
    self.labelBuffer = self.labelBuffer or torch.CudaTensor()
    local label = self.labelBuffer

    self.input:resize(sample.input:size()):copy(sample.input)
    label:resize(sample.label:size()):copy(sample.label)

    -- Adjust the target to the network architecture: one entry per stack
    if self.opt.nStacks > 1 then
        local perStack = {}
        for i = 1, self.opt.nStacks do
            perStack[i] = label
        end
        self.label = perStack
    else
        self.label = label
    end
end

return M.Trainer


================================================
FILE: utils.lua
================================================
-------------------------------------------------------------------------------
-- Coordinate transformation
-------------------------------------------------------------------------------
--- Build the 3x3 matrix mapping original-image coordinates to a
-- `res` x `res` crop centred on `center`.
-- The cropped region has side length 200 * scale in the original image.
-- @param center indexable {x, y}
-- @param scale  crop scale
-- @param rot    rotation in degrees (0 = none)
-- @param res    output resolution
-- @return 3x3 transformation tensor
function getTransform(center, scale, rot, res)
    local h = 200 * scale
    local ratio = res / h

    -- Scaling and translation
    local t = torch.eye(3)
    t[1][1] = ratio
    t[2][2] = ratio
    t[1][3] = res * (-center[1] / h + .5)
    t[2][3] = res * (-center[2] / h + .5)

    if rot == 0 then
        return t
    end

    -- Rotation by -rot degrees
    local ang = -rot * math.pi / 180
    local s, c = math.sin(ang), math.cos(ang)
    local r = torch.eye(3)
    r[1][1], r[1][2] = c, -s
    r[2][1], r[2][2] = s, c

    -- Rotate around the crop centre: shift to origin, rotate, shift back
    local half = res/2
    local toOrigin = torch.eye(3)
    toOrigin[1][3] = -half
    toOrigin[2][3] = -half
    local fromOrigin = torch.eye(3)
    fromOrigin[1][3] = half
    fromOrigin[2][3] = half

    return fromOrigin * r * toOrigin * t
end

--- Map a 1-based point through the crop transform (or its inverse).
-- @param pt     indexable {x, y}, 1-based
-- @param center, scale, rot, res  forwarded to getTransform
-- @param invert when truthy, apply the inverse transform
-- @return 2-element int tensor with the mapped, 1-based point
function transform(pt, center, scale, rot, res, invert)
    -- Homogeneous, 0-based copy of the point
    local homog = torch.ones(3)
    homog[1] = pt[1]-1
    homog[2] = pt[2]-1

    local t = getTransform(center, scale, rot, res)
    if invert then
        t = torch.inverse(t)
    end

    -- Small offset before truncating to int, then back to 1-based
    local mapped = t*homog
    return mapped:sub(1,2):add(1e-4):int():add(1)
end

--- Crop a square region around `center`, optionally rotate it, and rescale
-- it to `res` x `res`. Parts of the region outside the image stay zero.
-- @param img    2-D tensor, or tensor with more than 2 dims (first dim is
--               treated as channels)
-- @param center indexable {x, y} crop centre
-- @param scale  crop scale (see getTransform)
-- @param rot    rotation in degrees (0 = none)
-- @param res    output resolution
-- @return the cropped image scaled to res x res
function crop(img, center, scale, rot, res)
    -- Corners of the crop in original-image coordinates (inverse transform)
    local ul = transform({1,1}, center, scale, 0, res, true)
    local br = transform({res+1,res+1}, center, scale, 0, res, true)

    -- Extra margin so a rotated crop has no empty corners; only applied
    -- when a rotation is requested
    local pad = math.floor(torch.norm((ul - br):float())/2 - (br[1]-ul[1])/2)
    if rot ~= 0 then
        ul = ul - pad
        br = br + pad
    end

    local newDim,newImg,ht,wd

    -- Allocate the zero-filled destination and read the source height/width
    if img:size():size() > 2 then
        newDim = torch.IntTensor({img:size(1), br[2] - ul[2], br[1] - ul[1]})
        newImg = torch.zeros(newDim[1],newDim[2],newDim[3])
        ht = img:size(2)
        wd = img:size(3)
    else
        newDim = torch.IntTensor({br[2] - ul[2], br[1] - ul[1]})
        newImg = torch.zeros(newDim[1],newDim[2])
        ht = img:size(1)
        wd = img:size(2)
    end

    -- new*: destination ranges; old*: matching source ranges, both clipped
    -- to the source image bounds
    local newX = torch.Tensor({math.max(1, -ul[1] + 2), math.min(br[1], wd+1) - ul[1]})
    local newY = torch.Tensor({math.max(1, -ul[2] + 2), math.min(br[2], ht+1) - ul[2]})
    local oldX = torch.Tensor({math.max(1, ul[1]), math.min(br[1], wd+1) - 1})
    local oldY = torch.Tensor({math.max(1, ul[2]), math.min(br[2], ht+1) - 1})

    if newDim:size(1) > 2 then
        newImg:sub(1,newDim[1],newY[1],newY[2],newX[1],newX[2]):copy(img:sub(1,newDim[1],oldY[1],oldY[2],oldX[1],oldX[2]))
    else
        newImg:sub(newY[1],newY[2],newX[1],newX[2]):copy(img:sub(oldY[1],oldY[2],oldX[1],oldX[2]))
    end

    if rot ~= 0 then
        -- Rotate the padded crop, then trim the margin added above
        newImg = image.rotate(newImg, rot * math.pi / 180, 'bilinear')
        if newDim:size(1) > 2 then
            newImg = newImg:sub(1,newDim[1],pad,newDim[2]-pad,pad,newDim[3]-pad)
        else
            newImg = newImg:sub(pad,newDim[1]-pad,pad,newDim[2]-pad)
        end
    end

    newImg = image.scale(newImg,res,res)
    return newImg
end

-- 7x7 kernel generated once and reused for every call
local magic_gaussian = image.gaussian(7)

--- Add a gaussian blob centred on `pt` to a 2-D heatmap, in place.
-- The blob always comes from the fixed 7x7 kernel; `sigma` only determines
-- the bounding box (+/- 3*sigma) used for the bounds check and clipping.
-- Values above 1 are clamped to 1.
-- @param img   2-D tensor (rows x cols), modified in place
-- @param pt    indexable {x, y}
-- @param sigma spread used for the bounding box
-- @return img
function drawGaussian(img, pt, sigma)
    local rows, cols = img:size(1), img:size(2)
    local reach = 3 * sigma

    -- Bounding box of the blob in image coordinates
    local ul = {math.floor(pt[1] - reach), math.floor(pt[2] - reach)}
    local br = {math.floor(pt[1] + reach), math.floor(pt[2] + reach)}

    -- Nothing to draw when the box lies entirely outside the image
    if ul[1] > cols or ul[2] > rows or br[1] < 1 or br[2] < 1 then
        return img
    end

    local g = magic_gaussian:clone()

    -- Image range covered by the blob, clipped to the image
    local imgX0, imgX1 = math.max(1, ul[1]), math.min(br[1], cols)
    local imgY0, imgY1 = math.max(1, ul[2]), math.min(br[2], rows)

    -- Matching range inside the kernel
    local gX0 = math.max(1, -ul[1])
    local gY0 = math.max(1, -ul[2])
    local gX1 = imgX1 - imgX0 + gX0
    local gY1 = imgY1 - imgY0 + gY0

    assert(gX0 > 0 and gY0 > 0)
    img:sub(imgY0, imgY1, imgX0, imgX1):add(g:sub(gY0, gY1, gX0, gX1))
    img[img:gt(1)] = 1
    return img
end

--- Swap the left/right landmark channels of a heatmap tensor, in place.
-- The channel dimension is 2 for 4-D tensors and 1 for 3-D tensors; any
-- other rank fails an assertion.
-- @param x heatmap tensor, modified in place
-- @return x
function shuffleLR(x)
    local rank = x:nDimension()
    local dim = 2
    if rank ~= 4 then
        assert(rank == 3)
        dim = 1
    end

    -- Keypoints pairs for 300W_LP, 300VW, 300W and LS3D-W datasets
    local matchedParts = {
        {1,17},   {2,16},   {3,15},
        {4,14}, {5,13}, {6,12}, {7,11}, {8,10},
        {18,27},{19,26},{20,25},{21,24},{22,23},
        {37,46},{38,45},{39,44},{40,43},
        {42,47},{41,48},
        {32,36},{33,35},
        {51,53},{50,54},{49,55},{62,64},{61,65},{68,66},{60,56},
        {59,57}
    }

    for _, pair in ipairs(matchedParts) do
        local first = x:narrow(dim, pair[1], 1)
        local second = x:narrow(dim, pair[2], 1)
        -- Keep a copy of the first channel before it is overwritten
        local saved = first:clone()
        first:copy(second)
        second:copy(saved)
    end

    return x
end

--- Horizontally mirror every slice along the first dimension of `x`.
-- The work is done in a new FloatTensor; the result is converted back to
-- the type of `x`. The input is not modified.
-- @param x tensor whose first dimension indexes the slices to flip
-- @return mirrored tensor of the same size and type as x
function flip(x)
    require 'image'
    local mirrored = torch.FloatTensor(x:size())
    local nSlices = x:size(1)
    for slice = 1, nSlices do
        image.hflip(mirrored[slice], x[slice]:float())
    end
    return mirrored:typeAs(x)
end