Repository: chenhsuanlin/inverse-compositional-STN
Branch: master
Commit: 4a2a8fc7b9a1
Files: 22
Total size: 86.3 KB
Directory structure:
gitextract_t1x_4nxr/
├── .editorconfig
├── .gitignore
├── LICENSE
├── MNIST-pytorch/
│ ├── data.py
│ ├── graph.py
│ ├── options.py
│ ├── train.py
│ ├── util.py
│ └── warp.py
├── MNIST-tensorflow/
│ ├── data.py
│ ├── graph.py
│ ├── options.py
│ ├── train.py
│ ├── util.py
│ └── warp.py
├── README.md
└── traffic-sign-tensorflow/
├── data.py
├── graph.py
├── options.py
├── train.py
├── util.py
└── warp.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .editorconfig
================================================
root = true
[*]
end_of_line = lf
insert_final_newline = true
indent_style = tab
indent_size = 4
trim_trailing_whitespace = true
[*.md]
trim_trailing_whitespace = false
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2018 Chen-Hsuan Lin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: MNIST-pytorch/data.py
================================================
import numpy as np
import scipy.linalg
import os,time
import torch
import torchvision
import warp,util
# load MNIST data
def loadMNIST(opt,path):
	"""Download (if needed) MNIST into *path* and return (trainData,testData) dicts.

	Each dict holds "image" ([N,28,28] float32 in [0,1]) and "label" ([N] ints).
	"""
	os.makedirs(path,exist_ok=True)
	def toSplit(dataset):
		# samples are (PIL image, label); scale pixel values from [0,255] to [0,1]
		images = torch.tensor([np.array(s[0])/255.0 for s in dataset],dtype=torch.float32)
		labels = torch.tensor([s[1] for s in dataset])
		return { "image": images, "label": labels }
	trainData = toSplit(torchvision.datasets.MNIST(path,train=True,download=True))
	testData = toSplit(torchvision.datasets.MNIST(path,train=False,download=True))
	return trainData,testData
# generate training batch
def genPerturbations(opt):
	"""Sample a batch of random warp perturbation parameters.

	Random displacements are drawn for the four canonical corner points and a
	warp of type opt.warpType is least-squares-fitted to those displacements.
	Returns a [batchSize,warpDim] CUDA tensor of parameters (offsets from the
	identity warp).
	"""
	# canonical corner coordinates, tiled per batch sample: [batchSize,4]
	X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])
	Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])
	O = np.zeros([opt.batchSize,4],dtype=np.float32)
	I = np.ones([opt.batchSize,4],dtype=np.float32)
	# per-corner jitter (pertScale) plus a shared per-sample translation (transScale)
	dX = np.random.randn(opt.batchSize,4)*opt.pertScale \
		+np.random.randn(opt.batchSize,1)*opt.transScale
	dY = np.random.randn(opt.batchSize,4)*opt.pertScale \
		+np.random.randn(opt.batchSize,1)*opt.transScale
	dX,dY = dX.astype(np.float32),dY.astype(np.float32)
	# fit warp parameters to generated displacements
	if opt.warpType=="homography":
		# direct linear transform: solve the exact 8x8 system A p = b per sample
		A = np.concatenate([np.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),
							np.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],axis=1)
		b = np.expand_dims(np.concatenate([X+dX,Y+dY],axis=1),axis=-1)
		pPert = np.matmul(np.linalg.inv(A),b).squeeze()
		# subtract the identity homography so parameters are offsets from identity
		pPert -= np.array([1,0,0,0,1,0,0,0])
	else:
		# J is the Jacobian of corner positions w.r.t. the warp parameters
		if opt.warpType=="translation":
			J = np.concatenate([np.stack([I,O],axis=-1),
								np.stack([O,I],axis=-1)],axis=1)
		if opt.warpType=="similarity":
			J = np.concatenate([np.stack([X,Y,I,O],axis=-1),
								np.stack([-Y,X,O,I],axis=-1)],axis=1)
		if opt.warpType=="affine":
			J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),
								np.stack([O,O,O,X,Y,I],axis=-1)],axis=1)
		dXY = np.expand_dims(np.concatenate([dX,dY],axis=1),axis=-1)
		# normal-equation least squares: p = (J^T J)^-1 J^T dXY
		Jtransp = np.transpose(J,axes=[0,2,1])
		pPert = np.matmul(np.linalg.inv(np.matmul(Jtransp,J)),np.matmul(Jtransp,dXY)).squeeze()
	pInit = torch.from_numpy(pPert).cuda()
	return pInit
# make training batch
def makeBatch(opt,data):
	"""Sample a random minibatch (with replacement) and move it onto the GPU."""
	sampleN = len(data["image"])
	chosen = np.random.randint(sampleN,size=[opt.batchSize])
	return {
		"image": data["image"][chosen].cuda(),
		"label": data["label"][chosen].cuda(),
	}
# evaluation on test set
def evalTest(opt,data,geometric,classifier):
	"""Evaluate classification accuracy on the test set.

	Also collects per-class mean/variance images of the perturbed and warped
	test images (STN/IC-STN only) for visualization.
	Returns (accuracy,mean,var); mean/var are None for plain CNNs.
	"""
	geometric.eval()
	classifier.eval()
	N = len(data["image"])
	batchN = int(np.ceil(N/opt.batchSize))
	warped = [{},{}]	# per-class lists of [0] perturbed and [1] warped images
	count = 0
	for b in range(batchN):
		# use some dummy data (0) as batch filler if necessary
		if b!=batchN-1:
			realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1))
		else:
			realIdx = np.arange(opt.batchSize*b,N)
		idx = np.zeros([opt.batchSize],dtype=int)
		idx[:len(realIdx)] = realIdx
		# make training batch
		image = data["image"][idx].cuda()
		label = data["label"][idx].cuda()
		image.data.unsqueeze_(dim=1)	# add channel dimension in-place
		# generate perturbation
		pInit = genPerturbations(opt)
		pInitMtrx = warp.vec2mtrx(opt,pInit)
		imagePert = warp.transformImage(opt,image,pInitMtrx)
		# IC-STN warps the original image recurrently; STN/CNN receive the pre-warped image
		imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert)
		imageWarp = imageWarpAll[-1]
		output = classifier(opt,imageWarp)
		_,pred = output.max(dim=1)
		# BUG FIX: only count real (non-filler) entries of the last batch,
		# matching the TensorFlow implementation; previously the dummy
		# duplicates of sample 0 were counted too, inflating accuracy
		count += int((pred==label)[:len(realIdx)].sum().cpu().numpy())
		if opt.netType=="STN" or opt.netType=="IC-STN":
			imgPert = imagePert.detach().cpu().numpy()
			imgWarp = imageWarp.detach().cpu().numpy()
			for i in range(len(realIdx)):
				l = data["label"][idx[i]].item()
				if l not in warped[0]: warped[0][l] = []
				if l not in warped[1]: warped[1][l] = []
				warped[0][l].append(imgPert[i])
				warped[1][l].append(imgWarp[i])
	accuracy = float(count)/N
	if opt.netType=="STN" or opt.netType=="IC-STN":
		mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]),
				np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])]
		var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]),
			   np.array([np.var(warped[1][l],axis=0) for l in warped[1]])]
	else: mean,var = None,None
	geometric.train()
	classifier.train()
	return accuracy,mean,var
================================================
FILE: MNIST-pytorch/graph.py
================================================
import numpy as np
import torch
import time
import data,warp,util
# build classification network
class FullCNN(torch.nn.Module):
	"""Four-conv + two-fc CNN classifier (used for the plain-CNN baseline)."""
	def __init__(self,opt):
		super(FullCNN,self).__init__()
		self.inDim = 1	# running channel/feature count threaded through the builders below
		def conv2Layer(outDim):
			conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[3,3],stride=1,padding=0)
			self.inDim = outDim
			return conv
		def linearLayer(outDim):
			fc = torch.nn.Linear(self.inDim,outDim)
			self.inDim = outDim
			return fc
		def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)
		self.conv2Layers = torch.nn.Sequential(
			conv2Layer(3),torch.nn.ReLU(True),
			conv2Layer(6),torch.nn.ReLU(True),maxpoolLayer(),
			conv2Layer(9),torch.nn.ReLU(True),
			conv2Layer(12),torch.nn.ReLU(True)
		)
		self.inDim *= 8**2	# spatial map is 8x8 after the convs/pool for the default 28x28 input
		self.linearLayers = torch.nn.Sequential(
			linearLayer(48),torch.nn.ReLU(True),
			linearLayer(opt.labelN)
		)
		initialize(opt,self,opt.stdC)
	def forward(self,opt,image):
		"""Return classification logits for a [batchSize,1,H,W] image batch."""
		feat = image
		feat = self.conv2Layers(feat).reshape(opt.batchSize,-1)
		feat = self.linearLayers(feat)
		output = feat
		return output
# build classification network
class CNN(torch.nn.Module):
	"""Small single-conv classifier used on top of a geometric predictor (STN/IC-STN)."""
	def __init__(self,opt):
		super(CNN,self).__init__()
		self.inDim = 1	# running channel/feature count threaded through the builders below
		def conv2Layer(outDim):
			conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[9,9],stride=1,padding=0)
			self.inDim = outDim
			return conv
		def linearLayer(outDim):
			fc = torch.nn.Linear(self.inDim,outDim)
			self.inDim = outDim
			return fc
		def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)
		self.conv2Layers = torch.nn.Sequential(
			conv2Layer(3),torch.nn.ReLU(True)
		)
		self.inDim *= 20**2	# 9x9 conv on the default 28x28 input leaves a 20x20 map
		self.linearLayers = torch.nn.Sequential(
			linearLayer(opt.labelN)
		)
		initialize(opt,self,opt.stdC)
	def forward(self,opt,image):
		"""Return classification logits for a [batchSize,1,H,W] image batch."""
		feat = image
		feat = self.conv2Layers(feat).reshape(opt.batchSize,-1)
		feat = self.linearLayers(feat)
		output = feat
		return output
# an identity class to skip geometric predictors
class Identity(torch.nn.Module):
	"""No-op geometric predictor: returns the input as a one-element warp list."""
	def __init__(self):
		super(Identity,self).__init__()
	def forward(self,opt,feat):
		return [feat]
# build Spatial Transformer Network
class STN(torch.nn.Module):
	"""Geometric predictor that predicts one warp from the image and applies it."""
	def __init__(self,opt):
		super(STN,self).__init__()
		self.inDim = 1	# running channel/feature count threaded through the builders below
		def conv2Layer(outDim):
			conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0)
			self.inDim = outDim
			return conv
		def linearLayer(outDim):
			fc = torch.nn.Linear(self.inDim,outDim)
			self.inDim = outDim
			return fc
		def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)
		self.conv2Layers = torch.nn.Sequential(
			conv2Layer(4),torch.nn.ReLU(True),
			conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer()
		)
		self.inDim *= 8**2	# 8x8 spatial map after convs/pool for the default 28x28 input
		self.linearLayers = torch.nn.Sequential(
			linearLayer(48),torch.nn.ReLU(True),
			linearLayer(opt.warpDim)
		)
		# last0=True zeroes the final layer so the initial predicted warp is the identity
		initialize(opt,self,opt.stdGP,last0=True)
	def forward(self,opt,image):
		"""Predict a warp from *image* and apply it; returns [input, warped]."""
		imageWarpAll = [image]
		feat = image
		feat = self.conv2Layers(feat).reshape(opt.batchSize,-1)
		feat = self.linearLayers(feat)
		p = feat
		pMtrx = warp.vec2mtrx(opt,p)
		imageWarp = warp.transformImage(opt,image,pMtrx)
		imageWarpAll.append(imageWarp)
		return imageWarpAll
# build Inverse Compositional STN
class ICSTN(torch.nn.Module):
	"""Recurrent geometric predictor: composes opt.warpN incremental warp updates.

	The same predictor network is reused every iteration; each predicted update
	dp is composed with the running warp p, and the ORIGINAL image is re-warped
	with the composed warp (the inverse-compositional scheme).
	"""
	def __init__(self,opt):
		super(ICSTN,self).__init__()
		self.inDim = 1	# running channel/feature count threaded through the builders below
		def conv2Layer(outDim):
			conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0)
			self.inDim = outDim
			return conv
		def linearLayer(outDim):
			fc = torch.nn.Linear(self.inDim,outDim)
			self.inDim = outDim
			return fc
		def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)
		self.conv2Layers = torch.nn.Sequential(
			conv2Layer(4),torch.nn.ReLU(True),
			conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer()
		)
		self.inDim *= 8**2	# 8x8 spatial map after convs/pool for the default 28x28 input
		self.linearLayers = torch.nn.Sequential(
			linearLayer(48),torch.nn.ReLU(True),
			linearLayer(opt.warpDim)
		)
		# last0=True zeroes the final layer so the initial predicted update is zero (identity warp)
		initialize(opt,self,opt.stdGP,last0=True)
	def forward(self,opt,image,p):
		"""Iteratively refine warp p on *image*; returns all warpN+1 warped images."""
		imageWarpAll = []
		for l in range(opt.warpN):
			pMtrx = warp.vec2mtrx(opt,p)
			imageWarp = warp.transformImage(opt,image,pMtrx)
			imageWarpAll.append(imageWarp)
			feat = imageWarp
			feat = self.conv2Layers(feat).reshape(opt.batchSize,-1)
			feat = self.linearLayers(feat)
			dp = feat
			p = warp.compose(opt,p,dp)
		# final warp after the last composition
		pMtrx = warp.vec2mtrx(opt,p)
		imageWarp = warp.transformImage(opt,image,pMtrx)
		imageWarpAll.append(imageWarp)
		return imageWarpAll
# initialize weights/biases
def initialize(opt,model,stddev,last0=False):
	"""Gaussian-initialize conv/linear weights and biases of *model* in place.

	When last0 is True the final linear layer is zero-initialized instead, so a
	geometric predictor initially outputs the identity warp.
	"""
	lastLinear = model.linearLayers[-1]
	for layer in model.conv2Layers:
		if not isinstance(layer,torch.nn.Conv2d): continue
		layer.weight.data.normal_(0,stddev)
		layer.bias.data.normal_(0,stddev)
	for layer in model.linearLayers:
		if not isinstance(layer,torch.nn.Linear): continue
		if last0 and layer is lastLinear:
			layer.weight.data.zero_()
			layer.bias.data.zero_()
		else:
			layer.weight.data.normal_(0,stddev)
			layer.bias.data.normal_(0,stddev)
================================================
FILE: MNIST-pytorch/options.py
================================================
import numpy as np
import argparse
import warp
import util
import torch
def set(training):
	"""Parse command-line options and derive all dependent settings.

	training: True for train.py (adds optimizer/schedule flags), False for
	evaluation. Returns the populated argparse namespace, with extra derived
	fields (H, W, warpDim, canon4pts, ...) attached.
	"""
	# parse input arguments
	parser = argparse.ArgumentParser()
	parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network")
	parser.add_argument("--group", default="0", help="name for group")
	parser.add_argument("--model", default="test", help="name for model instance")
	parser.add_argument("--size", default="28x28", help="image resolution")
	parser.add_argument("--warpType", default="homography", help="type of warp function on images",
						choices=["translation","similarity","affine","homography"])
	parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)")
	parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)")
	parser.add_argument("--stdGP", type=float, default=0.1, help="initialization stddev (geometric predictor)")
	parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale")
	parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale")
	if training: # training
		parser.add_argument("--port", type=int, default=8097, help="port number for visdom visualization")
		parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD")
		parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)")
		parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)")
		parser.add_argument("--lrDecay", type=float, default=1.0, help="learning rate decay")
		parser.add_argument("--lrStep", type=int, default=100000, help="learning rate decay step size")
		parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number")
		parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number")
	else: # evaluation
		parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation")
	opt = parser.parse_args()
	# default geometric-predictor learning rate depends on the network type
	if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \
									1e-2 if opt.netType=="STN" else \
									1e-4 if opt.netType=="IC-STN" else None
	# --- below are automatically set ---
	assert(torch.cuda.is_available()) # support only training on GPU for now
	torch.set_default_tensor_type("torch.cuda.FloatTensor")
	opt.training = training
	opt.H,opt.W = [int(x) for x in opt.size.split("x")]
	# side length of the square image grid used for visualization
	opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
	# number of warp parameters for the chosen warp family
	opt.warpDim = 2 if opt.warpType == "translation" else \
				  4 if opt.warpType == "similarity" else \
				  6 if opt.warpType == "affine" else \
				  8 if opt.warpType == "homography" else None
	opt.labelN = 10
	# canonical corners in [-1,1]^2 and their pixel-space counterparts
	opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32)
	opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32)
	opt.refMtrx = np.eye(3).astype(np.float32)
	# classic STN applies exactly one warp regardless of --warpN
	if opt.netType=="STN": opt.warpN = 1
	print("({0}) {1}".format(
		util.toGreen("{0}".format(opt.group)),
		util.toGreen("{0}".format(opt.model))))
	print("------------------------------------------")
	print("network type: {0}, recurrent warps: {1}".format(
		util.toYellow("{0}".format(opt.netType)),
		util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X"))))
	print("batch size: {0}, image size: {1}x{2}".format(
		util.toYellow("{0}".format(opt.batchSize)),
		util.toYellow("{0}".format(opt.H)),
		util.toYellow("{0}".format(opt.W))))
	print("warpScale: (pert) {0} (trans) {1}".format(
		util.toYellow("{0}".format(opt.pertScale)),
		util.toYellow("{0}".format(opt.transScale))))
	if training:
		print("[geometric predictor] stddev={0}, lr={1}".format(
			util.toYellow("{0:.0e}".format(opt.stdGP)),
			util.toYellow("{0:.0e}".format(opt.lrGP))))
		print("[classification network] stddev={0}, lr={1}".format(
			util.toYellow("{0:.0e}".format(opt.stdC)),
			util.toYellow("{0:.0e}".format(opt.lrC))))
	print("------------------------------------------")
	if training:
		print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model)))
	return opt
================================================
FILE: MNIST-pytorch/train.py
================================================
import numpy as np
import time,os,sys
import argparse
import util
# training entry point: builds the network, loads MNIST, and runs the SGD loop
print(util.toYellow("======================================================="))
print(util.toYellow("train.py (training on MNIST)"))
print(util.toYellow("======================================================="))
import torch
import data,graph,warp,util
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building network..."))
with torch.cuda.device(0):
	# ------ build network ------
	if opt.netType=="CNN":
		geometric = graph.Identity()
		classifier = graph.FullCNN(opt)
	elif opt.netType=="STN":
		geometric = graph.STN(opt)
		classifier = graph.CNN(opt)
	elif opt.netType=="IC-STN":
		geometric = graph.ICSTN(opt)
		classifier = graph.CNN(opt)
	# ------ define loss ------
	loss = torch.nn.CrossEntropyLoss()
	# ------ optimizer ------
	# separate learning rates for the geometric predictor and the classifier
	optimList = [{ "params": geometric.parameters(), "lr": opt.lrGP },
				 { "params": classifier.parameters(), "lr": opt.lrC }]
	optim = torch.optim.SGD(optimList)
# load data
print(util.toMagenta("loading MNIST dataset..."))
trainData,testData = data.loadMNIST(opt,"data")
# visdom visualizer
vis = util.Visdom(opt)
print(util.toYellow("======= TRAINING START ======="))
timeStart = time.time()
# start session
with torch.cuda.device(0):
	geometric.train()
	classifier.train()
	if opt.fromIt!=0:
		util.restoreModel(opt,geometric,classifier,opt.fromIt)
		print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt)))
	print(util.toMagenta("start training..."))
	# training loop
	for i in range(opt.fromIt,opt.toIt):
		# NOTE(review): decayed rates are only computed for logging here; the
		# optimizer's param-group lrs are never updated in this loop — confirm intended
		lrGP = opt.lrGP*opt.lrDecay**(i//opt.lrStep)
		lrC = opt.lrC*opt.lrDecay**(i//opt.lrStep)
		# make training batch
		batch = data.makeBatch(opt,trainData)
		image = batch["image"].unsqueeze(dim=1)
		label = batch["label"]
		# generate perturbation
		pInit = data.genPerturbations(opt)
		pInitMtrx = warp.vec2mtrx(opt,pInit)
		# forward/backprop through network
		optim.zero_grad()
		imagePert = warp.transformImage(opt,image,pInitMtrx)
		# IC-STN warps the original image recurrently; STN/CNN receive the pre-warped image
		imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert)
		imageWarp = imageWarpAll[-1]
		output = classifier(opt,imageWarp)
		train_loss = loss(output,label)
		train_loss.backward()
		# run one step
		optim.step()
		if (i+1)%100==0:
			print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}"
				.format(util.toCyan("{0}".format(i+1)),
						opt.toIt,
						util.toGreen("{0:.2f}".format(time.time()-timeStart)),
						util.toYellow("{0:.0e}".format(lrGP)),
						util.toYellow("{0:.0e}".format(lrC)),
						util.toRed("{0:.4f}".format(train_loss))))
		if (i+1)%200==0: vis.trainLoss(opt,i+1,train_loss)
		if (i+1)%1000==0:
			# evaluate on test set
			testAcc,testMean,testVar = data.evalTest(opt,testData,geometric,classifier)
			testError = (1-testAcc)*100
			vis.testLoss(opt,i+1,testError)
			if opt.netType=="STN" or opt.netType=="IC-STN":
				vis.meanVar(opt,testMean,testVar)
		if (i+1)%10000==0:
			util.saveModel(opt,geometric,classifier,i+1)
			print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1)))
print(util.toYellow("======= TRAINING DONE ======="))
================================================
FILE: MNIST-pytorch/util.py
================================================
import numpy as np
import scipy.misc
import torch
import os
import termcolor
import visdom
def mkdir(path):
	"""Create directory *path* if it does not already exist (non-recursive)."""
	if not os.path.exists(path):
		os.mkdir(path)
def imread(fname):
	# Read an image file and scale intensities to [0,1].
	# NOTE(review): scipy.misc.imread was removed in SciPy 1.2 — on modern SciPy
	# this needs imageio/PIL instead; confirm the pinned SciPy version.
	return scipy.misc.imread(fname)/255.0
def imsave(fname,array):
	# Save a [0,1]-valued array as an image file.
	# NOTE(review): scipy.misc.toimage was likewise removed in modern SciPy.
	scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)
# convert to colored strings
def toRed(content):
	"""Format *content* as bold red terminal text."""
	return termcolor.colored(content,"red",attrs=["bold"])
def toGreen(content):
	"""Format *content* as bold green terminal text."""
	return termcolor.colored(content,"green",attrs=["bold"])
def toBlue(content):
	"""Format *content* as bold blue terminal text."""
	return termcolor.colored(content,"blue",attrs=["bold"])
def toCyan(content):
	"""Format *content* as bold cyan terminal text."""
	return termcolor.colored(content,"cyan",attrs=["bold"])
def toYellow(content):
	"""Format *content* as bold yellow terminal text."""
	return termcolor.colored(content,"yellow",attrs=["bold"])
def toMagenta(content):
	"""Format *content* as bold magenta terminal text."""
	return termcolor.colored(content,"magenta",attrs=["bold"])
# restore model
def restoreModel(opt,geometric,classifier,it):
	"""Load geometric-predictor and classifier weights saved at iteration *it*."""
	prefix = "models_{0}/{1}_it{2}".format(opt.group,opt.model,it)
	geometric.load_state_dict(torch.load(prefix+"_GP.npy"))
	classifier.load_state_dict(torch.load(prefix+"_C.npy"))
# save model
def saveModel(opt,geometric,classifier,it):
	"""Save geometric-predictor and classifier weights tagged with iteration *it*."""
	prefix = "models_{0}/{1}_it{2}".format(opt.group,opt.model,it)
	torch.save(geometric.state_dict(),prefix+"_GP.npy")
	torch.save(classifier.state_dict(),prefix+"_C.npy")
class Visdom():
	"""Thin wrapper around a visdom client for loss curves and mean/var image panels."""
	def __init__(self,opt):
		self.vis = visdom.Visdom(port=opt.port,use_incoming_socket=False)
		# each window is created on first update, then appended to
		self.trainLossInit = True
		self.testLossInit = True
		self.meanVarInit = True
	def tileImages(self,opt,images,H,W,HN,WN):
		"""Tile HN*WN images of size HxW into one montage image."""
		assert(len(images)==HN*WN)
		images = images.reshape([HN,WN,-1,H,W])
		images = [list(i) for i in images]
		# concatenate along width within a row, then rows along height
		imageBlocks = np.concatenate([np.concatenate(row,axis=2) for row in images],axis=1)
		return imageBlocks
	def trainLoss(self,opt,it,loss):
		"""Plot training loss at iteration *it* (window created on first call)."""
		loss = float(loss.detach().cpu().numpy())
		if self.trainLossInit:
			self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_trainloss".format(opt.model),
						  opts={ "title": "{0} (TRAIN_loss)".format(opt.model) })
			self.trainLossInit = False
		else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_trainloss",update="append")
	def testLoss(self,opt,it,loss):
		"""Plot test error at iteration *it* (window created on first call)."""
		if self.testLossInit:
			self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_testloss".format(opt.model),
						  opts={ "title": "{0} (TEST_error)".format(opt.model) })
			self.testLossInit = False
		else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_testloss",update="append")
	def meanVar(self,opt,mean,var):
		"""Show per-class mean/variance montages for initial (perturbed) and warped images."""
		mean = [self.tileImages(opt,m,opt.H,opt.W,1,10) for m in mean]
		# variance is scaled x3 to make it visible in the display
		var = [self.tileImages(opt,v,opt.H,opt.W,1,10)*3 for v in var]
		self.vis.image(mean[0].clip(0,1),win="{0}_meaninit".format(opt.model), opts={ "title": "{0} (TEST_mean_init)".format(opt.model) })
		self.vis.image(mean[1].clip(0,1),win="{0}_meanwarped".format(opt.model), opts={ "title": "{0} (TEST_mean_warped)".format(opt.model) })
		self.vis.image(var[0].clip(0,1),win="{0}_varinit".format(opt.model), opts={ "title": "{0} (TEST_var_init)".format(opt.model) })
		self.vis.image(var[1].clip(0,1),win="{0}_varwarped".format(opt.model), opts={ "title": "{0} (TEST_var_warped)".format(opt.model) })
================================================
FILE: MNIST-pytorch/warp.py
================================================
import numpy as np
import scipy.linalg
import torch
import util
# fit (affine) warp between two sets of points
def fit(Xsrc,Xdst):
	"""Least-squares fit of an affine warp mapping Xsrc points onto Xdst points.

	Xsrc, Xdst: [N,2] arrays of (x,y) coordinates.
	Returns a 3x3 float32 matrix [[p1,p2,p3],[p4,p5,p6],[0,0,1]].
	"""
	ptsN = len(Xsrc)
	X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN])
	A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1),
						np.stack([O,O,O,X,Y,I],axis=1)),axis=0)
	b = np.concatenate((U,V),axis=0)
	p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze()
	# BUG FIX: dtype must be a NumPy dtype — torch.float32 is not a valid
	# numpy dtype specifier and raises a TypeError here
	pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32)
	return pMtrx
# compute composition of warp parameters
def compose(opt,p,dp):
	"""Compose warp update dp with current warp p (dp applied after p)."""
	mtrx = vec2mtrx(opt,p)
	dMtrx = vec2mtrx(opt,dp)
	composed = dMtrx.matmul(mtrx)
	# renormalize so the homogeneous (2,2) entry is 1
	composed = composed/composed[:,2:3,2:3]
	return mtrx2vec(opt,composed)
# compute inverse of warp parameters
def inverse(opt,p):
	"""Return warp parameters whose matrix is the inverse of p's warp matrix."""
	invMtrx = vec2mtrx(opt,p).inverse()
	return mtrx2vec(opt,invMtrx)
# convert warp parameters to matrix
def vec2mtrx(opt,p):
	"""Convert warp parameter vectors ([B,warpDim]) to [B,3,3] homogeneous matrices.

	Parameters are offsets from the identity warp, so the identity is added
	back on the relevant entries via the I (ones) vector.
	"""
	O = torch.zeros(opt.batchSize,dtype=torch.float32).cuda()
	I = torch.ones(opt.batchSize,dtype=torch.float32).cuda()
	if opt.warpType=="translation":
		tx,ty = torch.unbind(p,dim=1)
		pMtrx = torch.stack([torch.stack([I,O,tx],dim=-1),
							 torch.stack([O,I,ty],dim=-1),
							 torch.stack([O,O,I],dim=-1)],dim=1)
	if opt.warpType=="similarity":
		# pc/ps parameterize a scaled rotation: [[1+pc,-ps],[ps,1+pc]]
		pc,ps,tx,ty = torch.unbind(p,dim=1)
		pMtrx = torch.stack([torch.stack([I+pc,-ps,tx],dim=-1),
							 torch.stack([ps,I+pc,ty],dim=-1),
							 torch.stack([O,O,I],dim=-1)],dim=1)
	if opt.warpType=="affine":
		p1,p2,p3,p4,p5,p6 = torch.unbind(p,dim=1)
		pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1),
							 torch.stack([p4,I+p5,p6],dim=-1),
							 torch.stack([O,O,I],dim=-1)],dim=1)
	if opt.warpType=="homography":
		p1,p2,p3,p4,p5,p6,p7,p8 = torch.unbind(p,dim=1)
		pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1),
							 torch.stack([p4,I+p5,p6],dim=-1),
							 torch.stack([p7,p8,I],dim=-1)],dim=1)
	return pMtrx
# convert warp matrix to parameters
def mtrx2vec(opt,pMtrx):
	"""Convert [B,3,3] warp matrices back to parameter vectors (inverse of vec2mtrx)."""
	e00,e01,e02 = pMtrx[:,0,0],pMtrx[:,0,1],pMtrx[:,0,2]
	e10,e11,e12 = pMtrx[:,1,0],pMtrx[:,1,1],pMtrx[:,1,2]
	e20,e21 = pMtrx[:,2,0],pMtrx[:,2,1]
	# parameters are offsets from the identity warp, hence the -1 on diagonal entries
	if opt.warpType=="translation": p = torch.stack([e02,e12],dim=1)
	if opt.warpType=="similarity": p = torch.stack([e00-1,e10,e02,e12],dim=1)
	if opt.warpType=="affine": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12],dim=1)
	if opt.warpType=="homography": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],dim=1)
	return p
# warp the image
def transformImage(opt,image,pMtrx):
	"""Warp *image* ([B,C,H,W]) by the [B,3,3] warp matrices pMtrx.

	Builds the canonical [-1,1]^2 sampling grid, transforms it by
	refMtrx @ pMtrx, and bilinearly samples the image at the warped locations.
	"""
	refMtrx = torch.from_numpy(opt.refMtrx).cuda()
	refMtrx = refMtrx.repeat(opt.batchSize,1,1)
	transMtrx = refMtrx.matmul(pMtrx)
	# warp the canonical coordinates
	X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))
	X,Y = X.flatten(),Y.flatten()
	# homogeneous coordinates, tiled per batch sample: [B,3,H*W]
	XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T
	XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)
	XYhom = torch.from_numpy(XYhom).cuda()
	XYwarpHom = transMtrx.matmul(XYhom)
	XwarpHom,YwarpHom,ZwarpHom = torch.unbind(XYwarpHom,dim=1)
	# perspective divide; epsilon guards against division by zero
	Xwarp = (XwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W)
	Ywarp = (YwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W)
	grid = torch.stack([Xwarp,Ywarp],dim=-1)
	# sampling with bilinear interpolation
	imageWarp = torch.nn.functional.grid_sample(image,grid,mode="bilinear")
	return imageWarp
================================================
FILE: MNIST-tensorflow/data.py
================================================
import numpy as np
import scipy.linalg
import os,time
import tensorflow as tf
import warp
# load MNIST data
def loadMNIST(fname):
	"""Load cached MNIST splits from *fname* (.npz), building the cache on first use.

	Returns (trainData,validData,testData), each a dict with "image"
	([N,28,28] float32 in [0,1]) and "label" ([N] class indices).
	"""
	if not os.path.exists(fname):
		# download and preprocess MNIST dataset (TF1-era input pipeline)
		from tensorflow.examples.tutorials.mnist import input_data
		mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)
		trainData,validData,testData = {},{},{}
		trainData["image"] = mnist.train.images.reshape([-1,28,28]).astype(np.float32)
		validData["image"] = mnist.validation.images.reshape([-1,28,28]).astype(np.float32)
		testData["image"] = mnist.test.images.reshape([-1,28,28]).astype(np.float32)
		trainData["label"] = np.argmax(mnist.train.labels.astype(np.float32),axis=1)
		validData["label"] = np.argmax(mnist.validation.labels.astype(np.float32),axis=1)
		testData["label"] = np.argmax(mnist.test.labels.astype(np.float32),axis=1)
		# BUG FIX: guard against fname having no directory component and
		# against the directory already existing (os.makedirs("") raises)
		dirName = os.path.dirname(fname)
		if dirName: os.makedirs(dirName,exist_ok=True)
		np.savez(fname,train=trainData,valid=validData,test=testData)
		os.system("rm -rf MNIST_data")
	# the splits are pickled dict (object) arrays, so allow_pickle is required
	# (np.load defaults to allow_pickle=False since NumPy 1.16.3)
	MNIST = np.load(fname,allow_pickle=True)
	trainData = MNIST["train"].item()
	validData = MNIST["valid"].item()
	testData = MNIST["test"].item()
	return trainData,validData,testData
# generate training batch
def genPerturbations(opt):
	"""Build a TF op that samples random warp perturbation parameters.

	Corner displacements are sampled per batch element and a warp of type
	opt.warpType is least-squares-fitted to them. Returns a
	[batchSize,warpDim] float32 tensor of parameters (offsets from identity).
	"""
	with tf.name_scope("genPerturbations"):
		# canonical corner coordinates, tiled per batch sample: [batchSize,4]
		X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])
		Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])
		# per-corner jitter (pertScale) plus a shared per-sample translation (transScale)
		dX = tf.random_normal([opt.batchSize,4])*opt.pertScale \
			+tf.random_normal([opt.batchSize,1])*opt.transScale
		dY = tf.random_normal([opt.batchSize,4])*opt.pertScale \
			+tf.random_normal([opt.batchSize,1])*opt.transScale
		O = np.zeros([opt.batchSize,4],dtype=np.float32)
		I = np.ones([opt.batchSize,4],dtype=np.float32)
		# fit warp parameters to generated displacements
		if opt.warpType=="homography":
			# direct linear transform: solve the exact 8x8 system A p = b per sample
			A = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),
						   tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1)
			b = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1)
			pPert = tf.matrix_solve(A,b)[:,:,0]
			# subtract the identity homography so parameters are offsets from identity
			pPert -= tf.to_float([[1,0,0,0,1,0,0,0]])
		else:
			# J is the Jacobian of corner positions w.r.t. the warp parameters
			if opt.warpType=="translation":
				J = np.concatenate([np.stack([I,O],axis=-1),
									np.stack([O,I],axis=-1)],axis=1)
			if opt.warpType=="similarity":
				J = np.concatenate([np.stack([X,Y,I,O],axis=-1),
									np.stack([-Y,X,O,I],axis=-1)],axis=1)
			if opt.warpType=="affine":
				J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),
									np.stack([O,O,O,X,Y,I],axis=-1)],axis=1)
			dXY = tf.expand_dims(tf.concat([dX,dY],1),-1)
			pPert = tf.matrix_solve_ls(J,dXY)[:,:,0]
	return pPert
# make training batch
def makeBatch(opt,data,PH):
	"""Build a feed_dict mapping the (image,label) placeholders in PH to a random minibatch."""
	sampleN = len(data["image"])
	chosen = np.random.randint(sampleN,size=[opt.batchSize])
	# put data in placeholders
	imagePH,labelPH = PH
	return {
		imagePH: data["image"][chosen],
		labelPH: data["label"][chosen],
	}
# evaluation on test set
def evalTest(opt,sess,data,PH,prediction,imagesEval=[]):
	"""Run the prediction op over the whole test set and compute accuracy.

	imagesEval optionally lists image tensors (perturbed/warped) to fetch for
	per-class mean/variance statistics (STN/IC-STN only).
	Returns (accuracy,mean,var); mean/var are None for plain CNNs.
	NOTE(review): the mutable default [] is shared across calls, but it is
	never mutated here so it is harmless.
	"""
	N = len(data["image"])
	# put data in placeholders
	[image,label] = PH
	batchN = int(np.ceil(N/opt.batchSize))
	warped = [{},{}]	# per-class lists of [0] perturbed and [1] warped images
	count = 0
	for b in range(batchN):
		# use some dummy data (0) as batch filler if necessary
		if b!=batchN-1:
			realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1))
		else:
			realIdx = np.arange(opt.batchSize*b,N)
		idx = np.zeros([opt.batchSize],dtype=int)
		idx[:len(realIdx)] = realIdx
		batch = {
			image: data["image"][idx],
			label: data["label"][idx],
		}
		evalList = sess.run([prediction]+imagesEval,feed_dict=batch)
		pred = evalList[0]
		# only the real (non-filler) entries contribute to the correct count
		count += pred[:len(realIdx)].sum()
		if opt.netType=="STN" or opt.netType=="IC-STN":
			imgs = evalList[1:]
			for i in range(len(realIdx)):
				l = data["label"][idx[i]]
				if l not in warped[0]: warped[0][l] = []
				if l not in warped[1]: warped[1][l] = []
				warped[0][l].append(imgs[0][i])
				warped[1][l].append(imgs[1][i])
	accuracy = float(count)/N
	if opt.netType=="STN" or opt.netType=="IC-STN":
		mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]),
				np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])]
		var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]),
			   np.array([np.var(warped[1][l],axis=0) for l in warped[1]])]
	else: mean,var = None,None
	return accuracy,mean,var
================================================
FILE: MNIST-tensorflow/graph.py
================================================
import numpy as np
import tensorflow as tf
import time
import data,warp,util
# build classification network
def fullCNN(opt,image):
	"""Four-conv + two-fc classifier (used without a geometric predictor); returns logits."""
	def conv2Layer(opt,feat,outDim):
		# createVariable is defined elsewhere in this file; it returns (weight,bias)
		weight,bias = createVariable(opt,[3,3,int(feat.shape[-1]),outDim],stddev=opt.stdC)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,3)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		with tf.variable_scope("conv3"):
			feat = conv2Layer(opt,feat,9)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv4"):
			feat = conv2Layer(opt,feat,12)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc5"):
			feat = linearLayer(opt,feat,48)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("fc6"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output
# build classification network
def CNN(opt,image):
	"""Small single-conv classifier used on top of a geometric predictor; returns logits."""
	def conv2Layer(opt,feat,outDim):
		# createVariable is defined elsewhere in this file; it returns (weight,bias)
		weight,bias = createVariable(opt,[9,9,int(feat.shape[-1]),outDim],stddev=opt.stdC)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,3)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc2"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output
# build Spatial Transformer Network
def STN(opt,image):
	"""Single-warp spatial transformer: predict one warp-parameter vector
	from the input image and apply it. Returns [input, warped] images."""
	def _conv(feat,chanOut):
		# 7x7 valid convolution, weights drawn from N(0,opt.stdGP)
		W,b = createVariable(opt,[7,7,int(feat.shape[-1]),chanOut],stddev=opt.stdGP)
		return tf.nn.conv2d(feat,W,strides=[1,1,1,1],padding="VALID")+b
	def _fc(feat,chanOut,final=False):
		# final layer uses stddev 0 (all-zero init) so the predicted warp
		# starts at identity
		W,b = createVariable(opt,[int(feat.shape[-1]),chanOut],stddev=0.0 if final else opt.stdGP)
		return tf.matmul(feat,W)+b
	imageWarpAll = [image]
	with tf.variable_scope("geometric"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = tf.nn.relu(_conv(feat,4))
		with tf.variable_scope("conv2"):
			feat = tf.nn.relu(_conv(feat,8))
		feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc3"):
			feat = tf.nn.relu(_fc(feat,48))
		with tf.variable_scope("fc4"):
			p = _fc(feat,opt.warpDim,final=True)
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarpAll.append(warp.transformImage(opt,image,pMtrx))
	return imageWarpAll
# build Inverse Compositional STN
def ICSTN(opt,image,p):
	"""Build the Inverse Compositional STN: opt.warpN recurrent passes of a
	geometric predictor with weights shared across passes.

	Each pass warps the ORIGINAL image by the current warp p, predicts an
	update dp from the warped image, and composes dp into p. Returns the
	warped image from every pass plus the final warp (opt.warpN+1 images).
	"""
	def conv2Layer(opt,feat,outDim):
		# 7x7 valid convolution, weights drawn from N(0,opt.stdGP)
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim,final=False):
		# final layer uses stddev 0 (all-zero init) so the first predicted
		# update dp is zero, i.e. the warp starts unchanged
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP)
		fc = tf.matmul(feat,weight)+bias
		return fc
	imageWarpAll = []
	for l in range(opt.warpN):
		# reuse=l>0 shares the predictor variables across all recurrent passes
		with tf.variable_scope("geometric",reuse=l>0):
			pMtrx = warp.vec2mtrx(opt,p)
			imageWarp = warp.transformImage(opt,image,pMtrx)
			imageWarpAll.append(imageWarp)
			feat = imageWarp
			with tf.variable_scope("conv1"):
				feat = conv2Layer(opt,feat,4)
				feat = tf.nn.relu(feat)
			with tf.variable_scope("conv2"):
				feat = conv2Layer(opt,feat,8)
				feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
			feat = tf.reshape(feat,[opt.batchSize,-1])
			with tf.variable_scope("fc3"):
				feat = linearLayer(opt,feat,48)
				feat = tf.nn.relu(feat)
			with tf.variable_scope("fc4"):
				feat = linearLayer(opt,feat,opt.warpDim,final=True)
			dp = feat
		# inverse-compositional update: fold dp into the running warp
		p = warp.compose(opt,p,dp)
	# apply the final composed warp to the original image
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarp = warp.transformImage(opt,image,pMtrx)
	imageWarpAll.append(imageWarp)
	return imageWarpAll
# auxiliary function for creating weight and bias
def createVariable(opt,weightShape,biasShape=None,stddev=None):
	"""Create a (weight,bias) variable pair under the current variable scope.

	biasShape defaults to the last dimension of weightShape; both variables
	are initialized from N(0,stddev)."""
	if biasShape is None:
		biasShape = [weightShape[-1]]
	initializer = tf.random_normal_initializer(stddev=stddev)
	weight = tf.get_variable("weight",shape=weightShape,dtype=tf.float32,
							 initializer=initializer)
	bias = tf.get_variable("bias",shape=biasShape,dtype=tf.float32,
						   initializer=initializer)
	return weight,bias
================================================
FILE: MNIST-tensorflow/options.py
================================================
import numpy as np
import argparse
import warp
import util
def set(training):
	"""Parse command-line arguments and derive all dependent options.

	training: True when called from train.py, False for evaluation
	(training exposes the optimizer arguments; evaluation only batchSize).
	Returns the populated argparse namespace.
	NOTE(review): the name shadows the builtin set(); it is always invoked
	as options.set(...), so renaming would change the module interface.
	"""
	# parse input arguments
	parser = argparse.ArgumentParser()
	parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network")
	parser.add_argument("--group", default="0", help="name for group")
	parser.add_argument("--model", default="test", help="name for model instance")
	parser.add_argument("--size", default="28x28", help="image resolution")
	parser.add_argument("--warpType", default="homography", help="type of warp function on images",
						choices=["translation","similarity","affine","homography"])
	parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)")
	parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)")
	parser.add_argument("--stdGP", type=float, default=0.1, help="initialization stddev (geometric predictor)")
	parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale")
	parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale")
	if training: # training
		parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD")
		parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)")
		parser.add_argument("--lrCdecay", type=float, default=1.0, help="learning rate decay (classification network)")
		parser.add_argument("--lrCstep", type=int, default=100000, help="learning rate decay step size (classification network)")
		parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)")
		parser.add_argument("--lrGPdecay", type=float, default=1.0, help="learning rate decay (geometric predictor)")
		parser.add_argument("--lrGPstep", type=int, default=100000, help="learning rate decay step size (geometric predictor)")
		parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number")
		parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number")
	else: # evaluation
		parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation")
	opt = parser.parse_args()
	# default geometric-predictor learning rate depends on the network type
	if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \
									1e-2 if opt.netType=="STN" else \
									1e-4 if opt.netType=="IC-STN" else None
	# --- below are automatically set ---
	opt.training = training
	opt.H,opt.W = [int(x) for x in opt.size.split("x")]
	# side length of the square grid used by image summaries
	opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))
	# warpDim: degrees of freedom of the chosen warp function
	opt.warpDim = 2 if opt.warpType == "translation" else \
				  4 if opt.warpType == "similarity" else \
				  6 if opt.warpType == "affine" else \
				  8 if opt.warpType == "homography" else None
	opt.labelN = 10
	# canonical [-1,1]^2 corners and their image-coordinate counterparts,
	# used to fit the reference warp matrix
	opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32)
	opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32)
	opt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts)
	# STN applies exactly one warp regardless of --warpN
	if opt.netType=="STN": opt.warpN = 1
	print("({0}) {1}".format(
		util.toGreen("{0}".format(opt.group)),
		util.toGreen("{0}".format(opt.model))))
	print("------------------------------------------")
	print("network type: {0}, recurrent warps: {1}".format(
		util.toYellow("{0}".format(opt.netType)),
		util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X"))))
	print("batch size: {0}, image size: {1}x{2}".format(
		util.toYellow("{0}".format(opt.batchSize)),
		util.toYellow("{0}".format(opt.H)),
		util.toYellow("{0}".format(opt.W))))
	print("warpScale: (pert) {0} (trans) {1}".format(
		util.toYellow("{0}".format(opt.pertScale)),
		util.toYellow("{0}".format(opt.transScale))))
	if training:
		print("[geometric predictor] stddev={0}, lr={1}".format(
			util.toYellow("{0:.0e}".format(opt.stdGP)),
			util.toYellow("{0:.0e}".format(opt.lrGP))))
		print("[classification network] stddev={0}, lr={1}".format(
			util.toYellow("{0:.0e}".format(opt.stdC)),
			util.toYellow("{0:.0e}".format(opt.lrC))))
	print("------------------------------------------")
	if training:
		print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model)))
	return opt
================================================
FILE: MNIST-tensorflow/train.py
================================================
# Training script for MNIST: builds the chosen network (CNN / STN / IC-STN),
# trains with SGD, and periodically logs TensorBoard summaries and checkpoints.
import numpy as np
import time,os,sys
import argparse
import util
print(util.toYellow("======================================================="))
print(util.toYellow("train.py (training on MNIST)"))
print(util.toYellow("======================================================="))
import tensorflow as tf
import data,graph,warp,util
import options
print(util.toMagenta("setting configurations..."))
opt = options.set(training=True)
# create directories for model output
util.mkdir("models_{0}".format(opt.group))
print(util.toMagenta("building graph..."))
tf.reset_default_graph()
# build graph
with tf.device("/gpu:0"):
	# ------ define input data ------
	image = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W])
	label = tf.placeholder(tf.int64,shape=[opt.batchSize])
	PH = [image,label]
	# ------ generate perturbation ------
	# random initial warp applied to each training image
	pInit = data.genPerturbations(opt)
	pInitMtrx = warp.vec2mtrx(opt,pInit)
	# ------ build network ------
	image = tf.expand_dims(image,axis=-1)
	imagePert = warp.transformImage(opt,image,pInitMtrx)
	if opt.netType=="CNN":
		output = graph.fullCNN(opt,imagePert)
	elif opt.netType=="STN":
		output = None
		imageWarpAll = graph.STN(opt,imagePert)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
	elif opt.netType=="IC-STN":
		# IC-STN composes pInit internally, so it takes the unperturbed image
		imageWarpAll = graph.ICSTN(opt,image,pInit)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
	softmax = tf.nn.softmax(output)
	labelOnehot = tf.one_hot(label,opt.labelN)
	prediction = tf.equal(tf.argmax(softmax,1),label)
	# ------ define loss ------
	softmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot)
	loss = tf.reduce_mean(softmaxLoss)
	# ------ optimizer ------
	# separate learning-rate placeholders for geometric predictor / classifier
	lrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[])
	optim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH)
	# ------ generate summaries ------
	summaryImageTrain = []
	summaryImageTest = []
	if opt.netType=="STN" or opt.netType=="IC-STN":
		for l in range(opt.warpN+1):
			summaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l],"TRAIN_warp{0}".format(l),opt.H,opt.W))
			summaryImageTest.append(util.imageSummary(opt,imageWarpAll[l],"TEST_warp{0}".format(l),opt.H,opt.W))
		summaryImageTrain = tf.summary.merge(summaryImageTrain)
		summaryImageTest = tf.summary.merge(summaryImageTest)
	summaryLossTrain = tf.summary.scalar("TRAIN_loss",loss)
	testErrorPH = tf.placeholder(tf.float32,shape=[])
	testImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,1])
	summaryErrorTest = tf.summary.scalar("TEST_error",testErrorPH)
	if opt.netType=="STN" or opt.netType=="IC-STN":
		# per-class mean/variance montages of initial vs. warped test images
		summaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_init",opt.H,opt.W)
		summaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_warped",opt.H,opt.W)
		summaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_init",opt.H,opt.W)
		summaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_warped",opt.H,opt.W)
# load data
print(util.toMagenta("loading MNIST dataset..."))
trainData,validData,testData = data.loadMNIST("data/MNIST.npz")
# prepare model saver/summary writer
saver = tf.train.Saver(max_to_keep=20)
summaryWriter = tf.summary.FileWriter("summary_{0}/{1}".format(opt.group,opt.model))
print(util.toYellow("======= TRAINING START ======="))
timeStart = time.time()
# start session
tfConfig = tf.ConfigProto(allow_soft_placement=True)
tfConfig.gpu_options.allow_growth = True
with tf.Session(config=tfConfig) as sess:
	sess.run(tf.global_variables_initializer())
	summaryWriter.add_graph(sess.graph)
	if opt.fromIt!=0:
		util.restoreModel(opt,sess,saver,opt.fromIt)
		print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt)))
	print(util.toMagenta("start training..."))
	# training loop
	for i in range(opt.fromIt,opt.toIt):
		# step-decayed learning rates
		lrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep)
		lrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep)
		# make training batch
		batch = data.makeBatch(opt,trainData,PH)
		batch[lrGP_PH] = lrGP
		batch[lrC_PH] = lrC
		# run one step
		_,l = sess.run([optim,loss],feed_dict=batch)
		if (i+1)%100==0:
			print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}"
				.format(util.toCyan("{0}".format(i+1)),
						opt.toIt,
						util.toGreen("{0:.2f}".format(time.time()-timeStart)),
						util.toYellow("{0:.0e}".format(lrGP)),
						util.toYellow("{0:.0e}".format(lrC)),
						util.toRed("{0:.4f}".format(l))))
		if (i+1)%100==0:
			summaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1)
		if (i+1)%500==0 and (opt.netType=="STN" or opt.netType=="IC-STN"):
			# NOTE(review): both TRAIN_ and TEST_ warp image summaries are fed
			# the training batch here -- confirm whether TEST_ was intended to
			# use a test batch
			summaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1)
			summaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1)
		if (i+1)%1000==0:
			# evaluate on test set
			if opt.netType=="STN" or opt.netType=="IC-STN":
				testAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp])
			else:
				testAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction)
			testError = (1-testAcc)*100
			summaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1)
			if opt.netType=="STN" or opt.netType=="IC-STN":
				summaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1)
		if (i+1)%10000==0:
			util.saveModel(opt,sess,saver,i+1)
			print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1)))
print(util.toYellow("======= TRAINING DONE ======="))
================================================
FILE: MNIST-tensorflow/util.py
================================================
import numpy as np
import scipy.misc
import tensorflow as tf
import os
import termcolor
def mkdir(path):
	"""Create directory `path` (including missing parents) if needed.

	Uses makedirs(exist_ok=True) instead of the original exists()+mkdir
	pair, which had a check-then-create race and could not create parents.
	"""
	os.makedirs(path,exist_ok=True)
def imread(fname):
	"""Read an image file and scale pixel values from [0,255] to [0,1]."""
	# NOTE(review): scipy.misc.imread was deprecated in SciPy 1.0 and removed
	# in 1.2 -- this needs imageio.imread (or similar) on modern SciPy.
	return scipy.misc.imread(fname)/255.0
def imsave(fname,array):
	"""Save a [0,1]-valued array as an image file, clipping to that range."""
	# NOTE(review): scipy.misc.toimage was deprecated in SciPy 1.0 and removed
	# in 1.2 -- this needs a PIL/imageio replacement on modern SciPy.
	scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)
# convert to colored strings
def _boldColor(color):
	# factory: returns a function rendering its argument in bold `color`
	def render(content):
		return termcolor.colored(content,color,attrs=["bold"])
	return render
toRed = _boldColor("red")
toGreen = _boldColor("green")
toBlue = _boldColor("blue")
toCyan = _boldColor("cyan")
toYellow = _boldColor("yellow")
toMagenta = _boldColor("magenta")
# make image summary from image batch
def imageSummary(opt,image,tag,H,W):
	"""Tile the first visBlockSize^2 images of a batch into one square
	montage and wrap it as a tf.summary.image under `tag`."""
	blockSize = opt.visBlockSize
	# batch_to_space followed by the reshape/transpose pair rearranges
	# blockSize^2 HxW images into a single (H*blockSize)x(W*blockSize) grid
	imageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize)
	imagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1])
	# convert [0,1] floats to uint8 pixel values for TensorBoard
	imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary
# make image summary from image batch (mean/variance)
def imageSummaryMeanVar(opt,image,tag,H,W):
	"""Tile a batch of 10 per-class images into a single 1x10 row montage
	and wrap it as a tf.summary.image under `tag`."""
	# assumes the batch holds exactly 10 images (opt.labelN for MNIST) --
	# the 10 is hard-coded here; TODO confirm it should track opt.labelN
	imageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[1,10])
	imagePermute = tf.reshape(imageOne,[H,1,W,10,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*1,W*10,-1])
	# convert [0,1] floats to uint8 pixel values for TensorBoard
	imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary
# set optimizer for different learning rates
def setOptimizer(opt,loss,lrGP,lrC):
	"""Build SGD update ops: lrC drives the classifier variables and lrGP
	the geometric-predictor variables (selected by name substring)."""
	allVars = tf.global_variables()
	varsGP = [v for v in allVars if "geometric" in v.name]
	varsC = [v for v in allVars if "classifier" in v.name]
	# classifier update is always present
	optimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(
		zip(tf.gradients(loss,varsC),varsC))
	if not varsGP:
		return optimC
	# geometric predictor exists (STN / IC-STN): group both updates
	optimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(
		zip(tf.gradients(loss,varsGP),varsGP))
	return tf.group(optimC,optimGP)
# restore model
def restoreModel(opt,sess,saver,it):
	"""Restore the checkpoint saved at iteration `it` into `sess`."""
	# dropped a stray opt.warpN argument: the format string has only three
	# placeholders, so the extra argument was silently ignored
	saver.restore(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it))
# save model
def saveModel(opt,sess,saver,it):
	"""Save a checkpoint of `sess` tagged with iteration `it`."""
	# dropped a stray opt.warpN argument: the format string has only three
	# placeholders, so the extra argument was silently ignored
	saver.save(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it))
================================================
FILE: MNIST-tensorflow/warp.py
================================================
import numpy as np
import scipy.linalg
import tensorflow as tf
# fit (affine) warp between two sets of points
def fit(Xsrc,Xdst):
	"""Least-squares fit of a 3x3 affine warp matrix mapping Xsrc onto Xdst.

	Xsrc, Xdst: [N,2] point arrays. Returns a float32 [3,3] matrix whose
	last row is [0,0,1]."""
	N = len(Xsrc)
	zeros,ones = np.zeros([N]),np.ones([N])
	x,y = Xsrc[:,0],Xsrc[:,1]
	u,v = Xdst[:,0],Xdst[:,1]
	# linear system: [x y 1 0 0 0].p = u and [0 0 0 x y 1].p = v
	A = np.concatenate((np.stack([x,y,ones,zeros,zeros,zeros],axis=1),
						np.stack([zeros,zeros,zeros,x,y,ones],axis=1)),axis=0)
	b = np.concatenate((u,v),axis=0)
	sol = scipy.linalg.lstsq(A,b)[0].squeeze()
	pMtrx = np.array([[sol[0],sol[1],sol[2]],
					  [sol[3],sol[4],sol[5]],
					  [0,0,1]],dtype=np.float32)
	return pMtrx
# compute composition of warp parameters
def compose(opt,p,dp):
	"""Compose the warp update dp with the current warp p and return the
	parameters of the combined warp (dp applied after p)."""
	with tf.name_scope("compose"):
		matP = vec2mtrx(opt,p)
		matDp = vec2mtrx(opt,dp)
		combined = tf.matmul(matDp,matP)
		# renormalize so the homogeneous (2,2) entry stays 1
		combined /= combined[:,2:3,2:3]
		return mtrx2vec(opt,combined)
# compute inverse of warp parameters
def inverse(opt,p):
	"""Return the warp parameters of the inverse transformation of p."""
	with tf.name_scope("inverse"):
		invMtrx = tf.matrix_inverse(vec2mtrx(opt,p))
		return mtrx2vec(opt,invMtrx)
# convert warp parameters to matrix
def vec2mtrx(opt,p):
	"""Convert a batch of warp parameter vectors p ([batchSize,warpDim])
	into [batchSize,3,3] warp matrices for the configured opt.warpType.

	Parameters are offsets from the identity warp (p=0 gives the identity
	matrix for every warp type)."""
	with tf.name_scope("vec2mtrx"):
		O = tf.zeros([opt.batchSize])
		I = tf.ones([opt.batchSize])
		if opt.warpType=="translation":
			tx,ty = tf.unstack(p,axis=1)
			pMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1])
		if opt.warpType=="similarity":
			pc,ps,tx,ty = tf.unstack(p,axis=1)
			pMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1])
		if opt.warpType=="affine":
			# BUG FIX: affine has 6 parameters (opt.warpDim==6); the original
			# unpacked 8 values here, so tf.unstack raised a ValueError and
			# --warpType affine could never run
			p1,p2,p3,p4,p5,p6 = tf.unstack(p,axis=1)
			pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1])
		if opt.warpType=="homography":
			p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1)
			pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1])
	return pMtrx
# convert warp matrix to parameters
def mtrx2vec(opt,pMtrx):
	"""Convert [batchSize,3,3] warp matrices back into parameter vectors
	(offsets from identity) for the configured opt.warpType."""
	with tf.name_scope("mtrx2vec"):
		rows = tf.unstack(pMtrx,axis=1)
		e00,e01,e02 = tf.unstack(rows[0],axis=1)
		e10,e11,e12 = tf.unstack(rows[1],axis=1)
		e20,e21,e22 = tf.unstack(rows[2],axis=1)
		# the -1 terms remove the identity offset added in vec2mtrx
		if opt.warpType=="translation":
			p = tf.stack([e02,e12],axis=1)
		elif opt.warpType=="similarity":
			p = tf.stack([e00-1,e10,e02,e12],axis=1)
		elif opt.warpType=="affine":
			p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1)
		elif opt.warpType=="homography":
			p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1)
		return p
# warp the image
def transformImage(opt,image,pMtrx):
with tf.name_scope("transformImage"):
refMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1])
transMtrx = tf.matmul(refMtrx,pMtrx)
# warp the canonical coordinates
X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))
X,Y = X.flatten(),Y.flatten()
XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T
XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)
XYwarpHom = tf.matmul(transMtrx,XYhom)
XwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1)
Xwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])
Ywarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])
# get the integer sampling coordinates
Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)
Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)
XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)
YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)
imageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W])
imageVec = tf.reshape(image,[-1,int(image.shape[-1])])
imageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0)
idxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt
idxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt
idxBL = (imageIdx*opt.H+YceilInt)*opt.W+XfloorInt
idxBR = (imageIdx*opt.H+YceilInt)*opt.W+XceilInt
idxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W)
def insideImage(Xint,Yint):
return (Xint>=0)&(Xint