Repository: chenhsuanlin/inverse-compositional-STN Branch: master Commit: 4a2a8fc7b9a1 Files: 22 Total size: 86.3 KB Directory structure: gitextract_t1x_4nxr/ ├── .editorconfig ├── .gitignore ├── LICENSE ├── MNIST-pytorch/ │ ├── data.py │ ├── graph.py │ ├── options.py │ ├── train.py │ ├── util.py │ └── warp.py ├── MNIST-tensorflow/ │ ├── data.py │ ├── graph.py │ ├── options.py │ ├── train.py │ ├── util.py │ └── warp.py ├── README.md └── traffic-sign-tensorflow/ ├── data.py ├── graph.py ├── options.py ├── train.py ├── util.py └── warp.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ root = true [*] end_of_line = lf insert_final_newline = true indent_style = tab indent_size = 4 trim_trailing_whitespace = true [*.md] trim_trailing_whitespace = false ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Chen-Hsuan Lin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: MNIST-pytorch/data.py ================================================ import numpy as np import scipy.linalg import os,time import torch import torchvision import warp,util # load MNIST data def loadMNIST(opt,path): os.makedirs(path,exist_ok=True) trainDataset = torchvision.datasets.MNIST(path,train=True,download=True) testDataset = torchvision.datasets.MNIST(path,train=False,download=True) trainData,testData = {},{} trainData["image"] = torch.tensor([np.array(sample[0])/255.0 for sample in trainDataset],dtype=torch.float32) testData["image"] = torch.tensor([np.array(sample[0])/255.0 for sample in testDataset],dtype=torch.float32) trainData["label"] = torch.tensor([sample[1] for sample in trainDataset]) testData["label"] = torch.tensor([sample[1] for sample in testDataset]) return trainData,testData # generate training batch def genPerturbations(opt): X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1]) Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1]) O = np.zeros([opt.batchSize,4],dtype=np.float32) I = np.ones([opt.batchSize,4],dtype=np.float32) dX = np.random.randn(opt.batchSize,4)*opt.pertScale \ +np.random.randn(opt.batchSize,1)*opt.transScale dY = np.random.randn(opt.batchSize,4)*opt.pertScale \ +np.random.randn(opt.batchSize,1)*opt.transScale dX,dY = dX.astype(np.float32),dY.astype(np.float32) # fit warp parameters to generated displacements if opt.warpType=="homography": A = np.concatenate([np.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1), np.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],axis=1) b = np.expand_dims(np.concatenate([X+dX,Y+dY],axis=1),axis=-1) pPert = np.matmul(np.linalg.inv(A),b).squeeze() pPert -= np.array([1,0,0,0,1,0,0,0]) else: if opt.warpType=="translation": J = np.concatenate([np.stack([I,O],axis=-1), np.stack([O,I],axis=-1)],axis=1) if opt.warpType=="similarity": J = np.concatenate([np.stack([X,Y,I,O],axis=-1), np.stack([-Y,X,O,I],axis=-1)],axis=1) if opt.warpType=="affine": J 
= np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1), np.stack([O,O,O,X,Y,I],axis=-1)],axis=1) dXY = np.expand_dims(np.concatenate([dX,dY],axis=1),axis=-1) Jtransp = np.transpose(J,axes=[0,2,1]) pPert = np.matmul(np.linalg.inv(np.matmul(Jtransp,J)),np.matmul(Jtransp,dXY)).squeeze() pInit = torch.from_numpy(pPert).cuda() return pInit # make training batch def makeBatch(opt,data): N = len(data["image"]) randIdx = np.random.randint(N,size=[opt.batchSize]) batch = { "image": data["image"][randIdx].cuda(), "label": data["label"][randIdx].cuda(), } return batch # evaluation on test set def evalTest(opt,data,geometric,classifier): geometric.eval() classifier.eval() N = len(data["image"]) batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx # make training batch image = data["image"][idx].cuda() label = data["label"][idx].cuda() image.data.unsqueeze_(dim=1) # generate perturbation pInit = genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) imagePert = warp.transformImage(opt,image,pInitMtrx) imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert) imageWarp = imageWarpAll[-1] output = classifier(opt,imageWarp) _,pred = output.max(dim=1) count += int((pred==label).sum().cpu().numpy()) if opt.netType=="STN" or opt.netType=="IC-STN": imgPert = imagePert.detach().cpu().numpy() imgWarp = imageWarp.detach().cpu().numpy() for i in range(len(realIdx)): l = data["label"][idx[i]].item() if l not in warped[0]: warped[0][l] = [] if l not in warped[1]: warped[1][l] = [] warped[0][l].append(imgPert[i]) warped[1][l].append(imgWarp[i]) accuracy = float(count)/N if opt.netType=="STN" or opt.netType=="IC-STN": mean = [np.array([np.mean(warped[0][l],axis=0) for l in 
warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None geometric.train() classifier.train() return accuracy,mean,var ================================================ FILE: MNIST-pytorch/graph.py ================================================ import numpy as np import torch import time import data,warp,util # build classification network class FullCNN(torch.nn.Module): def __init__(self,opt): super(FullCNN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[3,3],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(3),torch.nn.ReLU(True), conv2Layer(6),torch.nn.ReLU(True),maxpoolLayer(), conv2Layer(9),torch.nn.ReLU(True), conv2Layer(12),torch.nn.ReLU(True) ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.labelN) ) initialize(opt,self,opt.stdC) def forward(self,opt,image): feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) output = feat return output # build classification network class CNN(torch.nn.Module): def __init__(self,opt): super(CNN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[9,9],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(3),torch.nn.ReLU(True) ) self.inDim *= 20**2 self.linearLayers = torch.nn.Sequential( linearLayer(opt.labelN) ) 
initialize(opt,self,opt.stdC) def forward(self,opt,image): feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) output = feat return output # an identity class to skip geometric predictors class Identity(torch.nn.Module): def __init__(self): super(Identity,self).__init__() def forward(self,opt,feat): return [feat] # build Spatial Transformer Network class STN(torch.nn.Module): def __init__(self,opt): super(STN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(4),torch.nn.ReLU(True), conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer() ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.warpDim) ) initialize(opt,self,opt.stdGP,last0=True) def forward(self,opt,image): imageWarpAll = [image] feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) p = feat pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # build Inverse Compositional STN class ICSTN(torch.nn.Module): def __init__(self,opt): super(ICSTN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(4),torch.nn.ReLU(True), conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer() ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( 
linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.warpDim) ) initialize(opt,self,opt.stdGP,last0=True) def forward(self,opt,image,p): imageWarpAll = [] for l in range(opt.warpN): pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) feat = imageWarp feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) dp = feat p = warp.compose(opt,p,dp) pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # initialize weights/biases def initialize(opt,model,stddev,last0=False): for m in model.conv2Layers: if isinstance(m,torch.nn.Conv2d): m.weight.data.normal_(0,stddev) m.bias.data.normal_(0,stddev) for m in model.linearLayers: if isinstance(m,torch.nn.Linear): if last0 and m is model.linearLayers[-1]: m.weight.data.zero_() m.bias.data.zero_() else: m.weight.data.normal_(0,stddev) m.bias.data.normal_(0,stddev) ================================================ FILE: MNIST-pytorch/options.py ================================================ import numpy as np import argparse import warp import util import torch def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="28x28", help="image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.1, 
help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--port", type=int, default=8097, help="port number for visdom visualization") parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrDecay", type=float, default=1.0, help="learning rate decay") parser.add_argument("--lrStep", type=int, default=100000, help="learning rate decay step size") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-2 if opt.netType=="STN" else \ 1e-4 if opt.netType=="IC-STN" else None # --- below are automatically set --- assert(torch.cuda.is_available()) # support only training on GPU for now torch.set_default_tensor_type("torch.cuda.FloatTensor") opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 10 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.refMtrx = 
np.eye(3).astype(np.float32) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: {0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: MNIST-pytorch/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import torch import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building network...")) with torch.cuda.device(0): # ------ build network ------ if opt.netType=="CNN": geometric = graph.Identity() classifier = graph.FullCNN(opt) elif 
opt.netType=="STN": geometric = graph.STN(opt) classifier = graph.CNN(opt) elif opt.netType=="IC-STN": geometric = graph.ICSTN(opt) classifier = graph.CNN(opt) # ------ define loss ------ loss = torch.nn.CrossEntropyLoss() # ------ optimizer ------ optimList = [{ "params": geometric.parameters(), "lr": opt.lrGP }, { "params": classifier.parameters(), "lr": opt.lrC }] optim = torch.optim.SGD(optimList) # load data print(util.toMagenta("loading MNIST dataset...")) trainData,testData = data.loadMNIST(opt,"data") # visdom visualizer vis = util.Visdom(opt) print(util.toYellow("======= TRAINING START =======")) timeStart = time.time() # start session with torch.cuda.device(0): geometric.train() classifier.train() if opt.fromIt!=0: util.restoreModel(opt,geometric,classifier,opt.fromIt) print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt))) print(util.toMagenta("start training...")) # training loop for i in range(opt.fromIt,opt.toIt): lrGP = opt.lrGP*opt.lrDecay**(i//opt.lrStep) lrC = opt.lrC*opt.lrDecay**(i//opt.lrStep) # make training batch batch = data.makeBatch(opt,trainData) image = batch["image"].unsqueeze(dim=1) label = batch["label"] # generate perturbation pInit = data.genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) # forward/backprop through network optim.zero_grad() imagePert = warp.transformImage(opt,image,pInitMtrx) imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert) imageWarp = imageWarpAll[-1] output = classifier(opt,imageWarp) train_loss = loss(output,label) train_loss.backward() # run one step optim.step() if (i+1)%100==0: print("it. 
{0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}" .format(util.toCyan("{0}".format(i+1)), opt.toIt, util.toGreen("{0:.2f}".format(time.time()-timeStart)), util.toYellow("{0:.0e}".format(lrGP)), util.toYellow("{0:.0e}".format(lrC)), util.toRed("{0:.4f}".format(train_loss)))) if (i+1)%200==0: vis.trainLoss(opt,i+1,train_loss) if (i+1)%1000==0: # evaluate on test set testAcc,testMean,testVar = data.evalTest(opt,testData,geometric,classifier) testError = (1-testAcc)*100 vis.testLoss(opt,i+1,testError) if opt.netType=="STN" or opt.netType=="IC-STN": vis.meanVar(opt,testMean,testVar) if (i+1)%10000==0: util.saveModel(opt,geometric,classifier,i+1) print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1))) print(util.toYellow("======= TRAINING DONE =======")) ================================================ FILE: MNIST-pytorch/util.py ================================================ import numpy as np import scipy.misc import torch import os import termcolor import visdom def mkdir(path): if not os.path.exists(path): os.mkdir(path) def imread(fname): return scipy.misc.imread(fname)/255.0 def imsave(fname,array): scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname) # convert to colored strings def toRed(content): return termcolor.colored(content,"red",attrs=["bold"]) def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"]) def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"]) def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"]) def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"]) def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"]) # restore model def restoreModel(opt,geometric,classifier,it): geometric.load_state_dict(torch.load("models_{0}/{1}_it{2}_GP.npy".format(opt.group,opt.model,it))) classifier.load_state_dict(torch.load("models_{0}/{1}_it{2}_C.npy".format(opt.group,opt.model,it))) # save model def 
saveModel(opt,geometric,classifier,it): torch.save(geometric.state_dict(),"models_{0}/{1}_it{2}_GP.npy".format(opt.group,opt.model,it)) torch.save(classifier.state_dict(),"models_{0}/{1}_it{2}_C.npy".format(opt.group,opt.model,it)) class Visdom(): def __init__(self,opt): self.vis = visdom.Visdom(port=opt.port,use_incoming_socket=False) self.trainLossInit = True self.testLossInit = True self.meanVarInit = True def tileImages(self,opt,images,H,W,HN,WN): assert(len(images)==HN*WN) images = images.reshape([HN,WN,-1,H,W]) images = [list(i) for i in images] imageBlocks = np.concatenate([np.concatenate(row,axis=2) for row in images],axis=1) return imageBlocks def trainLoss(self,opt,it,loss): loss = float(loss.detach().cpu().numpy()) if self.trainLossInit: self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_trainloss".format(opt.model), opts={ "title": "{0} (TRAIN_loss)".format(opt.model) }) self.trainLossInit = False else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_trainloss",update="append") def testLoss(self,opt,it,loss): if self.testLossInit: self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_testloss".format(opt.model), opts={ "title": "{0} (TEST_error)".format(opt.model) }) self.testLossInit = False else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_testloss",update="append") def meanVar(self,opt,mean,var): mean = [self.tileImages(opt,m,opt.H,opt.W,1,10) for m in mean] var = [self.tileImages(opt,v,opt.H,opt.W,1,10)*3 for v in var] self.vis.image(mean[0].clip(0,1),win="{0}_meaninit".format(opt.model), opts={ "title": "{0} (TEST_mean_init)".format(opt.model) }) self.vis.image(mean[1].clip(0,1),win="{0}_meanwarped".format(opt.model), opts={ "title": "{0} (TEST_mean_warped)".format(opt.model) }) self.vis.image(var[0].clip(0,1),win="{0}_varinit".format(opt.model), opts={ "title": "{0} (TEST_var_init)".format(opt.model) }) self.vis.image(var[1].clip(0,1),win="{0}_varwarped".format(opt.model), opts={ "title": "{0} 
(TEST_var_warped)".format(opt.model) }) ================================================ FILE: MNIST-pytorch/warp.py ================================================ import numpy as np import scipy.linalg import torch import util # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=torch.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) pMtrxNew = dpMtrx.matmul(pMtrx) pMtrxNew = pMtrxNew/pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): pMtrx = vec2mtrx(opt,p) pInvMtrx = pMtrx.inverse() pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): O = torch.zeros(opt.batchSize,dtype=torch.float32).cuda() I = torch.ones(opt.batchSize,dtype=torch.float32).cuda() if opt.warpType=="translation": tx,ty = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I,O,tx],dim=-1), torch.stack([O,I,ty],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="similarity": pc,ps,tx,ty = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+pc,-ps,tx],dim=-1), torch.stack([ps,I+pc,ty],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6 = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1), torch.stack([p4,I+p5,p6],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="homography": p1,p2,p3,p4,p5,p6,p7,p8 = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1), torch.stack([p4,I+p5,p6],dim=-1), torch.stack([p7,p8,I],dim=-1)],dim=1) return 
pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): [row0,row1,row2] = torch.unbind(pMtrx,dim=1) [e00,e01,e02] = torch.unbind(row0,dim=1) [e10,e11,e12] = torch.unbind(row1,dim=1) [e20,e21,e22] = torch.unbind(row2,dim=1) if opt.warpType=="translation": p = torch.stack([e02,e12],dim=1) if opt.warpType=="similarity": p = torch.stack([e00-1,e10,e02,e12],dim=1) if opt.warpType=="affine": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12],dim=1) if opt.warpType=="homography": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],dim=1) return p # warp the image def transformImage(opt,image,pMtrx): refMtrx = torch.from_numpy(opt.refMtrx).cuda() refMtrx = refMtrx.repeat(opt.batchSize,1,1) transMtrx = refMtrx.matmul(pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYhom = torch.from_numpy(XYhom).cuda() XYwarpHom = transMtrx.matmul(XYhom) XwarpHom,YwarpHom,ZwarpHom = torch.unbind(XYwarpHom,dim=1) Xwarp = (XwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W) Ywarp = (YwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W) grid = torch.stack([Xwarp,Ywarp],dim=-1) # sampling with bilinear interpolation imageWarp = torch.nn.functional.grid_sample(image,grid,mode="bilinear") return imageWarp ================================================ FILE: MNIST-tensorflow/data.py ================================================ import numpy as np import scipy.linalg import os,time import tensorflow as tf import warp # load MNIST data def loadMNIST(fname): if not os.path.exists(fname): # download and preprocess MNIST dataset from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/",one_hot=True) trainData,validData,testData = {},{},{} trainData["image"] = mnist.train.images.reshape([-1,28,28]).astype(np.float32) 
        validData["image"] = mnist.validation.images.reshape([-1,28,28]).astype(np.float32)
        testData["image"] = mnist.test.images.reshape([-1,28,28]).astype(np.float32)
        # one-hot labels -> integer class indices
        trainData["label"] = np.argmax(mnist.train.labels.astype(np.float32),axis=1)
        validData["label"] = np.argmax(mnist.validation.labels.astype(np.float32),axis=1)
        testData["label"] = np.argmax(mnist.test.labels.astype(np.float32),axis=1)
        # NOTE(review): os.makedirs raises if the directory already exists or if
        # os.path.dirname(fname) is "" — confirm fname always has a fresh parent directory
        os.makedirs(os.path.dirname(fname))
        # cache the preprocessed splits as a single .npz; each entry is a pickled dict
        np.savez(fname,train=trainData,valid=validData,test=testData)
        # remove the raw download now that the cache exists
        os.system("rm -rf MNIST_data")
    # NOTE(review): the cached entries are object arrays — NumPy>=1.16.3 requires
    # allow_pickle=True here; confirm the pinned NumPy version
    MNIST = np.load(fname)
    trainData = MNIST["train"].item()
    validData = MNIST["valid"].item()
    testData = MNIST["test"].item()
    return trainData,validData,testData

# generate training batch
# (random perturbations of the 4 canonical corners, fitted to parameters of the
# chosen warp family; built as TF1 graph ops so each sess.run draws fresh noise)
def genPerturbations(opt):
    with tf.name_scope("genPerturbations"):
        X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])
        Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])
        dX = tf.random_normal([opt.batchSize,4])*opt.pertScale \
            +tf.random_normal([opt.batchSize,1])*opt.transScale
        dY = tf.random_normal([opt.batchSize,4])*opt.pertScale \
            +tf.random_normal([opt.batchSize,1])*opt.transScale
        O = np.zeros([opt.batchSize,4],dtype=np.float32)
        I = np.ones([opt.batchSize,4],dtype=np.float32)
        # fit warp parameters to generated displacements
        if opt.warpType=="homography":
            # 8 corner equations determine the 8 homography parameters exactly
            A = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),
                           tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1)
            b = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1)
            pPert = tf.matrix_solve(A,b)[:,:,0]
            # subtract the identity so zero parameters mean "no warp"
            pPert -= tf.to_float([[1,0,0,0,1,0,0,0]])
        else:
            if opt.warpType=="translation":
                J = np.concatenate([np.stack([I,O],axis=-1),
                                    np.stack([O,I],axis=-1)],axis=1)
            if opt.warpType=="similarity":
                J = np.concatenate([np.stack([X,Y,I,O],axis=-1),
                                    np.stack([-Y,X,O,I],axis=-1)],axis=1)
            if opt.warpType=="affine":
                J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),
                                    np.stack([O,O,O,X,Y,I],axis=-1)],axis=1)
            dXY = tf.expand_dims(tf.concat([dX,dY],1),-1)
            # least-squares fit: these warp families are over-determined by the 4 corners
            pPert = tf.matrix_solve_ls(J,dXY)[:,:,0]
        return pPert

# make training batch
def makeBatch(opt,data,PH): N = len(data["image"]) randIdx = np.random.randint(N,size=[opt.batchSize]) # put data in placeholders [image,label] = PH batch = { image: data["image"][randIdx], label: data["label"][randIdx], } return batch # evaluation on test set def evalTest(opt,sess,data,PH,prediction,imagesEval=[]): N = len(data["image"]) # put data in placeholders [image,label] = PH batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx batch = { image: data["image"][idx], label: data["label"][idx], } evalList = sess.run([prediction]+imagesEval,feed_dict=batch) pred = evalList[0] count += pred[:len(realIdx)].sum() if opt.netType=="STN" or opt.netType=="IC-STN": imgs = evalList[1:] for i in range(len(realIdx)): l = data["label"][idx[i]] if l not in warped[0]: warped[0][l] = [] if l not in warped[1]: warped[1][l] = [] warped[0][l].append(imgs[0][i]) warped[1][l].append(imgs[1][i]) accuracy = float(count)/N if opt.netType=="STN" or opt.netType=="IC-STN": mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None return accuracy,mean,var ================================================ FILE: MNIST-tensorflow/graph.py ================================================ import numpy as np import tensorflow as tf import time import data,warp,util # build classification network def fullCNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[3,3,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = 
tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC) fc = tf.matmul(feat,weight)+bias return fc with tf.variable_scope("classifier"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,3) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,6) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") with tf.variable_scope("conv3"): feat = conv2Layer(opt,feat,9) feat = tf.nn.relu(feat) with tf.variable_scope("conv4"): feat = conv2Layer(opt,feat,12) feat = tf.nn.relu(feat) feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc5"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc6"): feat = linearLayer(opt,feat,opt.labelN) output = feat return output # build classification network def CNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[9,9,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC) fc = tf.matmul(feat,weight)+bias return fc with tf.variable_scope("classifier"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,3) feat = tf.nn.relu(feat) feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc2"): feat = linearLayer(opt,feat,opt.labelN) output = feat return output # build Spatial Transformer Network def STN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim,final=False): weight,bias = 
createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP) fc = tf.matmul(feat,weight)+bias return fc imageWarpAll = [image] with tf.variable_scope("geometric"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,4) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,8) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc3"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc4"): feat = linearLayer(opt,feat,opt.warpDim,final=True) p = feat pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # build Inverse Compositional STN def ICSTN(opt,image,p): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim,final=False): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP) fc = tf.matmul(feat,weight)+bias return fc imageWarpAll = [] for l in range(opt.warpN): with tf.variable_scope("geometric",reuse=l>0): pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) feat = imageWarp with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,4) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,8) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc3"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc4"): feat = linearLayer(opt,feat,opt.warpDim,final=True) dp = feat p = warp.compose(opt,p,dp) pMtrx = warp.vec2mtrx(opt,p) 
imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # auxiliary function for creating weight and bias def createVariable(opt,weightShape,biasShape=None,stddev=None): if biasShape is None: biasShape = [weightShape[-1]] weight = tf.get_variable("weight",shape=weightShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) bias = tf.get_variable("bias",shape=biasShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) return weight,bias ================================================ FILE: MNIST-tensorflow/options.py ================================================ import numpy as np import argparse import warp import util def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="28x28", help="image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.1, help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") 
parser.add_argument("--lrCdecay", type=float, default=1.0, help="learning rate decay (classification network)") parser.add_argument("--lrCstep", type=int, default=100000, help="learning rate decay step size (classification network)") parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrGPdecay", type=float, default=1.0, help="learning rate decay (geometric predictor)") parser.add_argument("--lrGPstep", type=int, default=100000, help="learning rate decay step size (geometric predictor)") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-2 if opt.netType=="STN" else \ 1e-4 if opt.netType=="IC-STN" else None # --- below are automatically set --- opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 10 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: {0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) 
print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: MNIST-tensorflow/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import tensorflow as tf import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building graph...")) tf.reset_default_graph() # build graph with tf.device("/gpu:0"): # ------ define input data ------ image = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W]) label = tf.placeholder(tf.int64,shape=[opt.batchSize]) PH = [image,label] # ------ generate perturbation ------ pInit = data.genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) # ------ build network ------ image = tf.expand_dims(image,axis=-1) imagePert = warp.transformImage(opt,image,pInitMtrx) if opt.netType=="CNN": output = graph.fullCNN(opt,imagePert) elif 
opt.netType=="STN": imageWarpAll = graph.STN(opt,imagePert) imageWarp = imageWarpAll[-1] output = graph.CNN(opt,imageWarp) elif opt.netType=="IC-STN": imageWarpAll = graph.ICSTN(opt,image,pInit) imageWarp = imageWarpAll[-1] output = graph.CNN(opt,imageWarp) softmax = tf.nn.softmax(output) labelOnehot = tf.one_hot(label,opt.labelN) prediction = tf.equal(tf.argmax(softmax,1),label) # ------ define loss ------ softmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot) loss = tf.reduce_mean(softmaxLoss) # ------ optimizer ------ lrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[]) optim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH) # ------ generate summaries ------ summaryImageTrain = [] summaryImageTest = [] if opt.netType=="STN" or opt.netType=="IC-STN": for l in range(opt.warpN+1): summaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l],"TRAIN_warp{0}".format(l),opt.H,opt.W)) summaryImageTest.append(util.imageSummary(opt,imageWarpAll[l],"TEST_warp{0}".format(l),opt.H,opt.W)) summaryImageTrain = tf.summary.merge(summaryImageTrain) summaryImageTest = tf.summary.merge(summaryImageTest) summaryLossTrain = tf.summary.scalar("TRAIN_loss",loss) testErrorPH = tf.placeholder(tf.float32,shape=[]) testImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,1]) summaryErrorTest = tf.summary.scalar("TEST_error",testErrorPH) if opt.netType=="STN" or opt.netType=="IC-STN": summaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_init",opt.H,opt.W) summaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_warped",opt.H,opt.W) summaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_init",opt.H,opt.W) summaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_warped",opt.H,opt.W) # load data print(util.toMagenta("loading MNIST dataset...")) trainData,validData,testData = data.loadMNIST("data/MNIST.npz") # prepare model saver/summary writer saver = 
tf.train.Saver(max_to_keep=20) summaryWriter = tf.summary.FileWriter("summary_{0}/{1}".format(opt.group,opt.model)) print(util.toYellow("======= TRAINING START =======")) timeStart = time.time() # start session tfConfig = tf.ConfigProto(allow_soft_placement=True) tfConfig.gpu_options.allow_growth = True with tf.Session(config=tfConfig) as sess: sess.run(tf.global_variables_initializer()) summaryWriter.add_graph(sess.graph) if opt.fromIt!=0: util.restoreModel(opt,sess,saver,opt.fromIt) print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt))) print(util.toMagenta("start training...")) # training loop for i in range(opt.fromIt,opt.toIt): lrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep) lrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep) # make training batch batch = data.makeBatch(opt,trainData,PH) batch[lrGP_PH] = lrGP batch[lrC_PH] = lrC # run one step _,l = sess.run([optim,loss],feed_dict=batch) if (i+1)%100==0: print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}" .format(util.toCyan("{0}".format(i+1)), opt.toIt, util.toGreen("{0:.2f}".format(time.time()-timeStart)), util.toYellow("{0:.0e}".format(lrGP)), util.toYellow("{0:.0e}".format(lrC)), util.toRed("{0:.4f}".format(l)))) if (i+1)%100==0: summaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1) if (i+1)%500==0 and (opt.netType=="STN" or opt.netType=="IC-STN"): summaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1) summaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1) if (i+1)%1000==0: # evaluate on test set if opt.netType=="STN" or opt.netType=="IC-STN": testAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp]) else: testAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction) testError = (1-testAcc)*100 summaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1) if opt.netType=="STN" or opt.netType=="IC-STN": 
summaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1) summaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1) summaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1) summaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1) if (i+1)%10000==0: util.saveModel(opt,sess,saver,i+1) print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1))) print(util.toYellow("======= TRAINING DONE =======")) ================================================ FILE: MNIST-tensorflow/util.py ================================================ import numpy as np import scipy.misc import tensorflow as tf import os import termcolor def mkdir(path): if not os.path.exists(path): os.mkdir(path) def imread(fname): return scipy.misc.imread(fname)/255.0 def imsave(fname,array): scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname) # convert to colored strings def toRed(content): return termcolor.colored(content,"red",attrs=["bold"]) def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"]) def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"]) def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"]) def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"]) def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"]) # make image summary from image batch def imageSummary(opt,image,tag,H,W): blockSize = opt.visBlockSize imageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize) imagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1]) imageTransp = tf.transpose(imagePermute,[1,0,3,2,4]) imageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1]) imageBlocks = tf.cast(imageBlocks*255,tf.uint8) summary = tf.summary.image(tag,imageBlocks) return summary # make image summary from 
image batch (mean/variance) def imageSummaryMeanVar(opt,image,tag,H,W): imageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[1,10]) imagePermute = tf.reshape(imageOne,[H,1,W,10,-1]) imageTransp = tf.transpose(imagePermute,[1,0,3,2,4]) imageBlocks = tf.reshape(imageTransp,[1,H*1,W*10,-1]) imageBlocks = tf.cast(imageBlocks*255,tf.uint8) summary = tf.summary.image(tag,imageBlocks) return summary # set optimizer for different learning rates def setOptimizer(opt,loss,lrGP,lrC): varsGP = [v for v in tf.global_variables() if "geometric" in v.name] varsC = [v for v in tf.global_variables() if "classifier" in v.name] gradC = tf.gradients(loss,varsC) optimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC)) if len(varsGP)>0: gradGP = tf.gradients(loss,varsGP) optimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP)) optim = tf.group(optimC,optimGP) else: optim = optimC return optim # restore model def restoreModel(opt,sess,saver,it): saver.restore(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) # save model def saveModel(opt,sess,saver,it): saver.save(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) ================================================ FILE: MNIST-tensorflow/warp.py ================================================ import numpy as np import scipy.linalg import tensorflow as tf # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): with tf.name_scope("compose"): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) 
pMtrxNew = tf.matmul(dpMtrx,pMtrx) pMtrxNew /= pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): with tf.name_scope("inverse"): pMtrx = vec2mtrx(opt,p) pInvMtrx = tf.matrix_inverse(pMtrx) pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): with tf.name_scope("vec2mtrx"): O = tf.zeros([opt.batchSize]) I = tf.ones([opt.batchSize]) if opt.warpType=="translation": tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="similarity": pc,ps,tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="homography": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1]) return pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): with tf.name_scope("mtrx2vec"): [row0,row1,row2] = tf.unstack(pMtrx,axis=1) [e00,e01,e02] = tf.unstack(row0,axis=1) [e10,e11,e12] = tf.unstack(row1,axis=1) [e20,e21,e22] = tf.unstack(row2,axis=1) if opt.warpType=="translation": p = tf.stack([e02,e12],axis=1) if opt.warpType=="similarity": p = tf.stack([e00-1,e10,e02,e12],axis=1) if opt.warpType=="affine": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1) if opt.warpType=="homography": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1) return p # warp the image def transformImage(opt,image,pMtrx): with tf.name_scope("transformImage"): refMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1]) transMtrx = tf.matmul(refMtrx,pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = 
np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYwarpHom = tf.matmul(transMtrx,XYhom) XwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1) Xwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) Ywarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) # get the integer sampling coordinates Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp) Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp) XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil) YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil) imageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W]) imageVec = tf.reshape(image,[-1,int(image.shape[-1])]) imageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0) idxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt idxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt idxBL = (imageIdx*opt.H+YceilInt)*opt.W+XfloorInt idxBR = (imageIdx*opt.H+YceilInt)*opt.W+XceilInt idxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W) def insideImage(Xint,Yint): return (Xint>=0)&(Xint=0)&(Yint

We provide TensorFlow code for the following experiments: - MNIST classification - traffic sign classification **[NEW!]** The PyTorch implementation of the MNIST experiment is now up! -------------------------------------- ## TensorFlow ### Prerequisites This code is developed with Python3 (`python3`) but it is also compatible with Python2.7 (`python`). TensorFlow r1.0+ is required. The dependencies can install by running ``` pip3 install --upgrade numpy scipy termcolor matplotlib tensorflow-gpu ``` If you're using Python2.7, use `pip2` instead; if you don't have sudo access, add the `--user` flag. ### Running the code The training code can be executed via the command ``` python3 train.py [(options)] ``` `` should be one of the following: 1. `CNN` - standard convolutional neural network 2. `STN` - Spatial Transformer Network (STN) 3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN) The list of optional arguments can be found by executing `python3 train.py --help`. The default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better. When the code is run for the first time, the datasets will be automatically downloaded and preprocessed. The checkpoints are saved in the automatically created directory `model_GROUP`; summaries are saved in `summary_GROUP`. ### Visualizing the results We've included code to visualize the training over TensorBoard. To execute, run ``` tensorboard --logdir=summary_GROUP --port=6006 ``` We provide three types of data visualization: 1. **SCALARS**: training/test error over iterations 2. **IMAGES**: alignment results and mean/variance appearances 3. **GRAPH**: network architecture -------------------------------------- ## PyTorch The PyTorch version of the code is stil under active development. The training speed is currently slower than the TensorFlow version. Suggestions on improvements are welcome! 
:) ### Prerequisites This code is developed with Python3 (`python3`). It has not been tested with Python2.7 yet. PyTorch 0.2.0+ is required. Please see http://pytorch.org/ for installation instructions. Visdom is also required; it can be installed by running ``` pip3 install --upgrade visdom ``` If you don't have sudo access, add the `--user` flag. ### Running the code First, start a Visdom server by running ``` python3 -m visdom.server -port=7000 ``` The training code can be executed via the command (using the same port number) ``` python3 train.py --port=7000 [(options)] ``` `` should be one of the following: 1. `CNN` - standard convolutional neural network 2. `STN` - Spatial Transformer Network (STN) 3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN) The list of optional arguments can be found by executing `python3 train.py --help`. The default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better. When the code is run for the first time, the datasets will be automatically downloaded and preprocessed. The checkpoints are saved in the automatically created directory `model_GROUP`; summaries are saved in `summary_GROUP`. ### Visualizing the results We provide three types of data visualization on Visdom: 1. Training/test error over iterations 2. Alignment results and mean/variance appearances -------------------------------------- If you find our code useful for your research, please cite ``` @inproceedings{lin2017inverse, title={Inverse Compositional Spatial Transformer Networks}, author={Lin, Chen-Hsuan and Lucey, Simon}, booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})}, year={2017} } ``` Please contact me (chlin@cmu.edu) if you have any questions! 
================================================ FILE: traffic-sign-tensorflow/data.py ================================================ import numpy as np import scipy.linalg,scipy.misc import os,time import tensorflow as tf import matplotlib.pyplot as plt import csv import warp # load GTSRB data def loadGTSRB(opt,fname): if not os.path.exists(fname): # download and preprocess GTSRB dataset os.makedirs(os.path.dirname(fname)) os.system("wget -O data/GTSRB_Final_Training_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip") os.system("wget -O data/GTSRB_Final_Test_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_Images.zip") os.system("wget -O data/GTSRB_Final_Test_GT.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_GT.zip") os.system("cd data && unzip GTSRB_Final_Training_Images.zip") os.system("cd data && unzip GTSRB_Final_Test_Images.zip") os.system("cd data && unzip GTSRB_Final_Test_GT.zip") # training data print("preparing training data...") images,bboxes,labels = [],[],[] for c in range(43): prefix = "data/GTSRB/Final_Training/Images/{0:05d}".format(c) with open("{0}/GT-{1:05d}.csv".format(prefix,c)) as file: reader = csv.reader(file,delimiter=";") next(reader) for line in reader: img = plt.imread(prefix+"/"+line[0]) rawH,rawW = img.shape[0],img.shape[1] scaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW imgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3)) images.append(imgResize) bboxes.append([float(line[3])*scaleW,float(line[4])*scaleH, float(line[5])*scaleW,float(line[6])*scaleH]) labels.append(int(line[7])) trainData = { "image": np.array(images), "bbox": np.array(bboxes), "label": np.array(labels) } # test data print("preparing test data...") images,bboxes,labels = [],[],[] prefix = "data/GTSRB/Final_Test/Images/" with open("data/GT-final_test.csv") as file: reader = csv.reader(file,delimiter=";") next(reader) for line in reader: img = plt.imread(prefix+"/"+line[0]) rawH,rawW = 
img.shape[0],img.shape[1] scaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW imgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3)) images.append(imgResize) bboxes.append([float(line[3])*scaleW,float(line[4])*scaleH, float(line[5])*scaleW,float(line[6])*scaleH]) labels.append(int(line[7])) testData = { "image": np.array(images), "bbox": np.array(bboxes), "label": np.array(labels) } np.savez(fname,train=trainData,test=testData) os.system("rm -rf data/*.zip") GTSRB = np.load(fname) trainData = GTSRB["train"].item() testData = GTSRB["test"].item() return trainData,testData # generate training batch def genPerturbations(opt): with tf.name_scope("genPerturbations"): X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1]) Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1]) dX = tf.random_normal([opt.batchSize,4])*opt.pertScale \ +tf.random_normal([opt.batchSize,1])*opt.transScale dY = tf.random_normal([opt.batchSize,4])*opt.pertScale \ +tf.random_normal([opt.batchSize,1])*opt.transScale O = np.zeros([opt.batchSize,4],dtype=np.float32) I = np.ones([opt.batchSize,4],dtype=np.float32) # fit warp parameters to generated displacements if opt.warpType=="homography": A = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1), tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1) b = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1) pPert = tf.matrix_solve(A,b)[:,:,0] pPert -= tf.to_float([[1,0,0,0,1,0,0,0]]) else: if opt.warpType=="translation": J = np.concatenate([np.stack([I,O],axis=-1), np.stack([O,I],axis=-1)],axis=1) if opt.warpType=="similarity": J = np.concatenate([np.stack([X,Y,I,O],axis=-1), np.stack([-Y,X,O,I],axis=-1)],axis=1) if opt.warpType=="affine": J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1), np.stack([O,O,O,X,Y,I],axis=-1)],axis=1) dXY = tf.expand_dims(tf.concat([dX,dY],1),-1) pPert = tf.matrix_solve_ls(J,dXY)[:,:,0] return pPert # make training batch def makeBatch(opt,data,PH): N = len(data["image"]) randIdx = 
np.random.randint(N,size=[opt.batchSize]) # put data in placeholders [image,label] = PH batch = { image: data["image"][randIdx]/255.0, label: data["label"][randIdx], } return batch # evaluation on test set def evalTest(opt,sess,data,PH,prediction,imagesEval=[]): N = len(data["image"]) # put data in placeholders [image,label] = PH batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx batch = { image: data["image"][idx]/255.0, label: data["label"][idx], } evalList = sess.run([prediction]+imagesEval,feed_dict=batch) pred = evalList[0] count += pred[:len(realIdx)].sum() if len(imagesEval)>0: imgs = evalList[1:] for i in range(len(realIdx)): if data["label"][idx[i]] not in warped[0]: warped[0][data["label"][idx[i]]] = [] if data["label"][idx[i]] not in warped[1]: warped[1][data["label"][idx[i]]] = [] warped[0][data["label"][idx[i]]].append(imgs[0][i]) warped[1][data["label"][idx[i]]].append(imgs[1][i]) accuracy = float(count)/N if len(imagesEval)>0: mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None return accuracy,mean,var ================================================ FILE: traffic-sign-tensorflow/graph.py ================================================ import numpy as np import tensorflow as tf import time import data,warp,util # build classification network def fullCNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = 
tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		# fully-connected layer: weight matrix + bias
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,12)
			feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		with tf.variable_scope("conv3"):
			feat = conv2Layer(opt,feat,24)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc4"):
			feat = linearLayer(opt,feat,200)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("fc5"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output

# build classification network
def CNN(opt,image):
	# Smaller classifier used on top of STN/IC-STN warped images; returns
	# [batchSize,labelN] logits. NOTE(review): near-duplicate of fullCNN
	# (fewer layers) — kept separate to match the paper's architectures.
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,12)
			feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc3"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output

# build Spatial Transformer Network
def STN(opt,image):
	# Predicts a single warp from the image and applies it once.
	# Returns [input image, warped image].
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		fc = tf.matmul(feat,weight)+bias
		return fc
	imageWarpAll = [image]
	with tf.variable_scope("geometric"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,24)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc3"):
			feat = linearLayer(opt,feat,opt.warpDim)
		p = feat
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarp = warp.transformImage(opt,image,pMtrx)
	imageWarpAll.append(imageWarp)
	return imageWarpAll

# build Inverse Compositional STN
def ICSTN(opt,imageFull,p):
	# Recurrently predicts warp updates dp for warpN iterations, composing
	# each update into p; returns the warped crop after every iteration
	# (warpN+1 images in total).
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		fc = tf.matmul(feat,weight)+bias
		return fc
	imageWarpAll = []
	for l in range(opt.warpN):
		# reuse the same geometric-predictor weights across all iterations
		with tf.variable_scope("geometric",reuse=l>0):
			pMtrx = warp.vec2mtrx(opt,p)
			imageWarp = warp.transformCropImage(opt,imageFull,pMtrx)
			imageWarpAll.append(imageWarp)
			feat = imageWarp
			with tf.variable_scope("conv1"):
				feat = conv2Layer(opt,feat,6)
				feat = tf.nn.relu(feat)
			with tf.variable_scope("conv2"):
				feat = conv2Layer(opt,feat,24)
				feat = tf.nn.relu(feat)
			feat = tf.reshape(feat,[opt.batchSize,-1])
			with tf.variable_scope("fc3"):
				feat = linearLayer(opt,feat,opt.warpDim)
			dp = feat
		# inverse-compositional update: fold dp into the running warp
		p = warp.compose(opt,p,dp)
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarp = warp.transformCropImage(opt,imageFull,pMtrx)
	imageWarpAll.append(imageWarp)
	return imageWarpAll

# auxiliary function for creating weight and bias
def createVariable(opt,weightShape,biasShape=None,stddev=None):
	# biasShape defaults to the last dimension of the weight shape
	if biasShape is None: biasShape = [weightShape[-1]]
	weight = tf.get_variable("weight",shape=weightShape,dtype=tf.float32,
initializer=tf.random_normal_initializer(stddev=stddev)) bias = tf.get_variable("bias",shape=biasShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) return weight,bias ================================================ FILE: traffic-sign-tensorflow/options.py ================================================ import numpy as np import argparse import warp import util def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="36x36", help="image resolution") parser.add_argument("--sizeFull", default="50x50", help="full image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.01, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.001, help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") parser.add_argument("--lrCdecay", type=float, default=0.1, help="learning rate decay (classification network)") parser.add_argument("--lrCstep", type=int, default=500000, help="learning rate decay step size (classification network)") 
parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrGPdecay", type=float, default=0.1, help="learning rate decay (geometric predictor)") parser.add_argument("--lrGPstep", type=int, default=500000, help="learning rate decay step size (geometric predictor)") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=1000000,help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-3 if opt.netType=="STN" else \ 3e-5 if opt.netType=="IC-STN" else None # --- below are automatically set --- opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.fullH,opt.fullW = [int(x) for x in opt.sizeFull.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 43 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.bbox = [int(opt.fullW/2-opt.W/2),int(opt.fullH/2-opt.H/2),int(opt.fullW/2+opt.W/2),int(opt.fullH/2+opt.H/2)] opt.bbox4pts = np.array([[opt.bbox[0],opt.bbox[1]],[opt.bbox[0],opt.bbox[3]], [opt.bbox[2],opt.bbox[3]],[opt.bbox[2],opt.bbox[1]]],dtype=np.float32) opt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts) opt.bboxRefMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.bbox4pts) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: 
{0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: traffic-sign-tensorflow/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import tensorflow as tf import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building graph...")) tf.reset_default_graph() # build graph with tf.device("/gpu:0"): # ------ define input data ------ imageFull = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.fullH,opt.fullW,3]) imageMean,imageVar = tf.nn.moments(imageFull,axes=[1,2],keep_dims=True) imageFullNormalize = (imageFull-imageMean)/tf.sqrt(imageVar) label = tf.placeholder(tf.int64,shape=[opt.batchSize]) PH = 
[imageFull,label]
	# ------ generate perturbation ------
	pInit = data.genPerturbations(opt)
	pInitMtrx = warp.vec2mtrx(opt,pInit)
	# ------ build network ------
	imagePert = warp.transformCropImage(opt,imageFullNormalize,pInitMtrx)
	# undo the per-image normalization for visualization
	imagePertRescale = imagePert*tf.sqrt(imageVar)+imageMean
	if opt.netType=="CNN":
		output = graph.fullCNN(opt,imagePert)
	elif opt.netType=="STN":
		imageWarpAll = graph.STN(opt,imagePert)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
		imageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean
	elif opt.netType=="IC-STN":
		imageWarpAll = graph.ICSTN(opt,imageFullNormalize,pInit)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
		imageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean
	softmax = tf.nn.softmax(output)
	labelOnehot = tf.one_hot(label,opt.labelN)
	prediction = tf.equal(tf.argmax(softmax,1),label)
	# ------ define loss ------
	softmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot)
	loss = tf.reduce_mean(softmaxLoss)
	# ------ optimizer ------
	# separate learning-rate placeholders for geometric predictor / classifier
	lrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[])
	optim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH)
	# ------ generate summaries ------
	summaryImageTrain = []
	summaryImageTest = []
	if opt.netType=="STN" or opt.netType=="IC-STN":
		# one image summary per warp iteration (plus the initial image)
		for l in range(opt.warpN+1):
			summaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,"TRAIN_warp{0}".format(l),opt.H,opt.W))
			summaryImageTest.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,"TEST_warp{0}".format(l),opt.H,opt.W))
		summaryImageTrain = tf.summary.merge(summaryImageTrain)
		summaryImageTest = tf.summary.merge(summaryImageTest)
	summaryLossTrain = tf.summary.scalar("TRAIN_loss",loss)
	testErrorPH = tf.placeholder(tf.float32,shape=[])
	testImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,3])
	summaryErrorTest = tf.summary.scalar("TEST_error",testErrorPH)
	if opt.netType=="STN" or opt.netType=="IC-STN":
		summaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_init",opt.H,opt.W)
		summaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_warped",opt.H,opt.W)
		summaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_var_init",opt.H,opt.W)
		summaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_var_warped",opt.H,opt.W)

# load data
print(util.toMagenta("loading GTSRB dataset..."))
trainData,testData = data.loadGTSRB(opt,"data/GTSRB.npz")

# prepare model saver/summary writer
saver = tf.train.Saver(max_to_keep=20)
summaryWriter = tf.summary.FileWriter("summary_{0}/{1}".format(opt.group,opt.model))

print(util.toYellow("======= TRAINING START ======="))
timeStart = time.time()
# start session
tfConfig = tf.ConfigProto(allow_soft_placement=True)
tfConfig.gpu_options.allow_growth = True
with tf.Session(config=tfConfig) as sess:
	sess.run(tf.global_variables_initializer())
	summaryWriter.add_graph(sess.graph)
	if opt.fromIt!=0:
		util.restoreModel(opt,sess,saver,opt.fromIt)
		print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt)))
	print(util.toMagenta("start training..."))
	# training loop
	for i in range(opt.fromIt,opt.toIt):
		# staircase learning-rate decay
		lrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep)
		lrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep)
		# make training batch
		batch = data.makeBatch(opt,trainData,PH)
		batch[lrGP_PH] = lrGP
		batch[lrC_PH] = lrC
		# run one step
		_,l = sess.run([optim,loss],feed_dict=batch)
		if (i+1)%100==0:
			print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}"
				.format(util.toCyan("{0}".format(i+1)),
						opt.toIt,
						util.toGreen("{0:.2f}".format(time.time()-timeStart)),
						util.toYellow("{0:.0e}".format(lrGP)),
						util.toYellow("{0:.0e}".format(lrC)),
						util.toRed("{0:.4f}".format(l))))
		if (i+1)%100==0:
			summaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1)
		if (i+1)%500==0 and (opt.netType=="STN" or opt.netType=="IC-STN"):
			summaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1)
			summaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1)
		if (i+1)%1000==0:
			# evaluate on test set
			if opt.netType=="STN" or opt.netType=="IC-STN":
				testAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp])
			else:
				testAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction)
			testError = (1-testAcc)*100
			summaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1)
			if opt.netType=="STN" or opt.netType=="IC-STN":
				# per-class mean/variance images before and after warping
				summaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1)
		if (i+1)%10000==0:
			util.saveModel(opt,sess,saver,i+1)
			print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1)))

	print(util.toYellow("======= TRAINING DONE ======="))

# ================================================
# FILE: traffic-sign-tensorflow/util.py
# ================================================
import numpy as np
import scipy.misc
import tensorflow as tf
import os
import termcolor

# create the directory if it does not already exist
def mkdir(path):
	if not os.path.exists(path): os.mkdir(path)
# read an image as floats in [0,1]
def imread(fname):
	return scipy.misc.imread(fname)/255.0
# save a float image in [0,1]
def imsave(fname,array):
	scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)

# convert
# to colored strings
def toRed(content): return termcolor.colored(content,"red",attrs=["bold"])
def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"])
def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"])
def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"])
def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"])
def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"])

# make image summary from image batch
def imageSummary(opt,image,tag,H,W):
	# Tile the first visBlockSize**2 images of the batch into one
	# (H*blockSize)x(W*blockSize) grid and wrap it as a tf.summary.image.
	blockSize = opt.visBlockSize
	imageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize)
	imagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1])
	imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary

# make image summary from image batch (mean/variance)
def imageSummaryMeanVar(opt,image,tag,H,W):
	# Pads the batch with 2 blank images so it fills a fixed 5x9 grid
	# (assumes a batch of labelN=43 per-class images — 43+2=45=5*9).
	image = tf.concat([image,np.zeros([2,H,W,3])],axis=0)
	imageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[5,9])
	imagePermute = tf.reshape(imageOne,[H,5,W,9,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*5,W*9,-1])
	# imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary

# set optimizer for different learning rates
def setOptimizer(opt,loss,lrGP,lrC):
	# Builds two SGD optimizers so the geometric predictor ("geometric" scope)
	# and the classifier ("classifier" scope) can use different learning rates;
	# returns a single grouped training op.
	varsGP = [v for v in tf.global_variables() if "geometric" in v.name]
	varsC = [v for v in tf.global_variables() if "classifier" in v.name]
	gradC = tf.gradients(loss,varsC)
	optimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC))
	if len(varsGP)>0:
		gradGP = tf.gradients(loss,varsGP)
		optimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP))
		optim = tf.group(optimC,optimGP)
	# pure-CNN case: no geometric variables exist, so only train the classifier
	else: optim =
optimC return optim # restore model def restoreModel(opt,sess,saver,it): saver.restore(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) # save model def saveModel(opt,sess,saver,it): saver.save(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) ================================================ FILE: traffic-sign-tensorflow/warp.py ================================================ import numpy as np import scipy.linalg import tensorflow as tf # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): with tf.name_scope("compose"): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) pMtrxNew = tf.matmul(dpMtrx,pMtrx) pMtrxNew /= pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): with tf.name_scope("inverse"): pMtrx = vec2mtrx(opt,p) pInvMtrx = tf.matrix_inverse(pMtrx) pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): with tf.name_scope("vec2mtrx"): O = tf.zeros([opt.batchSize]) I = tf.ones([opt.batchSize]) if opt.warpType=="translation": tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="similarity": pc,ps,tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="homography": 
p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1]) return pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): with tf.name_scope("mtrx2vec"): [row0,row1,row2] = tf.unstack(pMtrx,axis=1) [e00,e01,e02] = tf.unstack(row0,axis=1) [e10,e11,e12] = tf.unstack(row1,axis=1) [e20,e21,e22] = tf.unstack(row2,axis=1) if opt.warpType=="translation": p = tf.stack([e02,e12],axis=1) if opt.warpType=="similarity": p = tf.stack([e00-1,e10,e02,e12],axis=1) if opt.warpType=="affine": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1) if opt.warpType=="homography": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1) return p # warp the image def transformImage(opt,image,pMtrx): with tf.name_scope("transformImage"): refMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1]) transMtrx = tf.matmul(refMtrx,pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYwarpHom = tf.matmul(transMtrx,XYhom) XwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1) Xwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) Ywarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) # get the integer sampling coordinates Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp) Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp) XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil) YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil) imageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W]) imageVec = tf.reshape(image,[-1,int(image.shape[-1])]) imageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0) idxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt idxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt idxBL = 
(imageIdx*opt.H+YceilInt)*opt.W+XfloorInt idxBR = (imageIdx*opt.H+YceilInt)*opt.W+XceilInt idxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W) def insideImage(Xint,Yint): return (Xint>=0)&(Xint=0)&(Yint=0)&(Xint=0)&(Yint