Repository: chenhsuanlin/inverse-compositional-STN Branch: master Commit: 4a2a8fc7b9a1 Files: 22 Total size: 86.3 KB Directory structure: gitextract_t1x_4nxr/ ├── .editorconfig ├── .gitignore ├── LICENSE ├── MNIST-pytorch/ │ ├── data.py │ ├── graph.py │ ├── options.py │ ├── train.py │ ├── util.py │ └── warp.py ├── MNIST-tensorflow/ │ ├── data.py │ ├── graph.py │ ├── options.py │ ├── train.py │ ├── util.py │ └── warp.py ├── README.md └── traffic-sign-tensorflow/ ├── data.py ├── graph.py ├── options.py ├── train.py ├── util.py └── warp.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ root = true [*] end_of_line = lf insert_final_newline = true indent_style = tab indent_size = 4 trim_trailing_whitespace = true [*.md] trim_trailing_whitespace = false ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Chen-Hsuan Lin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: MNIST-pytorch/data.py ================================================ import numpy as np import scipy.linalg import os,time import torch import torchvision import warp,util # load MNIST data def loadMNIST(opt,path): os.makedirs(path,exist_ok=True) trainDataset = torchvision.datasets.MNIST(path,train=True,download=True) testDataset = torchvision.datasets.MNIST(path,train=False,download=True) trainData,testData = {},{} trainData["image"] = torch.tensor([np.array(sample[0])/255.0 for sample in trainDataset],dtype=torch.float32) testData["image"] = torch.tensor([np.array(sample[0])/255.0 for sample in testDataset],dtype=torch.float32) trainData["label"] = torch.tensor([sample[1] for sample in trainDataset]) testData["label"] = torch.tensor([sample[1] for sample in testDataset]) return trainData,testData # generate training batch def genPerturbations(opt): X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1]) Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1]) O = np.zeros([opt.batchSize,4],dtype=np.float32) I = np.ones([opt.batchSize,4],dtype=np.float32) dX = np.random.randn(opt.batchSize,4)*opt.pertScale \ +np.random.randn(opt.batchSize,1)*opt.transScale dY = np.random.randn(opt.batchSize,4)*opt.pertScale \ +np.random.randn(opt.batchSize,1)*opt.transScale dX,dY = dX.astype(np.float32),dY.astype(np.float32) # fit warp parameters to generated displacements if opt.warpType=="homography": A = np.concatenate([np.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1), np.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],axis=1) b = np.expand_dims(np.concatenate([X+dX,Y+dY],axis=1),axis=-1) pPert = np.matmul(np.linalg.inv(A),b).squeeze() pPert -= np.array([1,0,0,0,1,0,0,0]) else: if opt.warpType=="translation": J = np.concatenate([np.stack([I,O],axis=-1), np.stack([O,I],axis=-1)],axis=1) if opt.warpType=="similarity": J = np.concatenate([np.stack([X,Y,I,O],axis=-1), np.stack([-Y,X,O,I],axis=-1)],axis=1) if opt.warpType=="affine": J 
= np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1), np.stack([O,O,O,X,Y,I],axis=-1)],axis=1) dXY = np.expand_dims(np.concatenate([dX,dY],axis=1),axis=-1) Jtransp = np.transpose(J,axes=[0,2,1]) pPert = np.matmul(np.linalg.inv(np.matmul(Jtransp,J)),np.matmul(Jtransp,dXY)).squeeze() pInit = torch.from_numpy(pPert).cuda() return pInit # make training batch def makeBatch(opt,data): N = len(data["image"]) randIdx = np.random.randint(N,size=[opt.batchSize]) batch = { "image": data["image"][randIdx].cuda(), "label": data["label"][randIdx].cuda(), } return batch # evaluation on test set def evalTest(opt,data,geometric,classifier): geometric.eval() classifier.eval() N = len(data["image"]) batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx # make training batch image = data["image"][idx].cuda() label = data["label"][idx].cuda() image.data.unsqueeze_(dim=1) # generate perturbation pInit = genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) imagePert = warp.transformImage(opt,image,pInitMtrx) imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert) imageWarp = imageWarpAll[-1] output = classifier(opt,imageWarp) _,pred = output.max(dim=1) count += int((pred==label).sum().cpu().numpy()) if opt.netType=="STN" or opt.netType=="IC-STN": imgPert = imagePert.detach().cpu().numpy() imgWarp = imageWarp.detach().cpu().numpy() for i in range(len(realIdx)): l = data["label"][idx[i]].item() if l not in warped[0]: warped[0][l] = [] if l not in warped[1]: warped[1][l] = [] warped[0][l].append(imgPert[i]) warped[1][l].append(imgWarp[i]) accuracy = float(count)/N if opt.netType=="STN" or opt.netType=="IC-STN": mean = [np.array([np.mean(warped[0][l],axis=0) for l in 
warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None geometric.train() classifier.train() return accuracy,mean,var ================================================ FILE: MNIST-pytorch/graph.py ================================================ import numpy as np import torch import time import data,warp,util # build classification network class FullCNN(torch.nn.Module): def __init__(self,opt): super(FullCNN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[3,3],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(3),torch.nn.ReLU(True), conv2Layer(6),torch.nn.ReLU(True),maxpoolLayer(), conv2Layer(9),torch.nn.ReLU(True), conv2Layer(12),torch.nn.ReLU(True) ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.labelN) ) initialize(opt,self,opt.stdC) def forward(self,opt,image): feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) output = feat return output # build classification network class CNN(torch.nn.Module): def __init__(self,opt): super(CNN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[9,9],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(3),torch.nn.ReLU(True) ) self.inDim *= 20**2 self.linearLayers = torch.nn.Sequential( linearLayer(opt.labelN) ) 
initialize(opt,self,opt.stdC) def forward(self,opt,image): feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) output = feat return output # an identity class to skip geometric predictors class Identity(torch.nn.Module): def __init__(self): super(Identity,self).__init__() def forward(self,opt,feat): return [feat] # build Spatial Transformer Network class STN(torch.nn.Module): def __init__(self,opt): super(STN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(4),torch.nn.ReLU(True), conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer() ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.warpDim) ) initialize(opt,self,opt.stdGP,last0=True) def forward(self,opt,image): imageWarpAll = [image] feat = image feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) p = feat pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # build Inverse Compositional STN class ICSTN(torch.nn.Module): def __init__(self,opt): super(ICSTN,self).__init__() self.inDim = 1 def conv2Layer(outDim): conv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0) self.inDim = outDim return conv def linearLayer(outDim): fc = torch.nn.Linear(self.inDim,outDim) self.inDim = outDim return fc def maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2) self.conv2Layers = torch.nn.Sequential( conv2Layer(4),torch.nn.ReLU(True), conv2Layer(8),torch.nn.ReLU(True),maxpoolLayer() ) self.inDim *= 8**2 self.linearLayers = torch.nn.Sequential( 
linearLayer(48),torch.nn.ReLU(True), linearLayer(opt.warpDim) ) initialize(opt,self,opt.stdGP,last0=True) def forward(self,opt,image,p): imageWarpAll = [] for l in range(opt.warpN): pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) feat = imageWarp feat = self.conv2Layers(feat).reshape(opt.batchSize,-1) feat = self.linearLayers(feat) dp = feat p = warp.compose(opt,p,dp) pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # initialize weights/biases def initialize(opt,model,stddev,last0=False): for m in model.conv2Layers: if isinstance(m,torch.nn.Conv2d): m.weight.data.normal_(0,stddev) m.bias.data.normal_(0,stddev) for m in model.linearLayers: if isinstance(m,torch.nn.Linear): if last0 and m is model.linearLayers[-1]: m.weight.data.zero_() m.bias.data.zero_() else: m.weight.data.normal_(0,stddev) m.bias.data.normal_(0,stddev) ================================================ FILE: MNIST-pytorch/options.py ================================================ import numpy as np import argparse import warp import util import torch def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="28x28", help="image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.1, 
help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--port", type=int, default=8097, help="port number for visdom visualization") parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrDecay", type=float, default=1.0, help="learning rate decay") parser.add_argument("--lrStep", type=int, default=100000, help="learning rate decay step size") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-2 if opt.netType=="STN" else \ 1e-4 if opt.netType=="IC-STN" else None # --- below are automatically set --- assert(torch.cuda.is_available()) # support only training on GPU for now torch.set_default_tensor_type("torch.cuda.FloatTensor") opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 10 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.refMtrx = 
np.eye(3).astype(np.float32) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: {0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: MNIST-pytorch/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import torch import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building network...")) with torch.cuda.device(0): # ------ build network ------ if opt.netType=="CNN": geometric = graph.Identity() classifier = graph.FullCNN(opt) elif 
opt.netType=="STN": geometric = graph.STN(opt) classifier = graph.CNN(opt) elif opt.netType=="IC-STN": geometric = graph.ICSTN(opt) classifier = graph.CNN(opt) # ------ define loss ------ loss = torch.nn.CrossEntropyLoss() # ------ optimizer ------ optimList = [{ "params": geometric.parameters(), "lr": opt.lrGP }, { "params": classifier.parameters(), "lr": opt.lrC }] optim = torch.optim.SGD(optimList) # load data print(util.toMagenta("loading MNIST dataset...")) trainData,testData = data.loadMNIST(opt,"data") # visdom visualizer vis = util.Visdom(opt) print(util.toYellow("======= TRAINING START =======")) timeStart = time.time() # start session with torch.cuda.device(0): geometric.train() classifier.train() if opt.fromIt!=0: util.restoreModel(opt,geometric,classifier,opt.fromIt) print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt))) print(util.toMagenta("start training...")) # training loop for i in range(opt.fromIt,opt.toIt): lrGP = opt.lrGP*opt.lrDecay**(i//opt.lrStep) lrC = opt.lrC*opt.lrDecay**(i//opt.lrStep) # make training batch batch = data.makeBatch(opt,trainData) image = batch["image"].unsqueeze(dim=1) label = batch["label"] # generate perturbation pInit = data.genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) # forward/backprop through network optim.zero_grad() imagePert = warp.transformImage(opt,image,pInitMtrx) imageWarpAll = geometric(opt,image,pInit) if opt.netType=="IC-STN" else geometric(opt,imagePert) imageWarp = imageWarpAll[-1] output = classifier(opt,imageWarp) train_loss = loss(output,label) train_loss.backward() # run one step optim.step() if (i+1)%100==0: print("it. 
{0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}" .format(util.toCyan("{0}".format(i+1)), opt.toIt, util.toGreen("{0:.2f}".format(time.time()-timeStart)), util.toYellow("{0:.0e}".format(lrGP)), util.toYellow("{0:.0e}".format(lrC)), util.toRed("{0:.4f}".format(train_loss)))) if (i+1)%200==0: vis.trainLoss(opt,i+1,train_loss) if (i+1)%1000==0: # evaluate on test set testAcc,testMean,testVar = data.evalTest(opt,testData,geometric,classifier) testError = (1-testAcc)*100 vis.testLoss(opt,i+1,testError) if opt.netType=="STN" or opt.netType=="IC-STN": vis.meanVar(opt,testMean,testVar) if (i+1)%10000==0: util.saveModel(opt,geometric,classifier,i+1) print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1))) print(util.toYellow("======= TRAINING DONE =======")) ================================================ FILE: MNIST-pytorch/util.py ================================================ import numpy as np import scipy.misc import torch import os import termcolor import visdom def mkdir(path): if not os.path.exists(path): os.mkdir(path) def imread(fname): return scipy.misc.imread(fname)/255.0 def imsave(fname,array): scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname) # convert to colored strings def toRed(content): return termcolor.colored(content,"red",attrs=["bold"]) def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"]) def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"]) def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"]) def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"]) def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"]) # restore model def restoreModel(opt,geometric,classifier,it): geometric.load_state_dict(torch.load("models_{0}/{1}_it{2}_GP.npy".format(opt.group,opt.model,it))) classifier.load_state_dict(torch.load("models_{0}/{1}_it{2}_C.npy".format(opt.group,opt.model,it))) # save model def 
saveModel(opt,geometric,classifier,it): torch.save(geometric.state_dict(),"models_{0}/{1}_it{2}_GP.npy".format(opt.group,opt.model,it)) torch.save(classifier.state_dict(),"models_{0}/{1}_it{2}_C.npy".format(opt.group,opt.model,it)) class Visdom(): def __init__(self,opt): self.vis = visdom.Visdom(port=opt.port,use_incoming_socket=False) self.trainLossInit = True self.testLossInit = True self.meanVarInit = True def tileImages(self,opt,images,H,W,HN,WN): assert(len(images)==HN*WN) images = images.reshape([HN,WN,-1,H,W]) images = [list(i) for i in images] imageBlocks = np.concatenate([np.concatenate(row,axis=2) for row in images],axis=1) return imageBlocks def trainLoss(self,opt,it,loss): loss = float(loss.detach().cpu().numpy()) if self.trainLossInit: self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_trainloss".format(opt.model), opts={ "title": "{0} (TRAIN_loss)".format(opt.model) }) self.trainLossInit = False else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_trainloss",update="append") def testLoss(self,opt,it,loss): if self.testLossInit: self.vis.line(Y=np.array([loss]),X=np.array([it]),win="{0}_testloss".format(opt.model), opts={ "title": "{0} (TEST_error)".format(opt.model) }) self.testLossInit = False else: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+"_testloss",update="append") def meanVar(self,opt,mean,var): mean = [self.tileImages(opt,m,opt.H,opt.W,1,10) for m in mean] var = [self.tileImages(opt,v,opt.H,opt.W,1,10)*3 for v in var] self.vis.image(mean[0].clip(0,1),win="{0}_meaninit".format(opt.model), opts={ "title": "{0} (TEST_mean_init)".format(opt.model) }) self.vis.image(mean[1].clip(0,1),win="{0}_meanwarped".format(opt.model), opts={ "title": "{0} (TEST_mean_warped)".format(opt.model) }) self.vis.image(var[0].clip(0,1),win="{0}_varinit".format(opt.model), opts={ "title": "{0} (TEST_var_init)".format(opt.model) }) self.vis.image(var[1].clip(0,1),win="{0}_varwarped".format(opt.model), opts={ "title": "{0} 
(TEST_var_warped)".format(opt.model) }) ================================================ FILE: MNIST-pytorch/warp.py ================================================ import numpy as np import scipy.linalg import torch import util # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=torch.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) pMtrxNew = dpMtrx.matmul(pMtrx) pMtrxNew = pMtrxNew/pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): pMtrx = vec2mtrx(opt,p) pInvMtrx = pMtrx.inverse() pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): O = torch.zeros(opt.batchSize,dtype=torch.float32).cuda() I = torch.ones(opt.batchSize,dtype=torch.float32).cuda() if opt.warpType=="translation": tx,ty = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I,O,tx],dim=-1), torch.stack([O,I,ty],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="similarity": pc,ps,tx,ty = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+pc,-ps,tx],dim=-1), torch.stack([ps,I+pc,ty],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6 = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1), torch.stack([p4,I+p5,p6],dim=-1), torch.stack([O,O,I],dim=-1)],dim=1) if opt.warpType=="homography": p1,p2,p3,p4,p5,p6,p7,p8 = torch.unbind(p,dim=1) pMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1), torch.stack([p4,I+p5,p6],dim=-1), torch.stack([p7,p8,I],dim=-1)],dim=1) return 
pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): [row0,row1,row2] = torch.unbind(pMtrx,dim=1) [e00,e01,e02] = torch.unbind(row0,dim=1) [e10,e11,e12] = torch.unbind(row1,dim=1) [e20,e21,e22] = torch.unbind(row2,dim=1) if opt.warpType=="translation": p = torch.stack([e02,e12],dim=1) if opt.warpType=="similarity": p = torch.stack([e00-1,e10,e02,e12],dim=1) if opt.warpType=="affine": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12],dim=1) if opt.warpType=="homography": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],dim=1) return p # warp the image def transformImage(opt,image,pMtrx): refMtrx = torch.from_numpy(opt.refMtrx).cuda() refMtrx = refMtrx.repeat(opt.batchSize,1,1) transMtrx = refMtrx.matmul(pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYhom = torch.from_numpy(XYhom).cuda() XYwarpHom = transMtrx.matmul(XYhom) XwarpHom,YwarpHom,ZwarpHom = torch.unbind(XYwarpHom,dim=1) Xwarp = (XwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W) Ywarp = (YwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W) grid = torch.stack([Xwarp,Ywarp],dim=-1) # sampling with bilinear interpolation imageWarp = torch.nn.functional.grid_sample(image,grid,mode="bilinear") return imageWarp ================================================ FILE: MNIST-tensorflow/data.py ================================================ import numpy as np import scipy.linalg import os,time import tensorflow as tf import warp # load MNIST data def loadMNIST(fname): if not os.path.exists(fname): # download and preprocess MNIST dataset from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/",one_hot=True) trainData,validData,testData = {},{},{} trainData["image"] = mnist.train.images.reshape([-1,28,28]).astype(np.float32) 
        validData["image"] = mnist.validation.images.reshape([-1,28,28]).astype(np.float32)
        testData["image"] = mnist.test.images.reshape([-1,28,28]).astype(np.float32)
        # one-hot labels -> integer class indices
        trainData["label"] = np.argmax(mnist.train.labels.astype(np.float32),axis=1)
        validData["label"] = np.argmax(mnist.validation.labels.astype(np.float32),axis=1)
        testData["label"] = np.argmax(mnist.test.labels.astype(np.float32),axis=1)
        # NOTE(review): os.makedirs raises if the directory already exists or if
        # os.path.dirname(fname) is "" — confirm fname always has a fresh parent directory
        os.makedirs(os.path.dirname(fname))
        # cache the preprocessed splits as a single .npz; each entry is a pickled dict
        np.savez(fname,train=trainData,valid=validData,test=testData)
        # remove the raw download now that the cache exists
        os.system("rm -rf MNIST_data")
    # NOTE(review): the cached entries are object arrays — NumPy>=1.16.3 requires
    # allow_pickle=True here; confirm the pinned NumPy version
    MNIST = np.load(fname)
    trainData = MNIST["train"].item()
    validData = MNIST["valid"].item()
    testData = MNIST["test"].item()
    return trainData,validData,testData

# generate training batch
# (random perturbations of the 4 canonical corners, fitted to parameters of the
# chosen warp family; built as TF1 graph ops so each sess.run draws fresh noise)
def genPerturbations(opt):
    with tf.name_scope("genPerturbations"):
        X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])
        Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])
        dX = tf.random_normal([opt.batchSize,4])*opt.pertScale \
            +tf.random_normal([opt.batchSize,1])*opt.transScale
        dY = tf.random_normal([opt.batchSize,4])*opt.pertScale \
            +tf.random_normal([opt.batchSize,1])*opt.transScale
        O = np.zeros([opt.batchSize,4],dtype=np.float32)
        I = np.ones([opt.batchSize,4],dtype=np.float32)
        # fit warp parameters to generated displacements
        if opt.warpType=="homography":
            # 8 corner equations determine the 8 homography parameters exactly
            A = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),
                           tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1)
            b = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1)
            pPert = tf.matrix_solve(A,b)[:,:,0]
            # subtract the identity so zero parameters mean "no warp"
            pPert -= tf.to_float([[1,0,0,0,1,0,0,0]])
        else:
            if opt.warpType=="translation":
                J = np.concatenate([np.stack([I,O],axis=-1),
                                    np.stack([O,I],axis=-1)],axis=1)
            if opt.warpType=="similarity":
                J = np.concatenate([np.stack([X,Y,I,O],axis=-1),
                                    np.stack([-Y,X,O,I],axis=-1)],axis=1)
            if opt.warpType=="affine":
                J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),
                                    np.stack([O,O,O,X,Y,I],axis=-1)],axis=1)
            dXY = tf.expand_dims(tf.concat([dX,dY],1),-1)
            # least-squares fit: these warp families are over-determined by the 4 corners
            pPert = tf.matrix_solve_ls(J,dXY)[:,:,0]
        return pPert

# make training batch
def makeBatch(opt,data,PH): N = len(data["image"]) randIdx = np.random.randint(N,size=[opt.batchSize]) # put data in placeholders [image,label] = PH batch = { image: data["image"][randIdx], label: data["label"][randIdx], } return batch # evaluation on test set def evalTest(opt,sess,data,PH,prediction,imagesEval=[]): N = len(data["image"]) # put data in placeholders [image,label] = PH batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx batch = { image: data["image"][idx], label: data["label"][idx], } evalList = sess.run([prediction]+imagesEval,feed_dict=batch) pred = evalList[0] count += pred[:len(realIdx)].sum() if opt.netType=="STN" or opt.netType=="IC-STN": imgs = evalList[1:] for i in range(len(realIdx)): l = data["label"][idx[i]] if l not in warped[0]: warped[0][l] = [] if l not in warped[1]: warped[1][l] = [] warped[0][l].append(imgs[0][i]) warped[1][l].append(imgs[1][i]) accuracy = float(count)/N if opt.netType=="STN" or opt.netType=="IC-STN": mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None return accuracy,mean,var ================================================ FILE: MNIST-tensorflow/graph.py ================================================ import numpy as np import tensorflow as tf import time import data,warp,util # build classification network def fullCNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[3,3,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = 
tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC) fc = tf.matmul(feat,weight)+bias return fc with tf.variable_scope("classifier"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,3) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,6) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") with tf.variable_scope("conv3"): feat = conv2Layer(opt,feat,9) feat = tf.nn.relu(feat) with tf.variable_scope("conv4"): feat = conv2Layer(opt,feat,12) feat = tf.nn.relu(feat) feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc5"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc6"): feat = linearLayer(opt,feat,opt.labelN) output = feat return output # build classification network def CNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[9,9,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC) fc = tf.matmul(feat,weight)+bias return fc with tf.variable_scope("classifier"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,3) feat = tf.nn.relu(feat) feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc2"): feat = linearLayer(opt,feat,opt.labelN) output = feat return output # build Spatial Transformer Network def STN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim,final=False): weight,bias = 
createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP) fc = tf.matmul(feat,weight)+bias return fc imageWarpAll = [image] with tf.variable_scope("geometric"): feat = image with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,4) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,8) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc3"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc4"): feat = linearLayer(opt,feat,opt.warpDim,final=True) p = feat pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # build Inverse Compositional STN def ICSTN(opt,image,p): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP) conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias return conv def linearLayer(opt,feat,outDim,final=False): weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP) fc = tf.matmul(feat,weight)+bias return fc imageWarpAll = [] for l in range(opt.warpN): with tf.variable_scope("geometric",reuse=l>0): pMtrx = warp.vec2mtrx(opt,p) imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) feat = imageWarp with tf.variable_scope("conv1"): feat = conv2Layer(opt,feat,4) feat = tf.nn.relu(feat) with tf.variable_scope("conv2"): feat = conv2Layer(opt,feat,8) feat = tf.nn.relu(feat) feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID") feat = tf.reshape(feat,[opt.batchSize,-1]) with tf.variable_scope("fc3"): feat = linearLayer(opt,feat,48) feat = tf.nn.relu(feat) with tf.variable_scope("fc4"): feat = linearLayer(opt,feat,opt.warpDim,final=True) dp = feat p = warp.compose(opt,p,dp) pMtrx = warp.vec2mtrx(opt,p) 
imageWarp = warp.transformImage(opt,image,pMtrx) imageWarpAll.append(imageWarp) return imageWarpAll # auxiliary function for creating weight and bias def createVariable(opt,weightShape,biasShape=None,stddev=None): if biasShape is None: biasShape = [weightShape[-1]] weight = tf.get_variable("weight",shape=weightShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) bias = tf.get_variable("bias",shape=biasShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) return weight,bias ================================================ FILE: MNIST-tensorflow/options.py ================================================ import numpy as np import argparse import warp import util def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="28x28", help="image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.1, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.1, help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") 
parser.add_argument("--lrCdecay", type=float, default=1.0, help="learning rate decay (classification network)") parser.add_argument("--lrCstep", type=int, default=100000, help="learning rate decay step size (classification network)") parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrGPdecay", type=float, default=1.0, help="learning rate decay (geometric predictor)") parser.add_argument("--lrGPstep", type=int, default=100000, help="learning rate decay step size (geometric predictor)") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=500000, help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-2 if opt.netType=="STN" else \ 1e-4 if opt.netType=="IC-STN" else None # --- below are automatically set --- opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 10 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: {0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) 
print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: MNIST-tensorflow/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import tensorflow as tf import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building graph...")) tf.reset_default_graph() # build graph with tf.device("/gpu:0"): # ------ define input data ------ image = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W]) label = tf.placeholder(tf.int64,shape=[opt.batchSize]) PH = [image,label] # ------ generate perturbation ------ pInit = data.genPerturbations(opt) pInitMtrx = warp.vec2mtrx(opt,pInit) # ------ build network ------ image = tf.expand_dims(image,axis=-1) imagePert = warp.transformImage(opt,image,pInitMtrx) if opt.netType=="CNN": output = graph.fullCNN(opt,imagePert) elif 
opt.netType=="STN": imageWarpAll = graph.STN(opt,imagePert) imageWarp = imageWarpAll[-1] output = graph.CNN(opt,imageWarp) elif opt.netType=="IC-STN": imageWarpAll = graph.ICSTN(opt,image,pInit) imageWarp = imageWarpAll[-1] output = graph.CNN(opt,imageWarp) softmax = tf.nn.softmax(output) labelOnehot = tf.one_hot(label,opt.labelN) prediction = tf.equal(tf.argmax(softmax,1),label) # ------ define loss ------ softmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot) loss = tf.reduce_mean(softmaxLoss) # ------ optimizer ------ lrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[]) optim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH) # ------ generate summaries ------ summaryImageTrain = [] summaryImageTest = [] if opt.netType=="STN" or opt.netType=="IC-STN": for l in range(opt.warpN+1): summaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l],"TRAIN_warp{0}".format(l),opt.H,opt.W)) summaryImageTest.append(util.imageSummary(opt,imageWarpAll[l],"TEST_warp{0}".format(l),opt.H,opt.W)) summaryImageTrain = tf.summary.merge(summaryImageTrain) summaryImageTest = tf.summary.merge(summaryImageTest) summaryLossTrain = tf.summary.scalar("TRAIN_loss",loss) testErrorPH = tf.placeholder(tf.float32,shape=[]) testImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,1]) summaryErrorTest = tf.summary.scalar("TEST_error",testErrorPH) if opt.netType=="STN" or opt.netType=="IC-STN": summaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_init",opt.H,opt.W) summaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_warped",opt.H,opt.W) summaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_init",opt.H,opt.W) summaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH*3,"TEST_var_warped",opt.H,opt.W) # load data print(util.toMagenta("loading MNIST dataset...")) trainData,validData,testData = data.loadMNIST("data/MNIST.npz") # prepare model saver/summary writer saver = 
tf.train.Saver(max_to_keep=20) summaryWriter = tf.summary.FileWriter("summary_{0}/{1}".format(opt.group,opt.model)) print(util.toYellow("======= TRAINING START =======")) timeStart = time.time() # start session tfConfig = tf.ConfigProto(allow_soft_placement=True) tfConfig.gpu_options.allow_growth = True with tf.Session(config=tfConfig) as sess: sess.run(tf.global_variables_initializer()) summaryWriter.add_graph(sess.graph) if opt.fromIt!=0: util.restoreModel(opt,sess,saver,opt.fromIt) print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt))) print(util.toMagenta("start training...")) # training loop for i in range(opt.fromIt,opt.toIt): lrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep) lrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep) # make training batch batch = data.makeBatch(opt,trainData,PH) batch[lrGP_PH] = lrGP batch[lrC_PH] = lrC # run one step _,l = sess.run([optim,loss],feed_dict=batch) if (i+1)%100==0: print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}" .format(util.toCyan("{0}".format(i+1)), opt.toIt, util.toGreen("{0:.2f}".format(time.time()-timeStart)), util.toYellow("{0:.0e}".format(lrGP)), util.toYellow("{0:.0e}".format(lrC)), util.toRed("{0:.4f}".format(l)))) if (i+1)%100==0: summaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1) if (i+1)%500==0 and (opt.netType=="STN" or opt.netType=="IC-STN"): summaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1) summaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1) if (i+1)%1000==0: # evaluate on test set if opt.netType=="STN" or opt.netType=="IC-STN": testAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp]) else: testAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction) testError = (1-testAcc)*100 summaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1) if opt.netType=="STN" or opt.netType=="IC-STN": 
summaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1) summaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1) summaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1) summaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1) if (i+1)%10000==0: util.saveModel(opt,sess,saver,i+1) print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1))) print(util.toYellow("======= TRAINING DONE =======")) ================================================ FILE: MNIST-tensorflow/util.py ================================================ import numpy as np import scipy.misc import tensorflow as tf import os import termcolor def mkdir(path): if not os.path.exists(path): os.mkdir(path) def imread(fname): return scipy.misc.imread(fname)/255.0 def imsave(fname,array): scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname) # convert to colored strings def toRed(content): return termcolor.colored(content,"red",attrs=["bold"]) def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"]) def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"]) def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"]) def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"]) def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"]) # make image summary from image batch def imageSummary(opt,image,tag,H,W): blockSize = opt.visBlockSize imageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize) imagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1]) imageTransp = tf.transpose(imagePermute,[1,0,3,2,4]) imageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1]) imageBlocks = tf.cast(imageBlocks*255,tf.uint8) summary = tf.summary.image(tag,imageBlocks) return summary # make image summary from 
image batch (mean/variance) def imageSummaryMeanVar(opt,image,tag,H,W): imageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[1,10]) imagePermute = tf.reshape(imageOne,[H,1,W,10,-1]) imageTransp = tf.transpose(imagePermute,[1,0,3,2,4]) imageBlocks = tf.reshape(imageTransp,[1,H*1,W*10,-1]) imageBlocks = tf.cast(imageBlocks*255,tf.uint8) summary = tf.summary.image(tag,imageBlocks) return summary # set optimizer for different learning rates def setOptimizer(opt,loss,lrGP,lrC): varsGP = [v for v in tf.global_variables() if "geometric" in v.name] varsC = [v for v in tf.global_variables() if "classifier" in v.name] gradC = tf.gradients(loss,varsC) optimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC)) if len(varsGP)>0: gradGP = tf.gradients(loss,varsGP) optimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP)) optim = tf.group(optimC,optimGP) else: optim = optimC return optim # restore model def restoreModel(opt,sess,saver,it): saver.restore(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) # save model def saveModel(opt,sess,saver,it): saver.save(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) ================================================ FILE: MNIST-tensorflow/warp.py ================================================ import numpy as np import scipy.linalg import tensorflow as tf # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): with tf.name_scope("compose"): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) 
pMtrxNew = tf.matmul(dpMtrx,pMtrx) pMtrxNew /= pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): with tf.name_scope("inverse"): pMtrx = vec2mtrx(opt,p) pInvMtrx = tf.matrix_inverse(pMtrx) pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): with tf.name_scope("vec2mtrx"): O = tf.zeros([opt.batchSize]) I = tf.ones([opt.batchSize]) if opt.warpType=="translation": tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="similarity": pc,ps,tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="homography": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1]) return pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): with tf.name_scope("mtrx2vec"): [row0,row1,row2] = tf.unstack(pMtrx,axis=1) [e00,e01,e02] = tf.unstack(row0,axis=1) [e10,e11,e12] = tf.unstack(row1,axis=1) [e20,e21,e22] = tf.unstack(row2,axis=1) if opt.warpType=="translation": p = tf.stack([e02,e12],axis=1) if opt.warpType=="similarity": p = tf.stack([e00-1,e10,e02,e12],axis=1) if opt.warpType=="affine": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1) if opt.warpType=="homography": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1) return p # warp the image def transformImage(opt,image,pMtrx): with tf.name_scope("transformImage"): refMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1]) transMtrx = tf.matmul(refMtrx,pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = 
np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYwarpHom = tf.matmul(transMtrx,XYhom) XwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1) Xwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) Ywarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) # get the integer sampling coordinates Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp) Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp) XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil) YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil) imageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W]) imageVec = tf.reshape(image,[-1,int(image.shape[-1])]) imageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0) idxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt idxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt idxBL = (imageIdx*opt.H+YceilInt)*opt.W+XfloorInt idxBR = (imageIdx*opt.H+YceilInt)*opt.W+XceilInt idxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W) def insideImage(Xint,Yint): return (Xint>=0)&(Xint=0)&(Yint

We provide TensorFlow code for the following experiments: - MNIST classification - traffic sign classification **[NEW!]** The PyTorch implementation of the MNIST experiment is now up! -------------------------------------- ## TensorFlow ### Prerequisites This code is developed with Python3 (`python3`) but it is also compatible with Python2.7 (`python`). TensorFlow r1.0+ is required. The dependencies can install by running ``` pip3 install --upgrade numpy scipy termcolor matplotlib tensorflow-gpu ``` If you're using Python2.7, use `pip2` instead; if you don't have sudo access, add the `--user` flag. ### Running the code The training code can be executed via the command ``` python3 train.py [(options)] ``` `` should be one of the following: 1. `CNN` - standard convolutional neural network 2. `STN` - Spatial Transformer Network (STN) 3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN) The list of optional arguments can be found by executing `python3 train.py --help`. The default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better. When the code is run for the first time, the datasets will be automatically downloaded and preprocessed. The checkpoints are saved in the automatically created directory `model_GROUP`; summaries are saved in `summary_GROUP`. ### Visualizing the results We've included code to visualize the training over TensorBoard. To execute, run ``` tensorboard --logdir=summary_GROUP --port=6006 ``` We provide three types of data visualization: 1. **SCALARS**: training/test error over iterations 2. **IMAGES**: alignment results and mean/variance appearances 3. **GRAPH**: network architecture -------------------------------------- ## PyTorch The PyTorch version of the code is stil under active development. The training speed is currently slower than the TensorFlow version. Suggestions on improvements are welcome! 
:) ### Prerequisites This code is developed with Python3 (`python3`). It has not been tested with Python2.7 yet. PyTorch 0.2.0+ is required. Please see http://pytorch.org/ for installation instructions. Visdom is also required; it can be installed by running ``` pip3 install --upgrade visdom ``` If you don't have sudo access, add the `--user` flag. ### Running the code First, start a Visdom server by running ``` python3 -m visdom.server -port=7000 ``` The training code can be executed via the command (using the same port number) ``` python3 train.py --port=7000 [(options)] ``` `` should be one of the following: 1. `CNN` - standard convolutional neural network 2. `STN` - Spatial Transformer Network (STN) 3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN) The list of optional arguments can be found by executing `python3 train.py --help`. The default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better. When the code is run for the first time, the datasets will be automatically downloaded and preprocessed. The checkpoints are saved in the automatically created directory `model_GROUP`; summaries are saved in `summary_GROUP`. ### Visualizing the results We provide three types of data visualization on Visdom: 1. Training/test error over iterations 2. Alignment results and mean/variance appearances -------------------------------------- If you find our code useful for your research, please cite ``` @inproceedings{lin2017inverse, title={Inverse Compositional Spatial Transformer Networks}, author={Lin, Chen-Hsuan and Lucey, Simon}, booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})}, year={2017} } ``` Please contact me (chlin@cmu.edu) if you have any questions! 
================================================ FILE: traffic-sign-tensorflow/data.py ================================================ import numpy as np import scipy.linalg,scipy.misc import os,time import tensorflow as tf import matplotlib.pyplot as plt import csv import warp # load GTSRB data def loadGTSRB(opt,fname): if not os.path.exists(fname): # download and preprocess GTSRB dataset os.makedirs(os.path.dirname(fname)) os.system("wget -O data/GTSRB_Final_Training_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip") os.system("wget -O data/GTSRB_Final_Test_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_Images.zip") os.system("wget -O data/GTSRB_Final_Test_GT.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_GT.zip") os.system("cd data && unzip GTSRB_Final_Training_Images.zip") os.system("cd data && unzip GTSRB_Final_Test_Images.zip") os.system("cd data && unzip GTSRB_Final_Test_GT.zip") # training data print("preparing training data...") images,bboxes,labels = [],[],[] for c in range(43): prefix = "data/GTSRB/Final_Training/Images/{0:05d}".format(c) with open("{0}/GT-{1:05d}.csv".format(prefix,c)) as file: reader = csv.reader(file,delimiter=";") next(reader) for line in reader: img = plt.imread(prefix+"/"+line[0]) rawH,rawW = img.shape[0],img.shape[1] scaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW imgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3)) images.append(imgResize) bboxes.append([float(line[3])*scaleW,float(line[4])*scaleH, float(line[5])*scaleW,float(line[6])*scaleH]) labels.append(int(line[7])) trainData = { "image": np.array(images), "bbox": np.array(bboxes), "label": np.array(labels) } # test data print("preparing test data...") images,bboxes,labels = [],[],[] prefix = "data/GTSRB/Final_Test/Images/" with open("data/GT-final_test.csv") as file: reader = csv.reader(file,delimiter=";") next(reader) for line in reader: img = plt.imread(prefix+"/"+line[0]) rawH,rawW = 
img.shape[0],img.shape[1] scaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW imgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3)) images.append(imgResize) bboxes.append([float(line[3])*scaleW,float(line[4])*scaleH, float(line[5])*scaleW,float(line[6])*scaleH]) labels.append(int(line[7])) testData = { "image": np.array(images), "bbox": np.array(bboxes), "label": np.array(labels) } np.savez(fname,train=trainData,test=testData) os.system("rm -rf data/*.zip") GTSRB = np.load(fname) trainData = GTSRB["train"].item() testData = GTSRB["test"].item() return trainData,testData # generate training batch def genPerturbations(opt): with tf.name_scope("genPerturbations"): X = np.tile(opt.canon4pts[:,0],[opt.batchSize,1]) Y = np.tile(opt.canon4pts[:,1],[opt.batchSize,1]) dX = tf.random_normal([opt.batchSize,4])*opt.pertScale \ +tf.random_normal([opt.batchSize,1])*opt.transScale dY = tf.random_normal([opt.batchSize,4])*opt.pertScale \ +tf.random_normal([opt.batchSize,1])*opt.transScale O = np.zeros([opt.batchSize,4],dtype=np.float32) I = np.ones([opt.batchSize,4],dtype=np.float32) # fit warp parameters to generated displacements if opt.warpType=="homography": A = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1), tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1) b = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1) pPert = tf.matrix_solve(A,b)[:,:,0] pPert -= tf.to_float([[1,0,0,0,1,0,0,0]]) else: if opt.warpType=="translation": J = np.concatenate([np.stack([I,O],axis=-1), np.stack([O,I],axis=-1)],axis=1) if opt.warpType=="similarity": J = np.concatenate([np.stack([X,Y,I,O],axis=-1), np.stack([-Y,X,O,I],axis=-1)],axis=1) if opt.warpType=="affine": J = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1), np.stack([O,O,O,X,Y,I],axis=-1)],axis=1) dXY = tf.expand_dims(tf.concat([dX,dY],1),-1) pPert = tf.matrix_solve_ls(J,dXY)[:,:,0] return pPert # make training batch def makeBatch(opt,data,PH): N = len(data["image"]) randIdx = 
np.random.randint(N,size=[opt.batchSize]) # put data in placeholders [image,label] = PH batch = { image: data["image"][randIdx]/255.0, label: data["label"][randIdx], } return batch # evaluation on test set def evalTest(opt,sess,data,PH,prediction,imagesEval=[]): N = len(data["image"]) # put data in placeholders [image,label] = PH batchN = int(np.ceil(N/opt.batchSize)) warped = [{},{}] count = 0 for b in range(batchN): # use some dummy data (0) as batch filler if necessary if b!=batchN-1: realIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1)) else: realIdx = np.arange(opt.batchSize*b,N) idx = np.zeros([opt.batchSize],dtype=int) idx[:len(realIdx)] = realIdx batch = { image: data["image"][idx]/255.0, label: data["label"][idx], } evalList = sess.run([prediction]+imagesEval,feed_dict=batch) pred = evalList[0] count += pred[:len(realIdx)].sum() if len(imagesEval)>0: imgs = evalList[1:] for i in range(len(realIdx)): if data["label"][idx[i]] not in warped[0]: warped[0][data["label"][idx[i]]] = [] if data["label"][idx[i]] not in warped[1]: warped[1][data["label"][idx[i]]] = [] warped[0][data["label"][idx[i]]].append(imgs[0][i]) warped[1][data["label"][idx[i]]].append(imgs[1][i]) accuracy = float(count)/N if len(imagesEval)>0: mean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]), np.array([np.mean(warped[1][l],axis=0) for l in warped[1]])] var = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]), np.array([np.var(warped[1][l],axis=0) for l in warped[1]])] else: mean,var = None,None return accuracy,mean,var ================================================ FILE: traffic-sign-tensorflow/graph.py ================================================ import numpy as np import tensorflow as tf import time import data,warp,util # build classification network def fullCNN(opt,image): def conv2Layer(opt,feat,outDim): weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC) conv = 
tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		# fully-connected layer: weight matrix + bias
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,12)
			feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		with tf.variable_scope("conv3"):
			feat = conv2Layer(opt,feat,24)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc4"):
			feat = linearLayer(opt,feat,200)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("fc5"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output

# build classification network
def CNN(opt,image):
	# Smaller classifier used on top of STN/IC-STN warped images; returns
	# [batchSize,labelN] logits. NOTE(review): near-duplicate of fullCNN
	# (fewer layers) — kept separate to match the paper's architectures.
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)
		fc = tf.matmul(feat,weight)+bias
		return fc
	with tf.variable_scope("classifier"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,12)
			feat = tf.nn.relu(feat)
			feat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding="VALID")
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc3"):
			feat = linearLayer(opt,feat,opt.labelN)
		output = feat
	return output

# build Spatial Transformer Network
def STN(opt,image):
	# Predicts a single warp from the image and applies it once.
	# Returns [input image, warped image].
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		fc = tf.matmul(feat,weight)+bias
		return fc
	imageWarpAll = [image]
	with tf.variable_scope("geometric"):
		feat = image
		with tf.variable_scope("conv1"):
			feat = conv2Layer(opt,feat,6)
			feat = tf.nn.relu(feat)
		with tf.variable_scope("conv2"):
			feat = conv2Layer(opt,feat,24)
			feat = tf.nn.relu(feat)
		feat = tf.reshape(feat,[opt.batchSize,-1])
		with tf.variable_scope("fc3"):
			feat = linearLayer(opt,feat,opt.warpDim)
		p = feat
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarp = warp.transformImage(opt,image,pMtrx)
	imageWarpAll.append(imageWarp)
	return imageWarpAll

# build Inverse Compositional STN
def ICSTN(opt,imageFull,p):
	# Recurrently predicts warp updates dp for warpN iterations, composing
	# each update into p; returns the warped crop after every iteration
	# (warpN+1 images in total).
	def conv2Layer(opt,feat,outDim):
		weight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		conv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding="VALID")+bias
		return conv
	def linearLayer(opt,feat,outDim):
		weight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)
		fc = tf.matmul(feat,weight)+bias
		return fc
	imageWarpAll = []
	for l in range(opt.warpN):
		# reuse the same geometric-predictor weights across all iterations
		with tf.variable_scope("geometric",reuse=l>0):
			pMtrx = warp.vec2mtrx(opt,p)
			imageWarp = warp.transformCropImage(opt,imageFull,pMtrx)
			imageWarpAll.append(imageWarp)
			feat = imageWarp
			with tf.variable_scope("conv1"):
				feat = conv2Layer(opt,feat,6)
				feat = tf.nn.relu(feat)
			with tf.variable_scope("conv2"):
				feat = conv2Layer(opt,feat,24)
				feat = tf.nn.relu(feat)
			feat = tf.reshape(feat,[opt.batchSize,-1])
			with tf.variable_scope("fc3"):
				feat = linearLayer(opt,feat,opt.warpDim)
			dp = feat
		# inverse-compositional update: fold dp into the running warp
		p = warp.compose(opt,p,dp)
	pMtrx = warp.vec2mtrx(opt,p)
	imageWarp = warp.transformCropImage(opt,imageFull,pMtrx)
	imageWarpAll.append(imageWarp)
	return imageWarpAll

# auxiliary function for creating weight and bias
def createVariable(opt,weightShape,biasShape=None,stddev=None):
	# biasShape defaults to the last dimension of the weight shape
	if biasShape is None: biasShape = [weightShape[-1]]
	weight = tf.get_variable("weight",shape=weightShape,dtype=tf.float32,
initializer=tf.random_normal_initializer(stddev=stddev)) bias = tf.get_variable("bias",shape=biasShape,dtype=tf.float32, initializer=tf.random_normal_initializer(stddev=stddev)) return weight,bias ================================================ FILE: traffic-sign-tensorflow/options.py ================================================ import numpy as np import argparse import warp import util def set(training): # parse input arguments parser = argparse.ArgumentParser() parser.add_argument("netType", choices=["CNN","STN","IC-STN"], help="type of network") parser.add_argument("--group", default="0", help="name for group") parser.add_argument("--model", default="test", help="name for model instance") parser.add_argument("--size", default="36x36", help="image resolution") parser.add_argument("--sizeFull", default="50x50", help="full image resolution") parser.add_argument("--warpType", default="homography", help="type of warp function on images", choices=["translation","similarity","affine","homography"]) parser.add_argument("--warpN", type=int, default=4, help="number of recurrent transformations (for IC-STN)") parser.add_argument("--stdC", type=float, default=0.01, help="initialization stddev (classification network)") parser.add_argument("--stdGP", type=float, default=0.001, help="initialization stddev (geometric predictor)") parser.add_argument("--pertScale", type=float, default=0.25, help="initial perturbation scale") parser.add_argument("--transScale", type=float, default=0.25, help="initial translation scale") if training: # training parser.add_argument("--batchSize", type=int, default=100, help="batch size for SGD") parser.add_argument("--lrC", type=float, default=1e-2, help="learning rate (classification network)") parser.add_argument("--lrCdecay", type=float, default=0.1, help="learning rate decay (classification network)") parser.add_argument("--lrCstep", type=int, default=500000, help="learning rate decay step size (classification network)") 
parser.add_argument("--lrGP", type=float, default=None, help="learning rate (geometric predictor)") parser.add_argument("--lrGPdecay", type=float, default=0.1, help="learning rate decay (geometric predictor)") parser.add_argument("--lrGPstep", type=int, default=500000, help="learning rate decay step size (geometric predictor)") parser.add_argument("--fromIt", type=int, default=0, help="resume training from iteration number") parser.add_argument("--toIt", type=int, default=1000000,help="run training to iteration number") else: # evaluation parser.add_argument("--batchSize", type=int, default=1, help="batch size for evaluation") opt = parser.parse_args() if opt.lrGP is None: opt.lrGP = 0 if opt.netType=="CNN" else \ 1e-3 if opt.netType=="STN" else \ 3e-5 if opt.netType=="IC-STN" else None # --- below are automatically set --- opt.training = training opt.H,opt.W = [int(x) for x in opt.size.split("x")] opt.fullH,opt.fullW = [int(x) for x in opt.sizeFull.split("x")] opt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize))) opt.warpDim = 2 if opt.warpType == "translation" else \ 4 if opt.warpType == "similarity" else \ 6 if opt.warpType == "affine" else \ 8 if opt.warpType == "homography" else None opt.labelN = 43 opt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32) opt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32) opt.bbox = [int(opt.fullW/2-opt.W/2),int(opt.fullH/2-opt.H/2),int(opt.fullW/2+opt.W/2),int(opt.fullH/2+opt.H/2)] opt.bbox4pts = np.array([[opt.bbox[0],opt.bbox[1]],[opt.bbox[0],opt.bbox[3]], [opt.bbox[2],opt.bbox[3]],[opt.bbox[2],opt.bbox[1]]],dtype=np.float32) opt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts) opt.bboxRefMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.bbox4pts) if opt.netType=="STN": opt.warpN = 1 print("({0}) {1}".format( util.toGreen("{0}".format(opt.group)), util.toGreen("{0}".format(opt.model)))) print("------------------------------------------") print("network type: 
{0}, recurrent warps: {1}".format( util.toYellow("{0}".format(opt.netType)), util.toYellow("{0}".format(opt.warpN if opt.netType=="IC-STN" else "X")))) print("batch size: {0}, image size: {1}x{2}".format( util.toYellow("{0}".format(opt.batchSize)), util.toYellow("{0}".format(opt.H)), util.toYellow("{0}".format(opt.W)))) print("warpScale: (pert) {0} (trans) {1}".format( util.toYellow("{0}".format(opt.pertScale)), util.toYellow("{0}".format(opt.transScale)))) if training: print("[geometric predictor] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdGP)), util.toYellow("{0:.0e}".format(opt.lrGP)))) print("[classification network] stddev={0}, lr={1}".format( util.toYellow("{0:.0e}".format(opt.stdC)), util.toYellow("{0:.0e}".format(opt.lrC)))) print("------------------------------------------") if training: print(util.toMagenta("training model ({0}) {1}...".format(opt.group,opt.model))) return opt ================================================ FILE: traffic-sign-tensorflow/train.py ================================================ import numpy as np import time,os,sys import argparse import util print(util.toYellow("=======================================================")) print(util.toYellow("train.py (training on MNIST)")) print(util.toYellow("=======================================================")) import tensorflow as tf import data,graph,warp,util import options print(util.toMagenta("setting configurations...")) opt = options.set(training=True) # create directories for model output util.mkdir("models_{0}".format(opt.group)) print(util.toMagenta("building graph...")) tf.reset_default_graph() # build graph with tf.device("/gpu:0"): # ------ define input data ------ imageFull = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.fullH,opt.fullW,3]) imageMean,imageVar = tf.nn.moments(imageFull,axes=[1,2],keep_dims=True) imageFullNormalize = (imageFull-imageMean)/tf.sqrt(imageVar) label = tf.placeholder(tf.int64,shape=[opt.batchSize]) PH = 
[imageFull,label]
	# ------ generate perturbation ------
	pInit = data.genPerturbations(opt)
	pInitMtrx = warp.vec2mtrx(opt,pInit)
	# ------ build network ------
	imagePert = warp.transformCropImage(opt,imageFullNormalize,pInitMtrx)
	# undo the per-image normalization for visualization
	imagePertRescale = imagePert*tf.sqrt(imageVar)+imageMean
	if opt.netType=="CNN":
		output = graph.fullCNN(opt,imagePert)
	elif opt.netType=="STN":
		imageWarpAll = graph.STN(opt,imagePert)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
		imageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean
	elif opt.netType=="IC-STN":
		imageWarpAll = graph.ICSTN(opt,imageFullNormalize,pInit)
		imageWarp = imageWarpAll[-1]
		output = graph.CNN(opt,imageWarp)
		imageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean
	softmax = tf.nn.softmax(output)
	labelOnehot = tf.one_hot(label,opt.labelN)
	prediction = tf.equal(tf.argmax(softmax,1),label)
	# ------ define loss ------
	softmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot)
	loss = tf.reduce_mean(softmaxLoss)
	# ------ optimizer ------
	# separate learning-rate placeholders for geometric predictor / classifier
	lrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[])
	optim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH)
	# ------ generate summaries ------
	summaryImageTrain = []
	summaryImageTest = []
	if opt.netType=="STN" or opt.netType=="IC-STN":
		# one image summary per warp iteration (plus the initial image)
		for l in range(opt.warpN+1):
			summaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,"TRAIN_warp{0}".format(l),opt.H,opt.W))
			summaryImageTest.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,"TEST_warp{0}".format(l),opt.H,opt.W))
		summaryImageTrain = tf.summary.merge(summaryImageTrain)
		summaryImageTest = tf.summary.merge(summaryImageTest)
	summaryLossTrain = tf.summary.scalar("TRAIN_loss",loss)
	testErrorPH = tf.placeholder(tf.float32,shape=[])
	testImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,3])
	summaryErrorTest = tf.summary.scalar("TEST_error",testErrorPH)
	if opt.netType=="STN" or opt.netType=="IC-STN":
		summaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_init",opt.H,opt.W)
		summaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_mean_warped",opt.H,opt.W)
		summaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_var_init",opt.H,opt.W)
		summaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH,"TEST_var_warped",opt.H,opt.W)

# load data
print(util.toMagenta("loading GTSRB dataset..."))
trainData,testData = data.loadGTSRB(opt,"data/GTSRB.npz")

# prepare model saver/summary writer
saver = tf.train.Saver(max_to_keep=20)
summaryWriter = tf.summary.FileWriter("summary_{0}/{1}".format(opt.group,opt.model))

print(util.toYellow("======= TRAINING START ======="))
timeStart = time.time()
# start session
tfConfig = tf.ConfigProto(allow_soft_placement=True)
tfConfig.gpu_options.allow_growth = True
with tf.Session(config=tfConfig) as sess:
	sess.run(tf.global_variables_initializer())
	summaryWriter.add_graph(sess.graph)
	if opt.fromIt!=0:
		util.restoreModel(opt,sess,saver,opt.fromIt)
		print(util.toMagenta("resuming from iteration {0}...".format(opt.fromIt)))
	print(util.toMagenta("start training..."))
	# training loop
	for i in range(opt.fromIt,opt.toIt):
		# staircase learning-rate decay
		lrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep)
		lrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep)
		# make training batch
		batch = data.makeBatch(opt,trainData,PH)
		batch[lrGP_PH] = lrGP
		batch[lrC_PH] = lrC
		# run one step
		_,l = sess.run([optim,loss],feed_dict=batch)
		if (i+1)%100==0:
			print("it. {0}/{1} lr={3}(GP),{4}(C), loss={5}, time={2}"
				.format(util.toCyan("{0}".format(i+1)),
						opt.toIt,
						util.toGreen("{0:.2f}".format(time.time()-timeStart)),
						util.toYellow("{0:.0e}".format(lrGP)),
						util.toYellow("{0:.0e}".format(lrC)),
						util.toRed("{0:.4f}".format(l))))
		if (i+1)%100==0:
			summaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1)
		if (i+1)%500==0 and (opt.netType=="STN" or opt.netType=="IC-STN"):
			summaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1)
			summaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1)
		if (i+1)%1000==0:
			# evaluate on test set
			if opt.netType=="STN" or opt.netType=="IC-STN":
				testAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp])
			else:
				testAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction)
			testError = (1-testAcc)*100
			summaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1)
			if opt.netType=="STN" or opt.netType=="IC-STN":
				# per-class mean/variance images before and after warping
				summaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1)
				summaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1)
		if (i+1)%10000==0:
			util.saveModel(opt,sess,saver,i+1)
			print(util.toGreen("model saved: {0}/{1}, it.{2}".format(opt.group,opt.model,i+1)))

	print(util.toYellow("======= TRAINING DONE ======="))

# ================================================
# FILE: traffic-sign-tensorflow/util.py
# ================================================
import numpy as np
import scipy.misc
import tensorflow as tf
import os
import termcolor

# create the directory if it does not already exist
def mkdir(path):
	if not os.path.exists(path): os.mkdir(path)
# read an image as floats in [0,1]
def imread(fname):
	return scipy.misc.imread(fname)/255.0
# save a float image in [0,1]
def imsave(fname,array):
	scipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)

# convert
# to colored strings
def toRed(content): return termcolor.colored(content,"red",attrs=["bold"])
def toGreen(content): return termcolor.colored(content,"green",attrs=["bold"])
def toBlue(content): return termcolor.colored(content,"blue",attrs=["bold"])
def toCyan(content): return termcolor.colored(content,"cyan",attrs=["bold"])
def toYellow(content): return termcolor.colored(content,"yellow",attrs=["bold"])
def toMagenta(content): return termcolor.colored(content,"magenta",attrs=["bold"])

# make image summary from image batch
def imageSummary(opt,image,tag,H,W):
	# Tile the first visBlockSize**2 images of the batch into one
	# (H*blockSize)x(W*blockSize) grid and wrap it as a tf.summary.image.
	blockSize = opt.visBlockSize
	imageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize)
	imagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1])
	imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary

# make image summary from image batch (mean/variance)
def imageSummaryMeanVar(opt,image,tag,H,W):
	# Pads the batch with 2 blank images so it fills a fixed 5x9 grid
	# (assumes a batch of labelN=43 per-class images — 43+2=45=5*9).
	image = tf.concat([image,np.zeros([2,H,W,3])],axis=0)
	imageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[5,9])
	imagePermute = tf.reshape(imageOne,[H,5,W,9,-1])
	imageTransp = tf.transpose(imagePermute,[1,0,3,2,4])
	imageBlocks = tf.reshape(imageTransp,[1,H*5,W*9,-1])
	# imageBlocks = tf.cast(imageBlocks*255,tf.uint8)
	summary = tf.summary.image(tag,imageBlocks)
	return summary

# set optimizer for different learning rates
def setOptimizer(opt,loss,lrGP,lrC):
	# Builds two SGD optimizers so the geometric predictor ("geometric" scope)
	# and the classifier ("classifier" scope) can use different learning rates;
	# returns a single grouped training op.
	varsGP = [v for v in tf.global_variables() if "geometric" in v.name]
	varsC = [v for v in tf.global_variables() if "classifier" in v.name]
	gradC = tf.gradients(loss,varsC)
	optimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC))
	if len(varsGP)>0:
		gradGP = tf.gradients(loss,varsGP)
		optimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP))
		optim = tf.group(optimC,optimGP)
	# pure-CNN case: no geometric variables exist, so only train the classifier
	else: optim =
optimC return optim # restore model def restoreModel(opt,sess,saver,it): saver.restore(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) # save model def saveModel(opt,sess,saver,it): saver.save(sess,"models_{0}/{1}_it{2}.ckpt".format(opt.group,opt.model,it,opt.warpN)) ================================================ FILE: traffic-sign-tensorflow/warp.py ================================================ import numpy as np import scipy.linalg import tensorflow as tf # fit (affine) warp between two sets of points def fit(Xsrc,Xdst): ptsN = len(Xsrc) X,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN]) A = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1), np.stack([O,O,O,X,Y,I],axis=1)),axis=0) b = np.concatenate((U,V),axis=0) p1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze() pMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32) return pMtrx # compute composition of warp parameters def compose(opt,p,dp): with tf.name_scope("compose"): pMtrx = vec2mtrx(opt,p) dpMtrx = vec2mtrx(opt,dp) pMtrxNew = tf.matmul(dpMtrx,pMtrx) pMtrxNew /= pMtrxNew[:,2:3,2:3] pNew = mtrx2vec(opt,pMtrxNew) return pNew # compute inverse of warp parameters def inverse(opt,p): with tf.name_scope("inverse"): pMtrx = vec2mtrx(opt,p) pInvMtrx = tf.matrix_inverse(pMtrx) pInv = mtrx2vec(opt,pInvMtrx) return pInv # convert warp parameters to matrix def vec2mtrx(opt,p): with tf.name_scope("vec2mtrx"): O = tf.zeros([opt.batchSize]) I = tf.ones([opt.batchSize]) if opt.warpType=="translation": tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="similarity": pc,ps,tx,ty = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="affine": p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1]) if opt.warpType=="homography": 
p1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1) pMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1]) return pMtrx # convert warp matrix to parameters def mtrx2vec(opt,pMtrx): with tf.name_scope("mtrx2vec"): [row0,row1,row2] = tf.unstack(pMtrx,axis=1) [e00,e01,e02] = tf.unstack(row0,axis=1) [e10,e11,e12] = tf.unstack(row1,axis=1) [e20,e21,e22] = tf.unstack(row2,axis=1) if opt.warpType=="translation": p = tf.stack([e02,e12],axis=1) if opt.warpType=="similarity": p = tf.stack([e00-1,e10,e02,e12],axis=1) if opt.warpType=="affine": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1) if opt.warpType=="homography": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1) return p # warp the image def transformImage(opt,image,pMtrx): with tf.name_scope("transformImage"): refMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1]) transMtrx = tf.matmul(refMtrx,pMtrx) # warp the canonical coordinates X,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H)) X,Y = X.flatten(),Y.flatten() XYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T XYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32) XYwarpHom = tf.matmul(transMtrx,XYhom) XwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1) Xwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) Ywarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W]) # get the integer sampling coordinates Xfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp) Yfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp) XfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil) YfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil) imageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W]) imageVec = tf.reshape(image,[-1,int(image.shape[-1])]) imageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0) idxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt idxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt idxBL = 
(imageIdx*opt.H+YceilInt)*opt.W+XfloorInt idxBR = (imageIdx*opt.H+YceilInt)*opt.W+XceilInt idxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W) def insideImage(Xint,Yint): return (Xint>=0)&(Xint=0)&(Yint=0)&(Xint=0)&(Yint