[
  {
    "path": ".editorconfig",
    "content": "root = true\n\n[*]\nend_of_line = lf\ninsert_final_newline = true\nindent_style = tab\nindent_size = 4\ntrim_trailing_whitespace = true\n\n[*.md]\ntrim_trailing_whitespace = false\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*,cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# IPython Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# dotenv\n.env\n\n# virtualenv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n\n# Rope project settings\n.ropeproject\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2018 Chen-Hsuan Lin\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "MNIST-pytorch/data.py",
    "content": "import numpy as np\nimport scipy.linalg\nimport os,time\nimport torch\nimport torchvision\n\nimport warp,util\n\n# load MNIST data\ndef loadMNIST(opt,path):\n\tos.makedirs(path,exist_ok=True)\n\ttrainDataset = torchvision.datasets.MNIST(path,train=True,download=True)\n\ttestDataset = torchvision.datasets.MNIST(path,train=False,download=True)\n\ttrainData,testData = {},{}\n\ttrainData[\"image\"] = torch.tensor([np.array(sample[0])/255.0 for sample in trainDataset],dtype=torch.float32)\n\ttestData[\"image\"] = torch.tensor([np.array(sample[0])/255.0 for sample in testDataset],dtype=torch.float32)\n\ttrainData[\"label\"] = torch.tensor([sample[1] for sample in trainDataset])\n\ttestData[\"label\"] = torch.tensor([sample[1] for sample in testDataset])\n\treturn trainData,testData\n\n# generate training batch\ndef genPerturbations(opt):\n\tX = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])\n\tY = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])\n\tO = np.zeros([opt.batchSize,4],dtype=np.float32)\n\tI = np.ones([opt.batchSize,4],dtype=np.float32)\n\tdX = np.random.randn(opt.batchSize,4)*opt.pertScale \\\n\t\t+np.random.randn(opt.batchSize,1)*opt.transScale\n\tdY = np.random.randn(opt.batchSize,4)*opt.pertScale \\\n\t\t+np.random.randn(opt.batchSize,1)*opt.transScale\n\tdX,dY = dX.astype(np.float32),dY.astype(np.float32)\n\t# fit warp parameters to generated displacements\n\tif opt.warpType==\"homography\":\n\t\tA = np.concatenate([np.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),\n\t\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],axis=1)\n\t\tb = np.expand_dims(np.concatenate([X+dX,Y+dY],axis=1),axis=-1)\n\t\tpPert = np.matmul(np.linalg.inv(A),b).squeeze()\n\t\tpPert -= np.array([1,0,0,0,1,0,0,0])\n\telse:\n\t\tif opt.warpType==\"translation\":\n\t\t\tJ = np.concatenate([np.stack([I,O],axis=-1),\n\t\t\t\t\t\t\t\tnp.stack([O,I],axis=-1)],axis=1)\n\t\tif opt.warpType==\"similarity\":\n\t\t\tJ = 
np.concatenate([np.stack([X,Y,I,O],axis=-1),\n\t\t\t\t\t\t\t\tnp.stack([-Y,X,O,I],axis=-1)],axis=1)\n\t\tif opt.warpType==\"affine\":\n\t\t\tJ = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),\n\t\t\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=-1)],axis=1)\n\t\tdXY = np.expand_dims(np.concatenate([dX,dY],axis=1),axis=-1)\n\t\tJtransp = np.transpose(J,axes=[0,2,1])\n\t\tpPert = np.matmul(np.linalg.inv(np.matmul(Jtransp,J)),np.matmul(Jtransp,dXY)).squeeze()\n\tpInit = torch.from_numpy(pPert).cuda()\n\treturn pInit\n\n# make training batch\ndef makeBatch(opt,data):\n\tN = len(data[\"image\"])\n\trandIdx = np.random.randint(N,size=[opt.batchSize])\n\tbatch = {\n\t\t\"image\": data[\"image\"][randIdx].cuda(),\n\t\t\"label\": data[\"label\"][randIdx].cuda(),\n\t}\n\treturn batch\n\n# evaluation on test set\ndef evalTest(opt,data,geometric,classifier):\n\tgeometric.eval()\n\tclassifier.eval()\n\tN = len(data[\"image\"])\n\tbatchN = int(np.ceil(N/opt.batchSize))\n\twarped = [{},{}]\n\tcount = 0\n\tfor b in range(batchN):\n\t\t# use some dummy data (0) as batch filler if necessary\n\t\tif b!=batchN-1:\n\t\t\trealIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1))\n\t\telse:\n\t\t\trealIdx = np.arange(opt.batchSize*b,N)\n\t\tidx = np.zeros([opt.batchSize],dtype=int)\n\t\tidx[:len(realIdx)] = realIdx\n\t\t# make training batch\n\t\timage = data[\"image\"][idx].cuda()\n\t\tlabel = data[\"label\"][idx].cuda()\n\t\timage.data.unsqueeze_(dim=1)\n\t\t# generate perturbation\n\t\tpInit = genPerturbations(opt)\n\t\tpInitMtrx = warp.vec2mtrx(opt,pInit)\n\t\timagePert = warp.transformImage(opt,image,pInitMtrx)\n\t\timageWarpAll = geometric(opt,image,pInit) if opt.netType==\"IC-STN\" else geometric(opt,imagePert)\n\t\timageWarp = imageWarpAll[-1]\n\t\toutput = classifier(opt,imageWarp)\n\t\t_,pred = output.max(dim=1)\n\t\tcount += int((pred==label).sum().cpu().numpy())\n\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\n\t\t\timgPert = imagePert.detach().cpu().numpy()\n\t\t\timgWarp 
= imageWarp.detach().cpu().numpy()\n\t\t\tfor i in range(len(realIdx)):\n\t\t\t\tl = data[\"label\"][idx[i]].item()\n\t\t\t\tif l not in warped[0]: warped[0][l] = []\n\t\t\t\tif l not in warped[1]: warped[1][l] = []\n\t\t\t\twarped[0][l].append(imgPert[i])\n\t\t\t\twarped[1][l].append(imgWarp[i])\n\taccuracy = float(count)/N\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\n\t\tmean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t\tnp.array([np.mean(warped[1][l],axis=0) for l in warped[1]])]\n\t\tvar = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t   np.array([np.var(warped[1][l],axis=0) for l in warped[1]])]\n\telse: mean,var = None,None\n\tgeometric.train()\n\tclassifier.train()\n\treturn accuracy,mean,var\n"
  },
  {
    "path": "MNIST-pytorch/graph.py",
    "content": "import numpy as np\nimport torch\nimport time\nimport data,warp,util\n\n# build classification network\nclass FullCNN(torch.nn.Module):\n\tdef __init__(self,opt):\n\t\tsuper(FullCNN,self).__init__()\n\t\tself.inDim = 1\n\t\tdef conv2Layer(outDim):\n\t\t\tconv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[3,3],stride=1,padding=0)\n\t\t\tself.inDim = outDim\n\t\t\treturn conv\n\t\tdef linearLayer(outDim):\n\t\t\tfc = torch.nn.Linear(self.inDim,outDim)\n\t\t\tself.inDim = outDim\n\t\t\treturn fc\n\t\tdef maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)\n\t\tself.conv2Layers = torch.nn.Sequential(\n\t\t\tconv2Layer(3),torch.nn.ReLU(True),\n\t\t\tconv2Layer(6),torch.nn.ReLU(True),maxpoolLayer(),\n\t\t\tconv2Layer(9),torch.nn.ReLU(True),\n\t\t\tconv2Layer(12),torch.nn.ReLU(True)\n\t\t)\n\t\tself.inDim *= 8**2\n\t\tself.linearLayers = torch.nn.Sequential(\n\t\t\tlinearLayer(48),torch.nn.ReLU(True),\n\t\t\tlinearLayer(opt.labelN)\n\t\t)\n\t\tinitialize(opt,self,opt.stdC)\n\tdef forward(self,opt,image):\n\t\tfeat = image\n\t\tfeat = self.conv2Layers(feat).reshape(opt.batchSize,-1)\n\t\tfeat = self.linearLayers(feat)\n\t\toutput = feat\n\t\treturn output\n\n# build classification network\nclass CNN(torch.nn.Module):\n\tdef __init__(self,opt):\n\t\tsuper(CNN,self).__init__()\n\t\tself.inDim = 1\n\t\tdef conv2Layer(outDim):\n\t\t\tconv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[9,9],stride=1,padding=0)\n\t\t\tself.inDim = outDim\n\t\t\treturn conv\n\t\tdef linearLayer(outDim):\n\t\t\tfc = torch.nn.Linear(self.inDim,outDim)\n\t\t\tself.inDim = outDim\n\t\t\treturn fc\n\t\tdef maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)\n\t\tself.conv2Layers = torch.nn.Sequential(\n\t\t\tconv2Layer(3),torch.nn.ReLU(True)\n\t\t)\n\t\tself.inDim *= 20**2\n\t\tself.linearLayers = torch.nn.Sequential(\n\t\t\tlinearLayer(opt.labelN)\n\t\t)\n\t\tinitialize(opt,self,opt.stdC)\n\tdef forward(self,opt,image):\n\t\tfeat = image\n\t\tfeat = 
self.conv2Layers(feat).reshape(opt.batchSize,-1)\n\t\tfeat = self.linearLayers(feat)\n\t\toutput = feat\n\t\treturn output\n\n# an identity class to skip geometric predictors\nclass Identity(torch.nn.Module):\n\tdef __init__(self): super(Identity,self).__init__()\n\tdef forward(self,opt,feat): return [feat]\n\n# build Spatial Transformer Network\nclass STN(torch.nn.Module):\n\tdef __init__(self,opt):\n\t\tsuper(STN,self).__init__()\n\t\tself.inDim = 1\n\t\tdef conv2Layer(outDim):\n\t\t\tconv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0)\n\t\t\tself.inDim = outDim\n\t\t\treturn conv\n\t\tdef linearLayer(outDim):\n\t\t\tfc = torch.nn.Linear(self.inDim,outDim)\n\t\t\tself.inDim = outDim\n\t\t\treturn fc\n\t\tdef maxpoolLayer(): return torch.nn.MaxPool2d([2,2],stride=2)\n\t\tself.conv2Layers = torch.nn.Sequential(\n\t\t\tconv2Layer(4),torch.nn.ReLU(True),\n\t\t\tconv2Layer(8),torch.nn.ReLU(True),maxpoolLayer()\n\t\t)\n\t\tself.inDim *= 8**2\n\t\tself.linearLayers = torch.nn.Sequential(\n\t\t\tlinearLayer(48),torch.nn.ReLU(True),\n\t\t\tlinearLayer(opt.warpDim)\n\t\t)\n\t\tinitialize(opt,self,opt.stdGP,last0=True)\n\tdef forward(self,opt,image):\n\t\timageWarpAll = [image]\n\t\tfeat = image\n\t\tfeat = self.conv2Layers(feat).reshape(opt.batchSize,-1)\n\t\tfeat = self.linearLayers(feat)\n\t\tp = feat\n\t\tpMtrx = warp.vec2mtrx(opt,p)\n\t\timageWarp = warp.transformImage(opt,image,pMtrx)\n\t\timageWarpAll.append(imageWarp)\n\t\treturn imageWarpAll\n\n# build Inverse Compositional STN\nclass ICSTN(torch.nn.Module):\n\tdef __init__(self,opt):\n\t\tsuper(ICSTN,self).__init__()\n\t\tself.inDim = 1\n\t\tdef conv2Layer(outDim):\n\t\t\tconv = torch.nn.Conv2d(self.inDim,outDim,kernel_size=[7,7],stride=1,padding=0)\n\t\t\tself.inDim = outDim\n\t\t\treturn conv\n\t\tdef linearLayer(outDim):\n\t\t\tfc = torch.nn.Linear(self.inDim,outDim)\n\t\t\tself.inDim = outDim\n\t\t\treturn fc\n\t\tdef maxpoolLayer(): return 
torch.nn.MaxPool2d([2,2],stride=2)\n\t\tself.conv2Layers = torch.nn.Sequential(\n\t\t\tconv2Layer(4),torch.nn.ReLU(True),\n\t\t\tconv2Layer(8),torch.nn.ReLU(True),maxpoolLayer()\n\t\t)\n\t\tself.inDim *= 8**2\n\t\tself.linearLayers = torch.nn.Sequential(\n\t\t\tlinearLayer(48),torch.nn.ReLU(True),\n\t\t\tlinearLayer(opt.warpDim)\n\t\t)\n\t\tinitialize(opt,self,opt.stdGP,last0=True)\n\tdef forward(self,opt,image,p):\n\t\timageWarpAll = []\n\t\tfor l in range(opt.warpN):\n\t\t\tpMtrx = warp.vec2mtrx(opt,p)\n\t\t\timageWarp = warp.transformImage(opt,image,pMtrx)\n\t\t\timageWarpAll.append(imageWarp)\n\t\t\tfeat = imageWarp\n\t\t\tfeat = self.conv2Layers(feat).reshape(opt.batchSize,-1)\n\t\t\tfeat = self.linearLayers(feat)\n\t\t\tdp = feat\n\t\t\tp = warp.compose(opt,p,dp)\n\t\tpMtrx = warp.vec2mtrx(opt,p)\n\t\timageWarp = warp.transformImage(opt,image,pMtrx)\n\t\timageWarpAll.append(imageWarp)\n\t\treturn imageWarpAll\n\n# initialize weights/biases\ndef initialize(opt,model,stddev,last0=False):\n\tfor m in model.conv2Layers:\n\t\tif isinstance(m,torch.nn.Conv2d):\n\t\t\tm.weight.data.normal_(0,stddev)\n\t\t\tm.bias.data.normal_(0,stddev)\n\tfor m in model.linearLayers:\n\t\tif isinstance(m,torch.nn.Linear):\n\t\t\tif last0 and m is model.linearLayers[-1]:\n\t\t\t\tm.weight.data.zero_()\n\t\t\t\tm.bias.data.zero_()\n\t\t\telse:\n\t\t\t\tm.weight.data.normal_(0,stddev)\n\t\t\t\tm.bias.data.normal_(0,stddev)\n"
  },
  {
    "path": "MNIST-pytorch/options.py",
    "content": "import numpy as np\nimport argparse\nimport warp\nimport util\nimport torch\n\ndef set(training):\n\n\t# parse input arguments\n\tparser = argparse.ArgumentParser()\n\tparser.add_argument(\"netType\",\t\tchoices=[\"CNN\",\"STN\",\"IC-STN\"],\t\thelp=\"type of network\")\n\tparser.add_argument(\"--group\",\t\t\t\t\tdefault=\"0\",\t\t\thelp=\"name for group\")\n\tparser.add_argument(\"--model\",\t\t\t\t\tdefault=\"test\",\t\t\thelp=\"name for model instance\")\n\tparser.add_argument(\"--size\",\t\t\t\t\tdefault=\"28x28\",\t\thelp=\"image resolution\")\n\tparser.add_argument(\"--warpType\",\t\t\t\tdefault=\"homography\",\thelp=\"type of warp function on images\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tchoices=[\"translation\",\"similarity\",\"affine\",\"homography\"])\n\tparser.add_argument(\"--warpN\",\t\ttype=int,\tdefault=4,\t\t\t\thelp=\"number of recurrent transformations (for IC-STN)\")\n\tparser.add_argument(\"--stdC\",\t\ttype=float,\tdefault=0.1,\t\t\thelp=\"initialization stddev (classification network)\")\n\tparser.add_argument(\"--stdGP\",\t\ttype=float,\tdefault=0.1,\t\t\thelp=\"initialization stddev (geometric predictor)\")\n\tparser.add_argument(\"--pertScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial perturbation scale\")\n\tparser.add_argument(\"--transScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial translation scale\")\n\tif training: # training\n\t\tparser.add_argument(\"--port\",\t\ttype=int,\tdefault=8097,\thelp=\"port number for visdom visualization\")\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=100,\thelp=\"batch size for SGD\")\n\t\tparser.add_argument(\"--lrC\",\t\ttype=float,\tdefault=1e-2,\thelp=\"learning rate (classification network)\")\n\t\tparser.add_argument(\"--lrGP\",\t\ttype=float,\tdefault=None,\thelp=\"learning rate (geometric predictor)\")\n\t\tparser.add_argument(\"--lrDecay\",\ttype=float,\tdefault=1.0,\thelp=\"learning rate 
decay\")\n\t\tparser.add_argument(\"--lrStep\",\t\ttype=int,\tdefault=100000,\thelp=\"learning rate decay step size\")\n\t\tparser.add_argument(\"--fromIt\",\t\ttype=int,\tdefault=0,\t\thelp=\"resume training from iteration number\")\n\t\tparser.add_argument(\"--toIt\",\t\ttype=int,\tdefault=500000,\thelp=\"run training to iteration number\")\n\telse: # evaluation\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=1,\t\thelp=\"batch size for evaluation\")\n\topt = parser.parse_args()\n\n\tif opt.lrGP is None: opt.lrGP = 0 if opt.netType==\"CNN\" else \\\n\t\t\t\t\t\t\t\t\t1e-2 if opt.netType==\"STN\" else \\\n\t\t\t\t\t\t\t\t\t1e-4 if opt.netType==\"IC-STN\" else None\n\n\t# --- below are automatically set ---\n\tassert(torch.cuda.is_available()) # support only training on GPU for now\n\ttorch.set_default_tensor_type(\"torch.cuda.FloatTensor\")\n\topt.training = training\n\topt.H,opt.W = [int(x) for x in opt.size.split(\"x\")]\n\topt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))\n\topt.warpDim = 2 if opt.warpType == \"translation\" else \\\n\t\t\t\t  4 if opt.warpType == \"similarity\" else \\\n\t\t\t\t  6 if opt.warpType == \"affine\" else \\\n\t\t\t\t  8 if opt.warpType == \"homography\" else None\n\topt.labelN = 10\n\topt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32)\n\topt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32)\n\topt.refMtrx = np.eye(3).astype(np.float32)\n\tif opt.netType==\"STN\": opt.warpN = 1\n\n\tprint(\"({0}) {1}\".format(\n\t\tutil.toGreen(\"{0}\".format(opt.group)),\n\t\tutil.toGreen(\"{0}\".format(opt.model))))\n\tprint(\"------------------------------------------\")\n\tprint(\"network type: {0}, recurrent warps: {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.netType)),\n\t\tutil.toYellow(\"{0}\".format(opt.warpN if opt.netType==\"IC-STN\" else \"X\"))))\n\tprint(\"batch size: {0}, image size: 
{1}x{2}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.batchSize)),\n\t\tutil.toYellow(\"{0}\".format(opt.H)),\n\t\tutil.toYellow(\"{0}\".format(opt.W))))\n\tprint(\"warpScale: (pert) {0} (trans) {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.pertScale)),\n\t\tutil.toYellow(\"{0}\".format(opt.transScale))))\n\tif training:\n\t\tprint(\"[geometric predictor]    stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdGP)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrGP))))\n\t\tprint(\"[classification network] stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdC)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrC))))\n\tprint(\"------------------------------------------\")\n\tif training:\n\t\tprint(util.toMagenta(\"training model ({0}) {1}...\".format(opt.group,opt.model)))\n\n\treturn opt\n"
  },
  {
    "path": "MNIST-pytorch/train.py",
    "content": "import numpy as np\r\nimport time,os,sys\r\nimport argparse\r\nimport util\r\n\r\nprint(util.toYellow(\"=======================================================\"))\r\nprint(util.toYellow(\"train.py (training on MNIST)\"))\r\nprint(util.toYellow(\"=======================================================\"))\r\n\r\nimport torch\r\nimport data,graph,warp,util\r\nimport options\r\n\r\nprint(util.toMagenta(\"setting configurations...\"))\r\nopt = options.set(training=True)\r\n\r\n# create directories for model output\r\nutil.mkdir(\"models_{0}\".format(opt.group))\r\n\r\nprint(util.toMagenta(\"building network...\"))\r\nwith torch.cuda.device(0):\r\n\t# ------ build network ------\r\n\tif opt.netType==\"CNN\":\r\n\t\tgeometric = graph.Identity()\r\n\t\tclassifier = graph.FullCNN(opt)\r\n\telif opt.netType==\"STN\":\r\n\t\tgeometric = graph.STN(opt)\r\n\t\tclassifier = graph.CNN(opt)\r\n\telif opt.netType==\"IC-STN\":\r\n\t\tgeometric = graph.ICSTN(opt)\r\n\t\tclassifier = graph.CNN(opt)\r\n\t# ------ define loss ------\r\n\tloss = torch.nn.CrossEntropyLoss()\r\n\t# ------ optimizer ------\r\n\toptimList = [{ \"params\": geometric.parameters(), \"lr\": opt.lrGP },\r\n\t\t\t\t { \"params\": classifier.parameters(), \"lr\": opt.lrC }]\r\n\toptim = torch.optim.SGD(optimList)\r\n\r\n# load data\r\nprint(util.toMagenta(\"loading MNIST dataset...\"))\r\ntrainData,testData = data.loadMNIST(opt,\"data\")\r\n\r\n# visdom visualizer\r\nvis = util.Visdom(opt)\r\n\r\nprint(util.toYellow(\"======= TRAINING START =======\"))\r\ntimeStart = time.time()\r\n# start session\r\nwith torch.cuda.device(0):\r\n\tgeometric.train()\r\n\tclassifier.train()\r\n\tif opt.fromIt!=0:\r\n\t\tutil.restoreModel(opt,geometric,classifier,opt.fromIt)\r\n\t\tprint(util.toMagenta(\"resuming from iteration {0}...\".format(opt.fromIt)))\r\n\tprint(util.toMagenta(\"start training...\"))\r\n\r\n\t# training loop\r\n\tfor i in range(opt.fromIt,opt.toIt):\r\n\t\tlrGP = 
opt.lrGP*opt.lrDecay**(i//opt.lrStep)\r\n\t\tlrC = opt.lrC*opt.lrDecay**(i//opt.lrStep)\r\n\t\t# make training batch\r\n\t\tbatch = data.makeBatch(opt,trainData)\r\n\t\timage = batch[\"image\"].unsqueeze(dim=1)\r\n\t\tlabel = batch[\"label\"]\r\n\t\t# generate perturbation\r\n\t\tpInit = data.genPerturbations(opt)\r\n\t\tpInitMtrx = warp.vec2mtrx(opt,pInit)\r\n\t\t# forward/backprop through network\r\n\t\toptim.zero_grad()\r\n\t\timagePert = warp.transformImage(opt,image,pInitMtrx)\r\n\t\timageWarpAll = geometric(opt,image,pInit) if opt.netType==\"IC-STN\" else geometric(opt,imagePert)\r\n\t\timageWarp = imageWarpAll[-1]\r\n\t\toutput = classifier(opt,imageWarp)\r\n\t\ttrain_loss = loss(output,label)\r\n\t\ttrain_loss.backward()\r\n\t\t# run one step\r\n\t\toptim.step()\r\n\t\tif (i+1)%100==0:\r\n\t\t\tprint(\"it. {0}/{1}  lr={3}(GP),{4}(C), loss={5}, time={2}\"\r\n\t\t\t\t.format(util.toCyan(\"{0}\".format(i+1)),\r\n\t\t\t\t\t\topt.toIt,\r\n\t\t\t\t\t\tutil.toGreen(\"{0:.2f}\".format(time.time()-timeStart)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrGP)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrC)),\r\n\t\t\t\t\t\tutil.toRed(\"{0:.4f}\".format(train_loss))))\r\n\t\tif (i+1)%200==0: vis.trainLoss(opt,i+1,train_loss)\r\n\t\tif (i+1)%1000==0:\r\n\t\t\t# evaluate on test set\r\n\t\t\ttestAcc,testMean,testVar = data.evalTest(opt,testData,geometric,classifier)\r\n\t\t\ttestError = (1-testAcc)*100\r\n\t\t\tvis.testLoss(opt,i+1,testError)\r\n\t\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\t\t\tvis.meanVar(opt,testMean,testVar)\r\n\t\tif (i+1)%10000==0:\r\n\t\t\tutil.saveModel(opt,geometric,classifier,i+1)\r\n\t\t\tprint(util.toGreen(\"model saved: {0}/{1}, it.{2}\".format(opt.group,opt.model,i+1)))\r\n\r\nprint(util.toYellow(\"======= TRAINING DONE =======\"))\r\n"
  },
  {
    "path": "MNIST-pytorch/util.py",
    "content": "import numpy as np\nimport scipy.misc\nimport torch\nimport os\nimport termcolor\nimport visdom\n\ndef mkdir(path):\n\tif not os.path.exists(path): os.mkdir(path)\ndef imread(fname):\n\treturn scipy.misc.imread(fname)/255.0\ndef imsave(fname,array):\n\tscipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)\n\n# convert to colored strings\ndef toRed(content): return termcolor.colored(content,\"red\",attrs=[\"bold\"])\ndef toGreen(content): return termcolor.colored(content,\"green\",attrs=[\"bold\"])\ndef toBlue(content): return termcolor.colored(content,\"blue\",attrs=[\"bold\"])\ndef toCyan(content): return termcolor.colored(content,\"cyan\",attrs=[\"bold\"])\ndef toYellow(content): return termcolor.colored(content,\"yellow\",attrs=[\"bold\"])\ndef toMagenta(content): return termcolor.colored(content,\"magenta\",attrs=[\"bold\"])\n\n# restore model\ndef restoreModel(opt,geometric,classifier,it):\n\tgeometric.load_state_dict(torch.load(\"models_{0}/{1}_it{2}_GP.npy\".format(opt.group,opt.model,it)))\n\tclassifier.load_state_dict(torch.load(\"models_{0}/{1}_it{2}_C.npy\".format(opt.group,opt.model,it)))\n# save model\ndef saveModel(opt,geometric,classifier,it):\n\ttorch.save(geometric.state_dict(),\"models_{0}/{1}_it{2}_GP.npy\".format(opt.group,opt.model,it))\n\ttorch.save(classifier.state_dict(),\"models_{0}/{1}_it{2}_C.npy\".format(opt.group,opt.model,it))\n\nclass Visdom():\n\tdef __init__(self,opt):\n\t\tself.vis = visdom.Visdom(port=opt.port,use_incoming_socket=False)\n\t\tself.trainLossInit = True\n\t\tself.testLossInit = True\n\t\tself.meanVarInit = True\n\tdef tileImages(self,opt,images,H,W,HN,WN):\n\t\tassert(len(images)==HN*WN)\n\t\timages = images.reshape([HN,WN,-1,H,W])\n\t\timages = [list(i) for i in images]\n\t\timageBlocks = np.concatenate([np.concatenate(row,axis=2) for row in images],axis=1)\n\t\treturn imageBlocks\n\tdef trainLoss(self,opt,it,loss):\n\t\tloss = float(loss.detach().cpu().numpy())\n\t\tif 
self.trainLossInit:\n\t\t\tself.vis.line(Y=np.array([loss]),X=np.array([it]),win=\"{0}_trainloss\".format(opt.model),\n\t\t\t\t\t\t  opts={ \"title\": \"{0} (TRAIN_loss)\".format(opt.model) })\n\t\t\tself.trainLossInit = False\n\t\telse: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+\"_trainloss\",update=\"append\")\n\tdef testLoss(self,opt,it,loss):\n\t\tif self.testLossInit:\n\t\t\tself.vis.line(Y=np.array([loss]),X=np.array([it]),win=\"{0}_testloss\".format(opt.model),\n\t\t\t\t\t\t  opts={ \"title\": \"{0} (TEST_error)\".format(opt.model) })\n\t\t\tself.testLossInit = False\n\t\telse: self.vis.line(Y=np.array([loss]),X=np.array([it]),win=opt.model+\"_testloss\",update=\"append\")\n\tdef meanVar(self,opt,mean,var):\n\t\tmean = [self.tileImages(opt,m,opt.H,opt.W,1,10) for m in mean]\n\t\tvar = [self.tileImages(opt,v,opt.H,opt.W,1,10)*3 for v in var]\n\t\tself.vis.image(mean[0].clip(0,1),win=\"{0}_meaninit\".format(opt.model), opts={ \"title\": \"{0} (TEST_mean_init)\".format(opt.model) })\n\t\tself.vis.image(mean[1].clip(0,1),win=\"{0}_meanwarped\".format(opt.model), opts={ \"title\": \"{0} (TEST_mean_warped)\".format(opt.model) })\n\t\tself.vis.image(var[0].clip(0,1),win=\"{0}_varinit\".format(opt.model), opts={ \"title\": \"{0} (TEST_var_init)\".format(opt.model) })\n\t\tself.vis.image(var[1].clip(0,1),win=\"{0}_varwarped\".format(opt.model), opts={ \"title\": \"{0} (TEST_var_warped)\".format(opt.model) })\n\n"
  },
  {
    "path": "MNIST-pytorch/warp.py",
    "content": "import numpy as np\nimport scipy.linalg\nimport torch\n\nimport util\n\n# fit (affine) warp between two sets of points \ndef fit(Xsrc,Xdst):\n\tptsN = len(Xsrc)\n\tX,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN])\n\tA = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1),\n\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=1)),axis=0)\n\tb = np.concatenate((U,V),axis=0)\n\tp1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze()\n\tpMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=torch.float32)\n\treturn pMtrx\n\n# compute composition of warp parameters\ndef compose(opt,p,dp):\n\tpMtrx = vec2mtrx(opt,p)\n\tdpMtrx = vec2mtrx(opt,dp)\n\tpMtrxNew = dpMtrx.matmul(pMtrx)\n\tpMtrxNew = pMtrxNew/pMtrxNew[:,2:3,2:3]\n\tpNew = mtrx2vec(opt,pMtrxNew)\n\treturn pNew\n\n# compute inverse of warp parameters\ndef inverse(opt,p):\n\tpMtrx = vec2mtrx(opt,p)\n\tpInvMtrx = pMtrx.inverse()\n\tpInv = mtrx2vec(opt,pInvMtrx)\n\treturn pInv\n\n# convert warp parameters to matrix\ndef vec2mtrx(opt,p):\n\tO = torch.zeros(opt.batchSize,dtype=torch.float32).cuda()\n\tI = torch.ones(opt.batchSize,dtype=torch.float32).cuda()\n\tif opt.warpType==\"translation\":\n\t\ttx,ty = torch.unbind(p,dim=1)\n\t\tpMtrx = torch.stack([torch.stack([I,O,tx],dim=-1),\n\t\t\t\t\t\t\t torch.stack([O,I,ty],dim=-1),\n\t\t\t\t\t\t\t torch.stack([O,O,I],dim=-1)],dim=1)\n\tif opt.warpType==\"similarity\":\n\t\tpc,ps,tx,ty = torch.unbind(p,dim=1)\n\t\tpMtrx = torch.stack([torch.stack([I+pc,-ps,tx],dim=-1),\n\t\t\t\t\t\t\t torch.stack([ps,I+pc,ty],dim=-1),\n\t\t\t\t\t\t\t torch.stack([O,O,I],dim=-1)],dim=1)\n\tif opt.warpType==\"affine\":\n\t\tp1,p2,p3,p4,p5,p6 = torch.unbind(p,dim=1)\n\t\tpMtrx = torch.stack([torch.stack([I+p1,p2,p3],dim=-1),\n\t\t\t\t\t\t\t torch.stack([p4,I+p5,p6],dim=-1),\n\t\t\t\t\t\t\t torch.stack([O,O,I],dim=-1)],dim=1)\n\tif opt.warpType==\"homography\":\n\t\tp1,p2,p3,p4,p5,p6,p7,p8 = torch.unbind(p,dim=1)\n\t\tpMtrx = 
torch.stack([torch.stack([I+p1,p2,p3],dim=-1),\n\t\t\t\t\t\t\t torch.stack([p4,I+p5,p6],dim=-1),\n\t\t\t\t\t\t\t torch.stack([p7,p8,I],dim=-1)],dim=1)\n\treturn pMtrx\n\n# convert warp matrix to parameters\ndef mtrx2vec(opt,pMtrx):\n\t[row0,row1,row2] = torch.unbind(pMtrx,dim=1)\n\t[e00,e01,e02] = torch.unbind(row0,dim=1)\n\t[e10,e11,e12] = torch.unbind(row1,dim=1)\n\t[e20,e21,e22] = torch.unbind(row2,dim=1)\n\tif opt.warpType==\"translation\": p = torch.stack([e02,e12],dim=1)\n\tif opt.warpType==\"similarity\": p = torch.stack([e00-1,e10,e02,e12],dim=1)\n\tif opt.warpType==\"affine\": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12],dim=1)\n\tif opt.warpType==\"homography\": p = torch.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],dim=1)\n\treturn p\n\n# warp the image\ndef transformImage(opt,image,pMtrx):\n\trefMtrx = torch.from_numpy(opt.refMtrx).cuda()\n\trefMtrx = refMtrx.repeat(opt.batchSize,1,1)\n\ttransMtrx = refMtrx.matmul(pMtrx)\n\t# warp the canonical coordinates\n\tX,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))\n\tX,Y = X.flatten(),Y.flatten()\n\tXYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T\n\tXYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)\n\tXYhom = torch.from_numpy(XYhom).cuda()\n\tXYwarpHom = transMtrx.matmul(XYhom)\n\tXwarpHom,YwarpHom,ZwarpHom = torch.unbind(XYwarpHom,dim=1)\n\tXwarp = (XwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W)\n\tYwarp = (YwarpHom/(ZwarpHom+1e-8)).reshape(opt.batchSize,opt.H,opt.W)\n\tgrid = torch.stack([Xwarp,Ywarp],dim=-1)\n\t# sampling with bilinear interpolation\n\timageWarp = torch.nn.functional.grid_sample(image,grid,mode=\"bilinear\")\n\treturn imageWarp\n"
  },
  {
    "path": "MNIST-tensorflow/data.py",
    "content": "import numpy as np\nimport scipy.linalg\nimport os,time\nimport tensorflow as tf\n\nimport warp\n\n# load MNIST data\ndef loadMNIST(fname):\n\tif not os.path.exists(fname):\n\t\t# download and preprocess MNIST dataset\n\t\tfrom tensorflow.examples.tutorials.mnist import input_data\n\t\tmnist = input_data.read_data_sets(\"MNIST_data/\",one_hot=True)\n\t\ttrainData,validData,testData = {},{},{}\n\t\ttrainData[\"image\"] = mnist.train.images.reshape([-1,28,28]).astype(np.float32)\n\t\tvalidData[\"image\"] = mnist.validation.images.reshape([-1,28,28]).astype(np.float32)\n\t\ttestData[\"image\"] = mnist.test.images.reshape([-1,28,28]).astype(np.float32)\n\t\ttrainData[\"label\"] = np.argmax(mnist.train.labels.astype(np.float32),axis=1)\n\t\tvalidData[\"label\"] = np.argmax(mnist.validation.labels.astype(np.float32),axis=1)\n\t\ttestData[\"label\"] = np.argmax(mnist.test.labels.astype(np.float32),axis=1)\n\t\tos.makedirs(os.path.dirname(fname))\n\t\tnp.savez(fname,train=trainData,valid=validData,test=testData)\n\t\tos.system(\"rm -rf MNIST_data\")\n\tMNIST = np.load(fname)\n\ttrainData = MNIST[\"train\"].item()\n\tvalidData = MNIST[\"valid\"].item()\n\ttestData = MNIST[\"test\"].item()\n\treturn trainData,validData,testData\n\n# generate training batch\ndef genPerturbations(opt):\n\twith tf.name_scope(\"genPerturbations\"):\n\t\tX = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])\n\t\tY = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])\n\t\tdX = tf.random_normal([opt.batchSize,4])*opt.pertScale \\\n\t\t\t+tf.random_normal([opt.batchSize,1])*opt.transScale\n\t\tdY = tf.random_normal([opt.batchSize,4])*opt.pertScale \\\n\t\t\t+tf.random_normal([opt.batchSize,1])*opt.transScale\n\t\tO = np.zeros([opt.batchSize,4],dtype=np.float32)\n\t\tI = np.ones([opt.batchSize,4],dtype=np.float32)\n\t\t# fit warp parameters to generated displacements\n\t\tif opt.warpType==\"homography\":\n\t\t\tA = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),\n\t\t\t\t\t\t   
tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1)\n\t\t\tb = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1)\n\t\t\tpPert = tf.matrix_solve(A,b)[:,:,0]\n\t\t\tpPert -= tf.to_float([[1,0,0,0,1,0,0,0]])\n\t\telse:\n\t\t\tif opt.warpType==\"translation\":\n\t\t\t\tJ = np.concatenate([np.stack([I,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([O,I],axis=-1)],axis=1)\n\t\t\tif opt.warpType==\"similarity\":\n\t\t\t\tJ = np.concatenate([np.stack([X,Y,I,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([-Y,X,O,I],axis=-1)],axis=1)\n\t\t\tif opt.warpType==\"affine\":\n\t\t\t\tJ = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=-1)],axis=1)\n\t\t\tdXY = tf.expand_dims(tf.concat([dX,dY],1),-1)\n\t\t\tpPert = tf.matrix_solve_ls(J,dXY)[:,:,0]\n\treturn pPert\n\n# make training batch\ndef makeBatch(opt,data,PH):\n\tN = len(data[\"image\"])\n\trandIdx = np.random.randint(N,size=[opt.batchSize])\n\t# put data in placeholders\n\t[image,label] = PH\n\tbatch = {\n\t\timage: data[\"image\"][randIdx],\n\t\tlabel: data[\"label\"][randIdx],\n\t}\n\treturn batch\n\n# evaluation on test set\ndef evalTest(opt,sess,data,PH,prediction,imagesEval=[]):\n\tN = len(data[\"image\"])\n\t# put data in placeholders\n\t[image,label] = PH\n\tbatchN = int(np.ceil(N/opt.batchSize))\n\twarped = [{},{}]\n\tcount = 0\n\tfor b in range(batchN):\n\t\t# use some dummy data (0) as batch filler if necessary\n\t\tif b!=batchN-1:\n\t\t\trealIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1))\n\t\telse:\n\t\t\trealIdx = np.arange(opt.batchSize*b,N)\n\t\tidx = np.zeros([opt.batchSize],dtype=int)\n\t\tidx[:len(realIdx)] = realIdx\n\t\tbatch = {\n\t\t\timage: data[\"image\"][idx],\n\t\t\tlabel: data[\"label\"][idx],\n\t\t}\n\t\tevalList = sess.run([prediction]+imagesEval,feed_dict=batch)\n\t\tpred = evalList[0]\n\t\tcount += pred[:len(realIdx)].sum()\n\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\n\t\t\timgs = evalList[1:]\n\t\t\tfor i in range(len(realIdx)):\n\t\t\t\tl = 
data[\"label\"][idx[i]]\n\t\t\t\tif l not in warped[0]: warped[0][l] = []\n\t\t\t\tif l not in warped[1]: warped[1][l] = []\n\t\t\t\twarped[0][l].append(imgs[0][i])\n\t\t\t\twarped[1][l].append(imgs[1][i])\n\taccuracy = float(count)/N\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\n\t\tmean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t\tnp.array([np.mean(warped[1][l],axis=0) for l in warped[1]])]\n\t\tvar = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t   np.array([np.var(warped[1][l],axis=0) for l in warped[1]])]\n\telse: mean,var = None,None\n\treturn accuracy,mean,var\n"
  },
  {
    "path": "MNIST-tensorflow/graph.py",
    "content": "import numpy as np\nimport tensorflow as tf\nimport time\nimport data,warp,util\n\n# build classification network\ndef fullCNN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[3,3,int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\twith tf.variable_scope(\"classifier\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = conv2Layer(opt,feat,3)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv2\"):\n\t\t\tfeat = conv2Layer(opt,feat,6)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\t\tfeat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding=\"VALID\")\n\t\twith tf.variable_scope(\"conv3\"):\n\t\t\tfeat = conv2Layer(opt,feat,9)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv4\"):\n\t\t\tfeat = conv2Layer(opt,feat,12)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc5\"):\n\t\t\tfeat = linearLayer(opt,feat,48)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"fc6\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.labelN)\n\t\toutput = feat\n\treturn output\n\n# build classification network\ndef CNN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[9,9,int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\twith tf.variable_scope(\"classifier\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = 
conv2Layer(opt,feat,3)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc2\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.labelN)\n\t\toutput = feat\n\treturn output\n\n# build Spatial Transformer Network\ndef STN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim,final=False):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\timageWarpAll = [image]\n\twith tf.variable_scope(\"geometric\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = conv2Layer(opt,feat,4)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv2\"):\n\t\t\tfeat = conv2Layer(opt,feat,8)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\t\tfeat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding=\"VALID\")\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc3\"):\n\t\t\tfeat = linearLayer(opt,feat,48)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"fc4\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.warpDim,final=True)\n\t\tp = feat\n\tpMtrx = warp.vec2mtrx(opt,p)\n\timageWarp = warp.transformImage(opt,image,pMtrx)\n\timageWarpAll.append(imageWarp)\n\treturn imageWarpAll\n\n# build Inverse Compositional STN\ndef ICSTN(opt,image,p):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim,final=False):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=0.0 if final else opt.stdGP)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn 
fc\n\timageWarpAll = []\n\tfor l in range(opt.warpN):\n\t\twith tf.variable_scope(\"geometric\",reuse=l>0):\n\t\t\tpMtrx = warp.vec2mtrx(opt,p)\n\t\t\timageWarp = warp.transformImage(opt,image,pMtrx)\n\t\t\timageWarpAll.append(imageWarp)\n\t\t\tfeat = imageWarp\n\t\t\twith tf.variable_scope(\"conv1\"):\n\t\t\t\tfeat = conv2Layer(opt,feat,4)\n\t\t\t\tfeat = tf.nn.relu(feat)\n\t\t\twith tf.variable_scope(\"conv2\"):\n\t\t\t\tfeat = conv2Layer(opt,feat,8)\n\t\t\t\tfeat = tf.nn.relu(feat)\n\t\t\t\tfeat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding=\"VALID\")\n\t\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\t\twith tf.variable_scope(\"fc3\"):\n\t\t\t\tfeat = linearLayer(opt,feat,48)\n\t\t\t\tfeat = tf.nn.relu(feat)\n\t\t\twith tf.variable_scope(\"fc4\"):\n\t\t\t\tfeat = linearLayer(opt,feat,opt.warpDim,final=True)\n\t\t\tdp = feat\n\t\tp = warp.compose(opt,p,dp)\n\tpMtrx = warp.vec2mtrx(opt,p)\n\timageWarp = warp.transformImage(opt,image,pMtrx)\n\timageWarpAll.append(imageWarp)\n\treturn imageWarpAll\n\n# auxiliary function for creating weight and bias\ndef createVariable(opt,weightShape,biasShape=None,stddev=None):\n\tif biasShape is None: biasShape = [weightShape[-1]]\n\tweight = tf.get_variable(\"weight\",shape=weightShape,dtype=tf.float32,\n\t\t\t\t\t\t\t\t\t  initializer=tf.random_normal_initializer(stddev=stddev))\n\tbias = tf.get_variable(\"bias\",shape=biasShape,dtype=tf.float32,\n\t\t\t\t\t\t\t\t  initializer=tf.random_normal_initializer(stddev=stddev))\n\treturn weight,bias\n"
  },
  {
    "path": "MNIST-tensorflow/options.py",
    "content": "import numpy as np\nimport argparse\nimport warp\nimport util\n\ndef set(training):\n\n\t# parse input arguments\n\tparser = argparse.ArgumentParser()\n\tparser.add_argument(\"netType\",\t\tchoices=[\"CNN\",\"STN\",\"IC-STN\"],\t\thelp=\"type of network\")\n\tparser.add_argument(\"--group\",\t\t\t\t\tdefault=\"0\",\t\t\thelp=\"name for group\")\n\tparser.add_argument(\"--model\",\t\t\t\t\tdefault=\"test\",\t\t\thelp=\"name for model instance\")\n\tparser.add_argument(\"--size\",\t\t\t\t\tdefault=\"28x28\",\t\thelp=\"image resolution\")\n\tparser.add_argument(\"--warpType\",\t\t\t\tdefault=\"homography\",\thelp=\"type of warp function on images\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tchoices=[\"translation\",\"similarity\",\"affine\",\"homography\"])\n\tparser.add_argument(\"--warpN\",\t\ttype=int,\tdefault=4,\t\t\t\thelp=\"number of recurrent transformations (for IC-STN)\")\n\tparser.add_argument(\"--stdC\",\t\ttype=float,\tdefault=0.1,\t\t\thelp=\"initialization stddev (classification network)\")\n\tparser.add_argument(\"--stdGP\",\t\ttype=float,\tdefault=0.1,\t\t\thelp=\"initialization stddev (geometric predictor)\")\n\tparser.add_argument(\"--pertScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial perturbation scale\")\n\tparser.add_argument(\"--transScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial translation scale\")\n\tif training: # training\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=100,\thelp=\"batch size for SGD\")\n\t\tparser.add_argument(\"--lrC\",\t\ttype=float,\tdefault=1e-2,\thelp=\"learning rate (classification network)\")\n\t\tparser.add_argument(\"--lrCdecay\",\ttype=float,\tdefault=1.0,\thelp=\"learning rate decay (classification network)\")\n\t\tparser.add_argument(\"--lrCstep\",\ttype=int,\tdefault=100000,\thelp=\"learning rate decay step size (classification network)\")\n\t\tparser.add_argument(\"--lrGP\",\t\ttype=float,\tdefault=None,\thelp=\"learning rate (geometric 
predictor)\")\n\t\tparser.add_argument(\"--lrGPdecay\",\ttype=float,\tdefault=1.0,\thelp=\"learning rate decay (geometric predictor)\")\n\t\tparser.add_argument(\"--lrGPstep\",\ttype=int,\tdefault=100000,\thelp=\"learning rate decay step size (geometric predictor)\")\n\t\tparser.add_argument(\"--fromIt\",\t\ttype=int,\tdefault=0,\t\thelp=\"resume training from iteration number\")\n\t\tparser.add_argument(\"--toIt\",\t\ttype=int,\tdefault=500000,\thelp=\"run training to iteration number\")\n\telse: # evaluation\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=1,\t\thelp=\"batch size for evaluation\")\n\topt = parser.parse_args()\n\n\tif opt.lrGP is None: opt.lrGP = 0 if opt.netType==\"CNN\" else \\\n\t\t\t\t\t\t\t\t\t1e-2 if opt.netType==\"STN\" else \\\n\t\t\t\t\t\t\t\t\t1e-4 if opt.netType==\"IC-STN\" else None\n\n\t# --- below are automatically set ---\n\topt.training = training\n\topt.H,opt.W = [int(x) for x in opt.size.split(\"x\")]\n\topt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))\n\topt.warpDim = 2 if opt.warpType == \"translation\" else \\\n\t\t\t\t  4 if opt.warpType == \"similarity\" else \\\n\t\t\t\t  6 if opt.warpType == \"affine\" else \\\n\t\t\t\t  8 if opt.warpType == \"homography\" else None\n\topt.labelN = 10\n\topt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32)\n\topt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32)\n\topt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts)\n\tif opt.netType==\"STN\": opt.warpN = 1\n\n\tprint(\"({0}) {1}\".format(\n\t\tutil.toGreen(\"{0}\".format(opt.group)),\n\t\tutil.toGreen(\"{0}\".format(opt.model))))\n\tprint(\"------------------------------------------\")\n\tprint(\"network type: {0}, recurrent warps: {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.netType)),\n\t\tutil.toYellow(\"{0}\".format(opt.warpN if opt.netType==\"IC-STN\" else \"X\"))))\n\tprint(\"batch size: {0}, image size: 
{1}x{2}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.batchSize)),\n\t\tutil.toYellow(\"{0}\".format(opt.H)),\n\t\tutil.toYellow(\"{0}\".format(opt.W))))\n\tprint(\"warpScale: (pert) {0} (trans) {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.pertScale)),\n\t\tutil.toYellow(\"{0}\".format(opt.transScale))))\n\tif training:\n\t\tprint(\"[geometric predictor]    stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdGP)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrGP))))\n\t\tprint(\"[classification network] stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdC)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrC))))\n\tprint(\"------------------------------------------\")\n\tif training:\n\t\tprint(util.toMagenta(\"training model ({0}) {1}...\".format(opt.group,opt.model)))\n\n\treturn opt\n"
  },
  {
    "path": "MNIST-tensorflow/train.py",
    "content": "import numpy as np\r\nimport time,os,sys\r\nimport argparse\r\nimport util\r\n\r\nprint(util.toYellow(\"=======================================================\"))\r\nprint(util.toYellow(\"train.py (training on MNIST)\"))\r\nprint(util.toYellow(\"=======================================================\"))\r\n\r\nimport tensorflow as tf\r\nimport data,graph,warp,util\r\nimport options\r\n\r\nprint(util.toMagenta(\"setting configurations...\"))\r\nopt = options.set(training=True)\r\n\r\n# create directories for model output\r\nutil.mkdir(\"models_{0}\".format(opt.group))\r\n\r\nprint(util.toMagenta(\"building graph...\"))\r\ntf.reset_default_graph()\r\n# build graph\r\nwith tf.device(\"/gpu:0\"):\r\n\t# ------ define input data ------\r\n\timage = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.H,opt.W])\r\n\tlabel = tf.placeholder(tf.int64,shape=[opt.batchSize])\r\n\tPH = [image,label]\r\n\t# ------ generate perturbation ------\r\n\tpInit = data.genPerturbations(opt)\r\n\tpInitMtrx = warp.vec2mtrx(opt,pInit)\r\n\t# ------ build network ------\r\n\timage = tf.expand_dims(image,axis=-1)\r\n\timagePert = warp.transformImage(opt,image,pInitMtrx)\r\n\tif opt.netType==\"CNN\":\r\n\t\toutput = graph.fullCNN(opt,imagePert)\r\n\telif opt.netType==\"STN\":\r\n\t\timageWarpAll = graph.STN(opt,imagePert)\r\n\t\timageWarp = imageWarpAll[-1]\r\n\t\toutput = graph.CNN(opt,imageWarp)\r\n\telif opt.netType==\"IC-STN\":\r\n\t\timageWarpAll = graph.ICSTN(opt,image,pInit)\r\n\t\timageWarp = imageWarpAll[-1]\r\n\t\toutput = graph.CNN(opt,imageWarp)\r\n\tsoftmax = tf.nn.softmax(output)\r\n\tlabelOnehot = tf.one_hot(label,opt.labelN)\r\n\tprediction = tf.equal(tf.argmax(softmax,1),label)\r\n\t# ------ define loss ------\r\n\tsoftmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot)\r\n\tloss = tf.reduce_mean(softmaxLoss)\r\n\t# ------ optimizer ------\r\n\tlrGP_PH,lrC_PH = 
tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[])\r\n\toptim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH)\r\n\t# ------ generate summaries ------\r\n\tsummaryImageTrain = []\r\n\tsummaryImageTest = []\r\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\tfor l in range(opt.warpN+1):\r\n\t\t\tsummaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l],\"TRAIN_warp{0}\".format(l),opt.H,opt.W))\r\n\t\t\tsummaryImageTest.append(util.imageSummary(opt,imageWarpAll[l],\"TEST_warp{0}\".format(l),opt.H,opt.W))\r\n\t\tsummaryImageTrain = tf.summary.merge(summaryImageTrain)\r\n\t\tsummaryImageTest = tf.summary.merge(summaryImageTest)\r\n\tsummaryLossTrain = tf.summary.scalar(\"TRAIN_loss\",loss)\r\n\ttestErrorPH = tf.placeholder(tf.float32,shape=[])\r\n\ttestImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,1])\r\n\tsummaryErrorTest = tf.summary.scalar(\"TEST_error\",testErrorPH)\r\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\tsummaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_mean_init\",opt.H,opt.W)\r\n\t\tsummaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_mean_warped\",opt.H,opt.W)\r\n\t\tsummaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH*3,\"TEST_var_init\",opt.H,opt.W)\r\n\t\tsummaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH*3,\"TEST_var_warped\",opt.H,opt.W)\r\n\r\n# load data\r\nprint(util.toMagenta(\"loading MNIST dataset...\"))\r\ntrainData,validData,testData = data.loadMNIST(\"data/MNIST.npz\")\r\n\r\n# prepare model saver/summary writer\r\nsaver = tf.train.Saver(max_to_keep=20)\r\nsummaryWriter = tf.summary.FileWriter(\"summary_{0}/{1}\".format(opt.group,opt.model))\r\n\r\nprint(util.toYellow(\"======= TRAINING START =======\"))\r\ntimeStart = time.time()\r\n# start session\r\ntfConfig = tf.ConfigProto(allow_soft_placement=True)\r\ntfConfig.gpu_options.allow_growth = True\r\nwith tf.Session(config=tfConfig) as 
sess:\r\n\tsess.run(tf.global_variables_initializer())\r\n\tsummaryWriter.add_graph(sess.graph)\r\n\tif opt.fromIt!=0:\r\n\t\tutil.restoreModel(opt,sess,saver,opt.fromIt)\r\n\t\tprint(util.toMagenta(\"resuming from iteration {0}...\".format(opt.fromIt)))\r\n\tprint(util.toMagenta(\"start training...\"))\r\n\r\n\t# training loop\r\n\tfor i in range(opt.fromIt,opt.toIt):\r\n\t\tlrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep)\r\n\t\tlrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep)\r\n\t\t# make training batch\r\n\t\tbatch = data.makeBatch(opt,trainData,PH)\r\n\t\tbatch[lrGP_PH] = lrGP\r\n\t\tbatch[lrC_PH] = lrC\r\n\t\t# run one step\r\n\t\t_,l = sess.run([optim,loss],feed_dict=batch)\r\n\t\tif (i+1)%100==0:\r\n\t\t\tprint(\"it. {0}/{1}  lr={3}(GP),{4}(C), loss={5}, time={2}\"\r\n\t\t\t\t.format(util.toCyan(\"{0}\".format(i+1)),\r\n\t\t\t\t\t\topt.toIt,\r\n\t\t\t\t\t\tutil.toGreen(\"{0:.2f}\".format(time.time()-timeStart)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrGP)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrC)),\r\n\t\t\t\t\t\tutil.toRed(\"{0:.4f}\".format(l))))\r\n\t\tif (i+1)%100==0:\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1)\r\n\t\tif (i+1)%500==0 and (opt.netType==\"STN\" or opt.netType==\"IC-STN\"):\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1)\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1)\r\n\t\tif (i+1)%1000==0:\r\n\t\t\t# evaluate on test set\r\n\t\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\t\t\ttestAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp])\r\n\t\t\telse:\r\n\t\t\t\ttestAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction)\r\n\t\t\ttestError = (1-testAcc)*100\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1)\r\n\t\t\tif opt.netType==\"STN\" or 
opt.netType==\"IC-STN\":\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1)\r\n\t\tif (i+1)%10000==0:\r\n\t\t\tutil.saveModel(opt,sess,saver,i+1)\r\n\t\t\tprint(util.toGreen(\"model saved: {0}/{1}, it.{2}\".format(opt.group,opt.model,i+1)))\r\n\r\nprint(util.toYellow(\"======= TRAINING DONE =======\"))\r\n"
  },
  {
    "path": "MNIST-tensorflow/util.py",
    "content": "import numpy as np\nimport scipy.misc\nimport tensorflow as tf\nimport os\nimport termcolor\n\ndef mkdir(path):\n\tif not os.path.exists(path): os.mkdir(path)\ndef imread(fname):\n\treturn scipy.misc.imread(fname)/255.0\ndef imsave(fname,array):\n\tscipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)\n\n# convert to colored strings\ndef toRed(content): return termcolor.colored(content,\"red\",attrs=[\"bold\"])\ndef toGreen(content): return termcolor.colored(content,\"green\",attrs=[\"bold\"])\ndef toBlue(content): return termcolor.colored(content,\"blue\",attrs=[\"bold\"])\ndef toCyan(content): return termcolor.colored(content,\"cyan\",attrs=[\"bold\"])\ndef toYellow(content): return termcolor.colored(content,\"yellow\",attrs=[\"bold\"])\ndef toMagenta(content): return termcolor.colored(content,\"magenta\",attrs=[\"bold\"])\n\n# make image summary from image batch\ndef imageSummary(opt,image,tag,H,W):\n\tblockSize = opt.visBlockSize\n\timageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize)\n\timagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1])\n\timageTransp = tf.transpose(imagePermute,[1,0,3,2,4])\n\timageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1])\n\timageBlocks = tf.cast(imageBlocks*255,tf.uint8)\n\tsummary = tf.summary.image(tag,imageBlocks)\n\treturn summary\n\n# make image summary from image batch (mean/variance)\ndef imageSummaryMeanVar(opt,image,tag,H,W):\n\timageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[1,10])\n\timagePermute = tf.reshape(imageOne,[H,1,W,10,-1])\n\timageTransp = tf.transpose(imagePermute,[1,0,3,2,4])\n\timageBlocks = tf.reshape(imageTransp,[1,H*1,W*10,-1])\n\timageBlocks = tf.cast(imageBlocks*255,tf.uint8)\n\tsummary = tf.summary.image(tag,imageBlocks)\n\treturn summary\n\n# set optimizer for different learning rates\ndef setOptimizer(opt,loss,lrGP,lrC):\n\tvarsGP = [v for v in tf.global_variables() if \"geometric\" in 
v.name]\n\tvarsC = [v for v in tf.global_variables() if \"classifier\" in v.name]\n\tgradC = tf.gradients(loss,varsC)\n\toptimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC))\n\tif len(varsGP)>0:\n\t\tgradGP = tf.gradients(loss,varsGP)\n\t\toptimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP))\n\t\toptim = tf.group(optimC,optimGP)\n\telse:\n\t\toptim = optimC\n\treturn optim\n\n# restore model\ndef restoreModel(opt,sess,saver,it):\n\tsaver.restore(sess,\"models_{0}/{1}_it{2}.ckpt\".format(opt.group,opt.model,it,opt.warpN))\n# save model\ndef saveModel(opt,sess,saver,it):\n\tsaver.save(sess,\"models_{0}/{1}_it{2}.ckpt\".format(opt.group,opt.model,it,opt.warpN))\n\n"
  },
  {
    "path": "MNIST-tensorflow/warp.py",
    "content": "import numpy as np\nimport scipy.linalg\nimport tensorflow as tf\n\n# fit (affine) warp between two sets of points \ndef fit(Xsrc,Xdst):\n\tptsN = len(Xsrc)\n\tX,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN])\n\tA = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1),\n\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=1)),axis=0)\n\tb = np.concatenate((U,V),axis=0)\n\tp1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze()\n\tpMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32)\n\treturn pMtrx\n\n# compute composition of warp parameters\ndef compose(opt,p,dp):\n\twith tf.name_scope(\"compose\"):\n\t\tpMtrx = vec2mtrx(opt,p)\n\t\tdpMtrx = vec2mtrx(opt,dp)\n\t\tpMtrxNew = tf.matmul(dpMtrx,pMtrx)\n\t\tpMtrxNew /= pMtrxNew[:,2:3,2:3]\n\t\tpNew = mtrx2vec(opt,pMtrxNew)\n\treturn pNew\n\n# compute inverse of warp parameters\ndef inverse(opt,p):\n\twith tf.name_scope(\"inverse\"):\n\t\tpMtrx = vec2mtrx(opt,p)\n\t\tpInvMtrx = tf.matrix_inverse(pMtrx)\n\t\tpInv = mtrx2vec(opt,pInvMtrx)\n\treturn pInv\n\n# convert warp parameters to matrix\ndef vec2mtrx(opt,p):\n\twith tf.name_scope(\"vec2mtrx\"):\n\t\tO = tf.zeros([opt.batchSize])\n\t\tI = tf.ones([opt.batchSize])\n\t\tif opt.warpType==\"translation\":\n\t\t\ttx,ty = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"similarity\":\n\t\t\tpc,ps,tx,ty = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"affine\":\n\t\t\tp1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"homography\":\n\t\t\tp1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1])\n\treturn pMtrx\n\n# convert warp matrix to parameters\ndef mtrx2vec(opt,pMtrx):\n\twith 
tf.name_scope(\"mtrx2vec\"):\n\t\t[row0,row1,row2] = tf.unstack(pMtrx,axis=1)\n\t\t[e00,e01,e02] = tf.unstack(row0,axis=1)\n\t\t[e10,e11,e12] = tf.unstack(row1,axis=1)\n\t\t[e20,e21,e22] = tf.unstack(row2,axis=1)\n\t\tif opt.warpType==\"translation\": p = tf.stack([e02,e12],axis=1)\n\t\tif opt.warpType==\"similarity\": p = tf.stack([e00-1,e10,e02,e12],axis=1)\n\t\tif opt.warpType==\"affine\": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1)\n\t\tif opt.warpType==\"homography\": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1)\n\treturn p\n\n# warp the image\ndef transformImage(opt,image,pMtrx):\n\twith tf.name_scope(\"transformImage\"):\n\t\trefMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1])\n\t\ttransMtrx = tf.matmul(refMtrx,pMtrx)\n\t\t# warp the canonical coordinates\n\t\tX,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))\n\t\tX,Y = X.flatten(),Y.flatten()\n\t\tXYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T\n\t\tXYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)\n\t\tXYwarpHom = tf.matmul(transMtrx,XYhom)\n\t\tXwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1)\n\t\tXwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\tYwarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\t# get the integer sampling coordinates\n\t\tXfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)\n\t\tYfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)\n\t\tXfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)\n\t\tYfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)\n\t\timageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W])\n\t\timageVec = tf.reshape(image,[-1,int(image.shape[-1])])\n\t\timageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0)\n\t\tidxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt\n\t\tidxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt\n\t\tidxBL = (imageIdx*opt.H+YceilInt)*opt.W+XfloorInt\n\t\tidxBR = 
(imageIdx*opt.H+YceilInt)*opt.W+XceilInt\n\t\tidxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W)\n\t\tdef insideImage(Xint,Yint):\n\t\t\treturn (Xint>=0)&(Xint<opt.W)&(Yint>=0)&(Yint<opt.H)\n\t\tidxUL = tf.where(insideImage(XfloorInt,YfloorInt),idxUL,idxOutside)\n\t\tidxUR = tf.where(insideImage(XceilInt,YfloorInt),idxUR,idxOutside)\n\t\tidxBL = tf.where(insideImage(XfloorInt,YceilInt),idxBL,idxOutside)\n\t\tidxBR = tf.where(insideImage(XceilInt,YceilInt),idxBR,idxOutside)\n\t\t# bilinear interpolation\n\t\tXratio = tf.reshape(Xwarp-Xfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\tYratio = tf.reshape(Ywarp-Yfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\timageUL = tf.to_float(tf.gather(imageVecOut,idxUL))*(1-Xratio)*(1-Yratio)\n\t\timageUR = tf.to_float(tf.gather(imageVecOut,idxUR))*(Xratio)*(1-Yratio)\n\t\timageBL = tf.to_float(tf.gather(imageVecOut,idxBL))*(1-Xratio)*(Yratio)\n\t\timageBR = tf.to_float(tf.gather(imageVecOut,idxBR))*(Xratio)*(Yratio)\n\t\timageWarp = imageUL+imageUR+imageBL+imageBR\n\treturn imageWarp\n"
  },
  {
    "path": "README.md",
    "content": "## Inverse Compositional Spatial Transformer Networks\n[Chen-Hsuan Lin](https://chenhsuanlin.bitbucket.io/)\nand [Simon Lucey](http://www.simonlucey.com/)  \nIEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017 (**oral presentation**)  \n\nProject page: https://chenhsuanlin.bitbucket.io/inverse-compositional-STN  \nPaper: https://chenhsuanlin.bitbucket.io/inverse-compositional-STN/paper.pdf  \nPoster: https://chenhsuanlin.bitbucket.io/inverse-compositional-STN/poster.pdf   \narXiv preprint: https://arxiv.org/abs/1612.03897\n\n<p align=\"center\"><img src=\"https://www.andrew.cmu.edu/user/chenhsul/images/ICSTN2.png\" width=600 height=250></p>\n\nWe provide TensorFlow code for the following experiments:\n- MNIST classification\n- traffic sign classification\n\n**[NEW!]** The PyTorch implementation of the MNIST experiment is now up!  \n\n--------------------------------------\n\n## TensorFlow\n\n### Prerequisites  \nThis code is developed with Python3 (`python3`) but it is also compatible with Python2.7 (`python`). TensorFlow r1.0+ is required. The dependencies can install by running\n```\npip3 install --upgrade numpy scipy termcolor matplotlib tensorflow-gpu\n```\nIf you're using Python2.7, use `pip2` instead; if you don't have sudo access, add the `--user` flag.  \n\n### Running the code  \nThe training code can be executed via the command\n```\npython3 train.py <netType> [(options)]\n```\n`<netType>` should be one of the following:  \n1. `CNN` - standard convolutional neural network  \n2. `STN` - Spatial Transformer Network (STN)  \n3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN)  \n\nThe list of optional arguments can be found by executing `python3 train.py --help`.  \nThe default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better.  
\n\nWhen the code is run for the first time, the datasets will be automatically downloaded and preprocessed.  \nThe checkpoints are saved in the automatically created directory `models_GROUP`; summaries are saved in `summary_GROUP`.\n\n### Visualizing the results  \nWe've included code to visualize the training over TensorBoard. To execute, run\n```\ntensorboard --logdir=summary_GROUP --port=6006\n```\n\nWe provide three types of data visualization:  \n1. **SCALARS**: training/test error over iterations  \n2. **IMAGES**: alignment results and mean/variance appearances  \n3. **GRAPH**: network architecture\n\n--------------------------------------\n\n## PyTorch\n\nThe PyTorch version of the code is still under active development. The training speed is currently slower than the TensorFlow version. Suggestions on improvements are welcome! :)\n\n### Prerequisites  \nThis code is developed with Python3 (`python3`). It has not been tested with Python2.7 yet. PyTorch 0.2.0+ is required. Please see http://pytorch.org/ for installation instructions.  \nVisdom is also required; it can be installed by running\n```\npip3 install --upgrade visdom\n```\nIf you don't have sudo access, add the `--user` flag.  \n\n### Running the code  \nFirst, start a Visdom server by running\n```\npython3 -m visdom.server -port=7000\n```\nThe training code can be executed via the command (using the same port number)\n```\npython3 train.py <netType> --port=7000 [(options)]\n```\n`<netType>` should be one of the following:  \n1. `CNN` - standard convolutional neural network  \n2. `STN` - Spatial Transformer Network (STN)  \n3. `IC-STN` - Inverse Compositional Spatial Transformer Network (IC-STN)  \n\nThe list of optional arguments can be found by executing `python3 train.py --help`.  \nThe default training settings in this released code is slightly different from that in the paper; it is stabler and optimizes the networks better.  
\n\nWhen the code is run for the first time, the datasets will be automatically downloaded and preprocessed.  \nThe checkpoints are saved in the automatically created directory `model_GROUP`; summaries are saved in `summary_GROUP`.\n\n### Visualizing the results  \nWe provide two types of data visualization on Visdom:  \n1. Training/test error over iterations  \n2. Alignment results and mean/variance appearances  \n\n--------------------------------------\n\nIf you find our code useful for your research, please cite\n```\n@inproceedings{lin2017inverse,\n  title={Inverse Compositional Spatial Transformer Networks},\n  author={Lin, Chen-Hsuan and Lucey, Simon},\n  booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},\n  year={2017}\n}\n```\n\nPlease contact me (chlin@cmu.edu) if you have any questions!\n\n\n"
  },
  {
    "path": "traffic-sign-tensorflow/data.py",
    "content": "import numpy as np\nimport scipy.linalg,scipy.misc\nimport os,time\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nimport csv\n\nimport warp\n\n# load GTSRB data\ndef loadGTSRB(opt,fname):\n\tif not os.path.exists(fname):\n\t\t# download and preprocess GTSRB dataset\n\t\tos.makedirs(os.path.dirname(fname))\n\t\tos.system(\"wget -O data/GTSRB_Final_Training_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip\")\n\t\tos.system(\"wget -O data/GTSRB_Final_Test_Images.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_Images.zip\")\n\t\tos.system(\"wget -O data/GTSRB_Final_Test_GT.zip http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Test_GT.zip\")\n\t\tos.system(\"cd data && unzip GTSRB_Final_Training_Images.zip\")\n\t\tos.system(\"cd data && unzip GTSRB_Final_Test_Images.zip\")\n\t\tos.system(\"cd data && unzip GTSRB_Final_Test_GT.zip\")\n\t\t# training data\n\t\tprint(\"preparing training data...\")\n\t\timages,bboxes,labels = [],[],[]\n\t\tfor c in range(43):\n\t\t\tprefix = \"data/GTSRB/Final_Training/Images/{0:05d}\".format(c)\n\t\t\twith open(\"{0}/GT-{1:05d}.csv\".format(prefix,c)) as file:\n\t\t\t\treader = csv.reader(file,delimiter=\";\")\n\t\t\t\tnext(reader)\n\t\t\t\tfor line in reader:\n\t\t\t\t\timg = plt.imread(prefix+\"/\"+line[0])\n\t\t\t\t\trawH,rawW = img.shape[0],img.shape[1]\n\t\t\t\t\tscaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW\n\t\t\t\t\timgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3))\n\t\t\t\t\timages.append(imgResize)\n\t\t\t\t\tbboxes.append([float(line[3])*scaleW,float(line[4])*scaleH,\n\t\t\t\t\t\t\t\t   float(line[5])*scaleW,float(line[6])*scaleH])\n\t\t\t\t\tlabels.append(int(line[7]))\n\t\ttrainData = {\n\t\t\t\"image\": np.array(images),\n\t\t\t\"bbox\": np.array(bboxes),\n\t\t\t\"label\": np.array(labels)\n\t\t}\n\t\t# test data\n\t\tprint(\"preparing test data...\")\n\t\timages,bboxes,labels = [],[],[]\n\t\tprefix = 
\"data/GTSRB/Final_Test/Images/\"\n\t\twith open(\"data/GT-final_test.csv\") as file:\n\t\t\treader = csv.reader(file,delimiter=\";\")\n\t\t\tnext(reader)\n\t\t\tfor line in reader:\n\t\t\t\timg = plt.imread(prefix+\"/\"+line[0])\n\t\t\t\trawH,rawW = img.shape[0],img.shape[1]\n\t\t\t\tscaleH,scaleW = float(opt.fullH)/rawH,float(opt.fullW)/rawW\n\t\t\t\timgResize = scipy.misc.imresize(img,(opt.fullH,opt.fullW,3))\n\t\t\t\timages.append(imgResize)\n\t\t\t\tbboxes.append([float(line[3])*scaleW,float(line[4])*scaleH,\n\t\t\t\t\t\t\t   float(line[5])*scaleW,float(line[6])*scaleH])\n\t\t\t\tlabels.append(int(line[7]))\n\t\ttestData = {\n\t\t\t\"image\": np.array(images),\n\t\t\t\"bbox\": np.array(bboxes),\n\t\t\t\"label\": np.array(labels)\n\t\t}\n\t\tnp.savez(fname,train=trainData,test=testData)\n\t\tos.system(\"rm -rf data/*.zip\")\n\tGTSRB = np.load(fname)\n\ttrainData = GTSRB[\"train\"].item()\n\ttestData = GTSRB[\"test\"].item()\n\treturn trainData,testData\n\n# generate training batch\ndef genPerturbations(opt):\n\twith tf.name_scope(\"genPerturbations\"):\n\t\tX = np.tile(opt.canon4pts[:,0],[opt.batchSize,1])\n\t\tY = np.tile(opt.canon4pts[:,1],[opt.batchSize,1])\n\t\tdX = tf.random_normal([opt.batchSize,4])*opt.pertScale \\\n\t\t\t+tf.random_normal([opt.batchSize,1])*opt.transScale\n\t\tdY = tf.random_normal([opt.batchSize,4])*opt.pertScale \\\n\t\t\t+tf.random_normal([opt.batchSize,1])*opt.transScale\n\t\tO = np.zeros([opt.batchSize,4],dtype=np.float32)\n\t\tI = np.ones([opt.batchSize,4],dtype=np.float32)\n\t\t# fit warp parameters to generated displacements\n\t\tif opt.warpType==\"homography\":\n\t\t\tA = tf.concat([tf.stack([X,Y,I,O,O,O,-X*(X+dX),-Y*(X+dX)],axis=-1),\n\t\t\t\t\t\t   tf.stack([O,O,O,X,Y,I,-X*(Y+dY),-Y*(Y+dY)],axis=-1)],1)\n\t\t\tb = tf.expand_dims(tf.concat([X+dX,Y+dY],1),-1)\n\t\t\tpPert = tf.matrix_solve(A,b)[:,:,0]\n\t\t\tpPert -= tf.to_float([[1,0,0,0,1,0,0,0]])\n\t\telse:\n\t\t\tif opt.warpType==\"translation\":\n\t\t\t\tJ = 
np.concatenate([np.stack([I,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([O,I],axis=-1)],axis=1)\n\t\t\tif opt.warpType==\"similarity\":\n\t\t\t\tJ = np.concatenate([np.stack([X,Y,I,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([-Y,X,O,I],axis=-1)],axis=1)\n\t\t\tif opt.warpType==\"affine\":\n\t\t\t\tJ = np.concatenate([np.stack([X,Y,I,O,O,O],axis=-1),\n\t\t\t\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=-1)],axis=1)\n\t\t\tdXY = tf.expand_dims(tf.concat([dX,dY],1),-1)\n\t\t\tpPert = tf.matrix_solve_ls(J,dXY)[:,:,0]\n\treturn pPert\n\n# make training batch\ndef makeBatch(opt,data,PH):\n\tN = len(data[\"image\"])\n\trandIdx = np.random.randint(N,size=[opt.batchSize])\n\t# put data in placeholders\n\t[image,label] = PH\n\tbatch = {\n\t\timage: data[\"image\"][randIdx]/255.0,\n\t\tlabel: data[\"label\"][randIdx],\n\t}\n\treturn batch\n\n# evaluation on test set\ndef evalTest(opt,sess,data,PH,prediction,imagesEval=[]):\n\tN = len(data[\"image\"])\n\t# put data in placeholders\n\t[image,label] = PH\n\tbatchN = int(np.ceil(N/opt.batchSize))\n\twarped = [{},{}]\n\tcount = 0\n\tfor b in range(batchN):\n\t\t# use some dummy data (0) as batch filler if necessary\n\t\tif b!=batchN-1:\n\t\t\trealIdx = np.arange(opt.batchSize*b,opt.batchSize*(b+1))\n\t\telse:\n\t\t\trealIdx = np.arange(opt.batchSize*b,N)\n\t\tidx = np.zeros([opt.batchSize],dtype=int)\n\t\tidx[:len(realIdx)] = realIdx\n\t\tbatch = {\n\t\t\timage: data[\"image\"][idx]/255.0,\n\t\t\tlabel: data[\"label\"][idx],\n\t\t}\n\t\tevalList = sess.run([prediction]+imagesEval,feed_dict=batch)\n\t\tpred = evalList[0]\n\t\tcount += pred[:len(realIdx)].sum()\n\t\tif len(imagesEval)>0:\n\t\t\timgs = evalList[1:]\n\t\t\tfor i in range(len(realIdx)):\n\t\t\t\tif data[\"label\"][idx[i]] not in warped[0]: warped[0][data[\"label\"][idx[i]]] = []\n\t\t\t\tif data[\"label\"][idx[i]] not in warped[1]: warped[1][data[\"label\"][idx[i]]] = 
[]\n\t\t\t\twarped[0][data[\"label\"][idx[i]]].append(imgs[0][i])\n\t\t\t\twarped[1][data[\"label\"][idx[i]]].append(imgs[1][i])\n\taccuracy = float(count)/N\n\tif len(imagesEval)>0:\n\t\tmean = [np.array([np.mean(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t\tnp.array([np.mean(warped[1][l],axis=0) for l in warped[1]])]\n\t\tvar = [np.array([np.var(warped[0][l],axis=0) for l in warped[0]]),\n\t\t\t   np.array([np.var(warped[1][l],axis=0) for l in warped[1]])]\n\telse: mean,var = None,None\n\treturn accuracy,mean,var\n"
  },
  {
    "path": "traffic-sign-tensorflow/graph.py",
    "content": "import numpy as np\nimport tensorflow as tf\nimport time\nimport data,warp,util\n\n# build classification network\ndef fullCNN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\twith tf.variable_scope(\"classifier\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = conv2Layer(opt,feat,6)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv2\"):\n\t\t\tfeat = conv2Layer(opt,feat,12)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\t\tfeat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding=\"VALID\")\n\t\twith tf.variable_scope(\"conv3\"):\n\t\t\tfeat = conv2Layer(opt,feat,24)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc4\"):\n\t\t\tfeat = linearLayer(opt,feat,200)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"fc5\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.labelN)\n\t\toutput = feat\n\treturn output\n\n# build classification network\ndef CNN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdC)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\twith tf.variable_scope(\"classifier\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = conv2Layer(opt,feat,6)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv2\"):\n\t\t\tfeat = 
conv2Layer(opt,feat,12)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\t\tfeat = tf.nn.max_pool(feat,ksize=[1,2,2,1],strides=[1,2,2,1],padding=\"VALID\")\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc3\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.labelN)\n\t\toutput = feat\n\treturn output\n\n# build Spatial Transformer Network\ndef STN(opt,image):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\timageWarpAll = [image]\n\twith tf.variable_scope(\"geometric\"):\n\t\tfeat = image\n\t\twith tf.variable_scope(\"conv1\"):\n\t\t\tfeat = conv2Layer(opt,feat,6)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\twith tf.variable_scope(\"conv2\"):\n\t\t\tfeat = conv2Layer(opt,feat,24)\n\t\t\tfeat = tf.nn.relu(feat)\n\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\twith tf.variable_scope(\"fc3\"):\n\t\t\tfeat = linearLayer(opt,feat,opt.warpDim)\n\t\tp = feat\n\tpMtrx = warp.vec2mtrx(opt,p)\n\timageWarp = warp.transformImage(opt,image,pMtrx)\n\timageWarpAll.append(imageWarp)\n\treturn imageWarpAll\n\n# build Inverse Compositional STN\ndef ICSTN(opt,imageFull,p):\n\tdef conv2Layer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[7,7,int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tconv = tf.nn.conv2d(feat,weight,strides=[1,1,1,1],padding=\"VALID\")+bias\n\t\treturn conv\n\tdef linearLayer(opt,feat,outDim):\n\t\tweight,bias = createVariable(opt,[int(feat.shape[-1]),outDim],stddev=opt.stdGP)\n\t\tfc = tf.matmul(feat,weight)+bias\n\t\treturn fc\n\timageWarpAll = []\n\tfor l in range(opt.warpN):\n\t\twith tf.variable_scope(\"geometric\",reuse=l>0):\n\t\t\tpMtrx = warp.vec2mtrx(opt,p)\n\t\t\timageWarp = 
warp.transformCropImage(opt,imageFull,pMtrx)\n\t\t\timageWarpAll.append(imageWarp)\n\t\t\tfeat = imageWarp\n\t\t\twith tf.variable_scope(\"conv1\"):\n\t\t\t\tfeat = conv2Layer(opt,feat,6)\n\t\t\t\tfeat = tf.nn.relu(feat)\n\t\t\twith tf.variable_scope(\"conv2\"):\n\t\t\t\tfeat = conv2Layer(opt,feat,24)\n\t\t\t\tfeat = tf.nn.relu(feat)\n\t\t\tfeat = tf.reshape(feat,[opt.batchSize,-1])\n\t\t\twith tf.variable_scope(\"fc3\"):\n\t\t\t\tfeat = linearLayer(opt,feat,opt.warpDim)\n\t\t\tdp = feat\n\t\tp = warp.compose(opt,p,dp)\n\tpMtrx = warp.vec2mtrx(opt,p)\n\timageWarp = warp.transformCropImage(opt,imageFull,pMtrx)\n\timageWarpAll.append(imageWarp)\n\treturn imageWarpAll\n\n# auxiliary function for creating weight and bias\ndef createVariable(opt,weightShape,biasShape=None,stddev=None):\n\tif biasShape is None: biasShape = [weightShape[-1]]\n\tweight = tf.get_variable(\"weight\",shape=weightShape,dtype=tf.float32,\n\t\t\t\t\t\t\t\t\t  initializer=tf.random_normal_initializer(stddev=stddev))\n\tbias = tf.get_variable(\"bias\",shape=biasShape,dtype=tf.float32,\n\t\t\t\t\t\t\t\t  initializer=tf.random_normal_initializer(stddev=stddev))\n\treturn weight,bias\n"
  },
  {
    "path": "traffic-sign-tensorflow/options.py",
    "content": "import numpy as np\nimport argparse\nimport warp\nimport util\n\ndef set(training):\n\n\t# parse input arguments\n\tparser = argparse.ArgumentParser()\n\tparser.add_argument(\"netType\",\t\tchoices=[\"CNN\",\"STN\",\"IC-STN\"],\t\thelp=\"type of network\")\n\tparser.add_argument(\"--group\",\t\t\t\t\tdefault=\"0\",\t\t\thelp=\"name for group\")\n\tparser.add_argument(\"--model\",\t\t\t\t\tdefault=\"test\",\t\t\thelp=\"name for model instance\")\n\tparser.add_argument(\"--size\",\t\t\t\t\tdefault=\"36x36\",\t\thelp=\"image resolution\")\n\tparser.add_argument(\"--sizeFull\",\t\t\t\tdefault=\"50x50\",\t\thelp=\"full image resolution\")\n\tparser.add_argument(\"--warpType\",\t\t\t\tdefault=\"homography\",\thelp=\"type of warp function on images\",\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tchoices=[\"translation\",\"similarity\",\"affine\",\"homography\"])\n\tparser.add_argument(\"--warpN\",\t\ttype=int,\tdefault=4,\t\t\t\thelp=\"number of recurrent transformations (for IC-STN)\")\n\tparser.add_argument(\"--stdC\",\t\ttype=float,\tdefault=0.01,\t\t\thelp=\"initialization stddev (classification network)\")\n\tparser.add_argument(\"--stdGP\",\t\ttype=float,\tdefault=0.001,\t\t\thelp=\"initialization stddev (geometric predictor)\")\n\tparser.add_argument(\"--pertScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial perturbation scale\")\n\tparser.add_argument(\"--transScale\",\ttype=float,\tdefault=0.25,\t\t\thelp=\"initial translation scale\")\n\tif training: # training\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=100,\thelp=\"batch size for SGD\")\n\t\tparser.add_argument(\"--lrC\",\t\ttype=float,\tdefault=1e-2,\thelp=\"learning rate (classification network)\")\n\t\tparser.add_argument(\"--lrCdecay\",\ttype=float,\tdefault=0.1,\thelp=\"learning rate decay (classification network)\")\n\t\tparser.add_argument(\"--lrCstep\",\ttype=int,\tdefault=500000,\thelp=\"learning rate decay step size (classification 
network)\")\n\t\tparser.add_argument(\"--lrGP\",\t\ttype=float,\tdefault=None,\thelp=\"learning rate (geometric predictor)\")\n\t\tparser.add_argument(\"--lrGPdecay\",\ttype=float,\tdefault=0.1,\thelp=\"learning rate decay (geometric predictor)\")\n\t\tparser.add_argument(\"--lrGPstep\",\ttype=int,\tdefault=500000,\thelp=\"learning rate decay step size (geometric predictor)\")\n\t\tparser.add_argument(\"--fromIt\",\t\ttype=int,\tdefault=0,\t\thelp=\"resume training from iteration number\")\n\t\tparser.add_argument(\"--toIt\",\t\ttype=int,\tdefault=1000000,help=\"run training to iteration number\")\n\telse: # evaluation\n\t\tparser.add_argument(\"--batchSize\",\ttype=int,\tdefault=1,\t\thelp=\"batch size for evaluation\")\n\topt = parser.parse_args()\n\n\tif opt.lrGP is None: opt.lrGP = 0 if opt.netType==\"CNN\" else \\\n\t\t\t\t\t\t\t\t\t1e-3 if opt.netType==\"STN\" else \\\n\t\t\t\t\t\t\t\t\t3e-5 if opt.netType==\"IC-STN\" else None\n\n\t# --- below are automatically set ---\n\topt.training = training\n\topt.H,opt.W = [int(x) for x in opt.size.split(\"x\")]\n\topt.fullH,opt.fullW = [int(x) for x in opt.sizeFull.split(\"x\")]\n\topt.visBlockSize = int(np.floor(np.sqrt(opt.batchSize)))\n\topt.warpDim = 2 if opt.warpType == \"translation\" else \\\n\t\t\t\t  4 if opt.warpType == \"similarity\" else \\\n\t\t\t\t  6 if opt.warpType == \"affine\" else \\\n\t\t\t\t  8 if opt.warpType == \"homography\" else None\n\topt.labelN = 43\n\topt.canon4pts = np.array([[-1,-1],[-1,1],[1,1],[1,-1]],dtype=np.float32)\n\topt.image4pts = np.array([[0,0],[0,opt.H-1],[opt.W-1,opt.H-1],[opt.W-1,0]],dtype=np.float32)\n\topt.bbox = [int(opt.fullW/2-opt.W/2),int(opt.fullH/2-opt.H/2),int(opt.fullW/2+opt.W/2),int(opt.fullH/2+opt.H/2)]\n\topt.bbox4pts = np.array([[opt.bbox[0],opt.bbox[1]],[opt.bbox[0],opt.bbox[3]],\n\t\t\t\t\t\t\t [opt.bbox[2],opt.bbox[3]],[opt.bbox[2],opt.bbox[1]]],dtype=np.float32)\n\topt.refMtrx = warp.fit(Xsrc=opt.canon4pts,Xdst=opt.image4pts)\n\topt.bboxRefMtrx = 
warp.fit(Xsrc=opt.canon4pts,Xdst=opt.bbox4pts)\n\tif opt.netType==\"STN\": opt.warpN = 1\n\n\tprint(\"({0}) {1}\".format(\n\t\tutil.toGreen(\"{0}\".format(opt.group)),\n\t\tutil.toGreen(\"{0}\".format(opt.model))))\n\tprint(\"------------------------------------------\")\n\tprint(\"network type: {0}, recurrent warps: {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.netType)),\n\t\tutil.toYellow(\"{0}\".format(opt.warpN if opt.netType==\"IC-STN\" else \"X\"))))\n\tprint(\"batch size: {0}, image size: {1}x{2}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.batchSize)),\n\t\tutil.toYellow(\"{0}\".format(opt.H)),\n\t\tutil.toYellow(\"{0}\".format(opt.W))))\n\tprint(\"warpScale: (pert) {0} (trans) {1}\".format(\n\t\tutil.toYellow(\"{0}\".format(opt.pertScale)),\n\t\tutil.toYellow(\"{0}\".format(opt.transScale))))\n\tif training:\n\t\tprint(\"[geometric predictor]    stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdGP)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrGP))))\n\t\tprint(\"[classification network] stddev={0}, lr={1}\".format(\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.stdC)),\n\t\t\tutil.toYellow(\"{0:.0e}\".format(opt.lrC))))\n\tprint(\"------------------------------------------\")\n\tif training:\n\t\tprint(util.toMagenta(\"training model ({0}) {1}...\".format(opt.group,opt.model)))\n\n\treturn opt\n"
  },
  {
    "path": "traffic-sign-tensorflow/train.py",
    "content": "import numpy as np\r\nimport time,os,sys\r\nimport argparse\r\nimport util\r\n\r\nprint(util.toYellow(\"=======================================================\"))\r\nprint(util.toYellow(\"train.py (training on MNIST)\"))\r\nprint(util.toYellow(\"=======================================================\"))\r\n\r\nimport tensorflow as tf\r\nimport data,graph,warp,util\r\nimport options\r\n\r\nprint(util.toMagenta(\"setting configurations...\"))\r\nopt = options.set(training=True)\r\n\r\n# create directories for model output\r\nutil.mkdir(\"models_{0}\".format(opt.group))\r\n\r\nprint(util.toMagenta(\"building graph...\"))\r\ntf.reset_default_graph()\r\n# build graph\r\nwith tf.device(\"/gpu:0\"):\r\n\t# ------ define input data ------\r\n\timageFull = tf.placeholder(tf.float32,shape=[opt.batchSize,opt.fullH,opt.fullW,3])\r\n\timageMean,imageVar = tf.nn.moments(imageFull,axes=[1,2],keep_dims=True)\r\n\timageFullNormalize = (imageFull-imageMean)/tf.sqrt(imageVar)\r\n\tlabel = tf.placeholder(tf.int64,shape=[opt.batchSize])\r\n\tPH = [imageFull,label]\r\n\t# ------ generate perturbation ------\r\n\tpInit = data.genPerturbations(opt)\r\n\tpInitMtrx = warp.vec2mtrx(opt,pInit)\r\n\t# ------ build network ------\r\n\timagePert = warp.transformCropImage(opt,imageFullNormalize,pInitMtrx)\r\n\timagePertRescale = imagePert*tf.sqrt(imageVar)+imageMean\r\n\tif opt.netType==\"CNN\":\r\n\t\toutput = graph.fullCNN(opt,imagePert)\r\n\telif opt.netType==\"STN\":\r\n\t\timageWarpAll = graph.STN(opt,imagePert)\r\n\t\timageWarp = imageWarpAll[-1]\r\n\t\toutput = graph.CNN(opt,imageWarp)\r\n\t\timageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean\r\n\telif opt.netType==\"IC-STN\":\r\n\t\timageWarpAll = graph.ICSTN(opt,imageFullNormalize,pInit)\r\n\t\timageWarp = imageWarpAll[-1]\r\n\t\toutput = graph.CNN(opt,imageWarp)\r\n\t\timageWarpRescale = imageWarp*tf.sqrt(imageVar)+imageMean\r\n\tsoftmax = tf.nn.softmax(output)\r\n\tlabelOnehot = 
tf.one_hot(label,opt.labelN)\r\n\tprediction = tf.equal(tf.argmax(softmax,1),label)\r\n\t# ------ define loss ------\r\n\tsoftmaxLoss = tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=labelOnehot)\r\n\tloss = tf.reduce_mean(softmaxLoss)\r\n\t# ------ optimizer ------\r\n\tlrGP_PH,lrC_PH = tf.placeholder(tf.float32,shape=[]),tf.placeholder(tf.float32,shape=[])\r\n\toptim = util.setOptimizer(opt,loss,lrGP_PH,lrC_PH)\r\n\t# ------ generate summaries ------\r\n\tsummaryImageTrain = []\r\n\tsummaryImageTest = []\r\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\tfor l in range(opt.warpN+1):\r\n\t\t\tsummaryImageTrain.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,\"TRAIN_warp{0}\".format(l),opt.H,opt.W))\r\n\t\t\tsummaryImageTest.append(util.imageSummary(opt,imageWarpAll[l]*tf.sqrt(imageVar)+imageMean,\"TEST_warp{0}\".format(l),opt.H,opt.W))\r\n\t\tsummaryImageTrain = tf.summary.merge(summaryImageTrain)\r\n\t\tsummaryImageTest = tf.summary.merge(summaryImageTest)\r\n\tsummaryLossTrain = tf.summary.scalar(\"TRAIN_loss\",loss)\r\n\ttestErrorPH = tf.placeholder(tf.float32,shape=[])\r\n\ttestImagePH = tf.placeholder(tf.float32,shape=[opt.labelN,opt.H,opt.W,3])\r\n\tsummaryErrorTest = tf.summary.scalar(\"TEST_error\",testErrorPH)\r\n\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\tsummaryMeanTest0 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_mean_init\",opt.H,opt.W)\r\n\t\tsummaryMeanTest1 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_mean_warped\",opt.H,opt.W)\r\n\t\tsummaryVarTest0 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_var_init\",opt.H,opt.W)\r\n\t\tsummaryVarTest1 = util.imageSummaryMeanVar(opt,testImagePH,\"TEST_var_warped\",opt.H,opt.W)\r\n\r\n# load data\r\nprint(util.toMagenta(\"loading GTSRB dataset...\"))\r\ntrainData,testData = data.loadGTSRB(opt,\"data/GTSRB.npz\")\r\n\r\n# prepare model saver/summary writer\r\nsaver = tf.train.Saver(max_to_keep=20)\r\nsummaryWriter = 
tf.summary.FileWriter(\"summary_{0}/{1}\".format(opt.group,opt.model))\r\n\r\nprint(util.toYellow(\"======= TRAINING START =======\"))\r\ntimeStart = time.time()\r\n# start session\r\ntfConfig = tf.ConfigProto(allow_soft_placement=True)\r\ntfConfig.gpu_options.allow_growth = True\r\nwith tf.Session(config=tfConfig) as sess:\r\n\tsess.run(tf.global_variables_initializer())\r\n\tsummaryWriter.add_graph(sess.graph)\r\n\tif opt.fromIt!=0:\r\n\t\tutil.restoreModel(opt,sess,saver,opt.fromIt)\r\n\t\tprint(util.toMagenta(\"resuming from iteration {0}...\".format(opt.fromIt)))\r\n\tprint(util.toMagenta(\"start training...\"))\r\n\r\n\t# training loop\r\n\tfor i in range(opt.fromIt,opt.toIt):\r\n\t\tlrGP = opt.lrGP*opt.lrGPdecay**(i//opt.lrGPstep)\r\n\t\tlrC = opt.lrC*opt.lrCdecay**(i//opt.lrCstep)\r\n\t\t# make training batch\r\n\t\tbatch = data.makeBatch(opt,trainData,PH)\r\n\t\tbatch[lrGP_PH] = lrGP\r\n\t\tbatch[lrC_PH] = lrC\r\n\t\t# run one step\r\n\t\t_,l = sess.run([optim,loss],feed_dict=batch)\r\n\t\tif (i+1)%100==0:\r\n\t\t\tprint(\"it. 
{0}/{1}  lr={3}(GP),{4}(C), loss={5}, time={2}\"\r\n\t\t\t\t.format(util.toCyan(\"{0}\".format(i+1)),\r\n\t\t\t\t\t\topt.toIt,\r\n\t\t\t\t\t\tutil.toGreen(\"{0:.2f}\".format(time.time()-timeStart)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrGP)),\r\n\t\t\t\t\t\tutil.toYellow(\"{0:.0e}\".format(lrC)),\r\n\t\t\t\t\t\tutil.toRed(\"{0:.4f}\".format(l))))\r\n\t\tif (i+1)%100==0:\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryLossTrain,feed_dict=batch),i+1)\r\n\t\tif (i+1)%500==0 and (opt.netType==\"STN\" or opt.netType==\"IC-STN\"):\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryImageTrain,feed_dict=batch),i+1)\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryImageTest,feed_dict=batch),i+1)\r\n\t\tif (i+1)%1000==0:\r\n\t\t\t# evaluate on test set\r\n\t\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\t\t\ttestAcc,testMean,testVar = data.evalTest(opt,sess,testData,PH,prediction,imagesEval=[imagePert,imageWarp])\r\n\t\t\telse:\r\n\t\t\t\ttestAcc,_,_ = data.evalTest(opt,sess,testData,PH,prediction)\r\n\t\t\ttestError = (1-testAcc)*100\r\n\t\t\tsummaryWriter.add_summary(sess.run(summaryErrorTest,feed_dict={testErrorPH:testError}),i+1)\r\n\t\t\tif opt.netType==\"STN\" or opt.netType==\"IC-STN\":\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryMeanTest0,feed_dict={testImagePH:testMean[0]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryMeanTest1,feed_dict={testImagePH:testMean[1]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryVarTest0,feed_dict={testImagePH:testVar[0]}),i+1)\r\n\t\t\t\tsummaryWriter.add_summary(sess.run(summaryVarTest1,feed_dict={testImagePH:testVar[1]}),i+1)\r\n\t\tif (i+1)%10000==0:\r\n\t\t\tutil.saveModel(opt,sess,saver,i+1)\r\n\t\t\tprint(util.toGreen(\"model saved: {0}/{1}, it.{2}\".format(opt.group,opt.model,i+1)))\r\n\r\nprint(util.toYellow(\"======= TRAINING DONE =======\"))\r\n"
  },
  {
    "path": "traffic-sign-tensorflow/util.py",
    "content": "import numpy as np\nimport scipy.misc\nimport tensorflow as tf\nimport os\nimport termcolor\n\ndef mkdir(path):\n\tif not os.path.exists(path): os.mkdir(path)\ndef imread(fname):\n\treturn scipy.misc.imread(fname)/255.0\ndef imsave(fname,array):\n\tscipy.misc.toimage(array,cmin=0.0,cmax=1.0).save(fname)\n\n# convert to colored strings\ndef toRed(content): return termcolor.colored(content,\"red\",attrs=[\"bold\"])\ndef toGreen(content): return termcolor.colored(content,\"green\",attrs=[\"bold\"])\ndef toBlue(content): return termcolor.colored(content,\"blue\",attrs=[\"bold\"])\ndef toCyan(content): return termcolor.colored(content,\"cyan\",attrs=[\"bold\"])\ndef toYellow(content): return termcolor.colored(content,\"yellow\",attrs=[\"bold\"])\ndef toMagenta(content): return termcolor.colored(content,\"magenta\",attrs=[\"bold\"])\n\n# make image summary from image batch\ndef imageSummary(opt,image,tag,H,W):\n\tblockSize = opt.visBlockSize\n\timageOne = tf.batch_to_space(image[:blockSize**2],crops=[[0,0],[0,0]],block_size=blockSize)\n\timagePermute = tf.reshape(imageOne,[H,blockSize,W,blockSize,-1])\n\timageTransp = tf.transpose(imagePermute,[1,0,3,2,4])\n\timageBlocks = tf.reshape(imageTransp,[1,H*blockSize,W*blockSize,-1])\n\timageBlocks = tf.cast(imageBlocks*255,tf.uint8)\n\tsummary = tf.summary.image(tag,imageBlocks)\n\treturn summary\n\n# make image summary from image batch (mean/variance)\ndef imageSummaryMeanVar(opt,image,tag,H,W):\n\timage = tf.concat([image,np.zeros([2,H,W,3])],axis=0)\n\timageOne = tf.batch_to_space_nd(image,crops=[[0,0],[0,0]],block_shape=[5,9])\n\timagePermute = tf.reshape(imageOne,[H,5,W,9,-1])\n\timageTransp = tf.transpose(imagePermute,[1,0,3,2,4])\n\timageBlocks = tf.reshape(imageTransp,[1,H*5,W*9,-1])\n\t# imageBlocks = tf.cast(imageBlocks*255,tf.uint8)\n\tsummary = tf.summary.image(tag,imageBlocks)\n\treturn summary\n\n# set optimizer for different learning rates\ndef setOptimizer(opt,loss,lrGP,lrC):\n\tvarsGP = [v for 
v in tf.global_variables() if \"geometric\" in v.name]\n\tvarsC = [v for v in tf.global_variables() if \"classifier\" in v.name]\n\tgradC = tf.gradients(loss,varsC)\n\toptimC = tf.train.GradientDescentOptimizer(lrC).apply_gradients(zip(gradC,varsC))\n\tif len(varsGP)>0:\n\t\tgradGP = tf.gradients(loss,varsGP)\n\t\toptimGP = tf.train.GradientDescentOptimizer(lrGP).apply_gradients(zip(gradGP,varsGP))\n\t\toptim = tf.group(optimC,optimGP)\n\telse:\n\t\toptim = optimC\n\treturn optim\n\n# restore model\ndef restoreModel(opt,sess,saver,it):\n\tsaver.restore(sess,\"models_{0}/{1}_it{2}.ckpt\".format(opt.group,opt.model,it,opt.warpN))\n# save model\ndef saveModel(opt,sess,saver,it):\n\tsaver.save(sess,\"models_{0}/{1}_it{2}.ckpt\".format(opt.group,opt.model,it,opt.warpN))\n\n"
  },
  {
    "path": "traffic-sign-tensorflow/warp.py",
    "content": "import numpy as np\nimport scipy.linalg\nimport tensorflow as tf\n\n# fit (affine) warp between two sets of points \ndef fit(Xsrc,Xdst):\n\tptsN = len(Xsrc)\n\tX,Y,U,V,O,I = Xsrc[:,0],Xsrc[:,1],Xdst[:,0],Xdst[:,1],np.zeros([ptsN]),np.ones([ptsN])\n\tA = np.concatenate((np.stack([X,Y,I,O,O,O],axis=1),\n\t\t\t\t\t\tnp.stack([O,O,O,X,Y,I],axis=1)),axis=0)\n\tb = np.concatenate((U,V),axis=0)\n\tp1,p2,p3,p4,p5,p6 = scipy.linalg.lstsq(A,b)[0].squeeze()\n\tpMtrx = np.array([[p1,p2,p3],[p4,p5,p6],[0,0,1]],dtype=np.float32)\n\treturn pMtrx\n\n# compute composition of warp parameters\ndef compose(opt,p,dp):\n\twith tf.name_scope(\"compose\"):\n\t\tpMtrx = vec2mtrx(opt,p)\n\t\tdpMtrx = vec2mtrx(opt,dp)\n\t\tpMtrxNew = tf.matmul(dpMtrx,pMtrx)\n\t\tpMtrxNew /= pMtrxNew[:,2:3,2:3]\n\t\tpNew = mtrx2vec(opt,pMtrxNew)\n\treturn pNew\n\n# compute inverse of warp parameters\ndef inverse(opt,p):\n\twith tf.name_scope(\"inverse\"):\n\t\tpMtrx = vec2mtrx(opt,p)\n\t\tpInvMtrx = tf.matrix_inverse(pMtrx)\n\t\tpInv = mtrx2vec(opt,pInvMtrx)\n\treturn pInv\n\n# convert warp parameters to matrix\ndef vec2mtrx(opt,p):\n\twith tf.name_scope(\"vec2mtrx\"):\n\t\tO = tf.zeros([opt.batchSize])\n\t\tI = tf.ones([opt.batchSize])\n\t\tif opt.warpType==\"translation\":\n\t\t\ttx,ty = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I,O,tx],[O,I,ty],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"similarity\":\n\t\t\tpc,ps,tx,ty = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+pc,-ps,tx],[ps,I+pc,ty],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"affine\":\n\t\t\tp1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[O,O,I]]),perm=[2,0,1])\n\t\tif opt.warpType==\"homography\":\n\t\t\tp1,p2,p3,p4,p5,p6,p7,p8 = tf.unstack(p,axis=1)\n\t\t\tpMtrx = tf.transpose(tf.stack([[I+p1,p2,p3],[p4,I+p5,p6],[p7,p8,I]]),perm=[2,0,1])\n\treturn pMtrx\n\n# convert warp matrix to parameters\ndef mtrx2vec(opt,pMtrx):\n\twith 
tf.name_scope(\"mtrx2vec\"):\n\t\t[row0,row1,row2] = tf.unstack(pMtrx,axis=1)\n\t\t[e00,e01,e02] = tf.unstack(row0,axis=1)\n\t\t[e10,e11,e12] = tf.unstack(row1,axis=1)\n\t\t[e20,e21,e22] = tf.unstack(row2,axis=1)\n\t\tif opt.warpType==\"translation\": p = tf.stack([e02,e12],axis=1)\n\t\tif opt.warpType==\"similarity\": p = tf.stack([e00-1,e10,e02,e12],axis=1)\n\t\tif opt.warpType==\"affine\": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12],axis=1)\n\t\tif opt.warpType==\"homography\": p = tf.stack([e00-1,e01,e02,e10,e11-1,e12,e20,e21],axis=1)\n\treturn p\n\n# warp the image\ndef transformImage(opt,image,pMtrx):\n\twith tf.name_scope(\"transformImage\"):\n\t\trefMtrx = tf.tile(tf.expand_dims(opt.refMtrx,axis=0),[opt.batchSize,1,1])\n\t\ttransMtrx = tf.matmul(refMtrx,pMtrx)\n\t\t# warp the canonical coordinates\n\t\tX,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))\n\t\tX,Y = X.flatten(),Y.flatten()\n\t\tXYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T\n\t\tXYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)\n\t\tXYwarpHom = tf.matmul(transMtrx,XYhom)\n\t\tXwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1)\n\t\tXwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\tYwarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\t# get the integer sampling coordinates\n\t\tXfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)\n\t\tYfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)\n\t\tXfloorInt,XceilInt = tf.to_int32(Xfloor),tf.to_int32(Xceil)\n\t\tYfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)\n\t\timageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W])\n\t\timageVec = tf.reshape(image,[-1,int(image.shape[-1])])\n\t\timageVecOut = tf.concat([imageVec,tf.zeros([1,int(image.shape[-1])])],axis=0)\n\t\tidxUL = (imageIdx*opt.H+YfloorInt)*opt.W+XfloorInt\n\t\tidxUR = (imageIdx*opt.H+YfloorInt)*opt.W+XceilInt\n\t\tidxBL = (imageIdx*opt.H+YceilInt)*opt.W+XfloorInt\n\t\tidxBR = 
(imageIdx*opt.H+YceilInt)*opt.W+XceilInt\n\t\tidxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.H*opt.W)\n\t\tdef insideImage(Xint,Yint):\n\t\t\treturn (Xint>=0)&(Xint<opt.W)&(Yint>=0)&(Yint<opt.H)\n\t\tidxUL = tf.where(insideImage(XfloorInt,YfloorInt),idxUL,idxOutside)\n\t\tidxUR = tf.where(insideImage(XceilInt,YfloorInt),idxUR,idxOutside)\n\t\tidxBL = tf.where(insideImage(XfloorInt,YceilInt),idxBL,idxOutside)\n\t\tidxBR = tf.where(insideImage(XceilInt,YceilInt),idxBR,idxOutside)\n\t\t# bilinear interpolation\n\t\tXratio = tf.reshape(Xwarp-Xfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\tYratio = tf.reshape(Ywarp-Yfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\timageUL = tf.to_float(tf.gather(imageVecOut,idxUL))*(1-Xratio)*(1-Yratio)\n\t\timageUR = tf.to_float(tf.gather(imageVecOut,idxUR))*(Xratio)*(1-Yratio)\n\t\timageBL = tf.to_float(tf.gather(imageVecOut,idxBL))*(1-Xratio)*(Yratio)\n\t\timageBR = tf.to_float(tf.gather(imageVecOut,idxBR))*(Xratio)*(Yratio)\n\t\timageWarp = imageUL+imageUR+imageBL+imageBR\n\treturn imageWarp\n\n# warp the image\ndef transformCropImage(opt,imageFull,pMtrx):\n\twith tf.name_scope(\"transformImage\"):\n\t\trefMtrx = tf.tile(tf.expand_dims(opt.bboxRefMtrx,axis=0),[opt.batchSize,1,1])\n\t\ttransMtrx = tf.matmul(refMtrx,pMtrx)\n\t\t# warp the canonical coordinates\n\t\tX,Y = np.meshgrid(np.linspace(-1,1,opt.W),np.linspace(-1,1,opt.H))\n\t\tX,Y = X.flatten(),Y.flatten()\n\t\tXYhom = np.stack([X,Y,np.ones_like(X)],axis=1).T\n\t\tXYhom = np.tile(XYhom,[opt.batchSize,1,1]).astype(np.float32)\n\t\tXYwarpHom = tf.matmul(transMtrx,XYhom)\n\t\tXwarpHom,YwarpHom,ZwarpHom = tf.unstack(XYwarpHom,axis=1)\n\t\tXwarp = tf.reshape(XwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\tYwarp = tf.reshape(YwarpHom/(ZwarpHom+1e-8),[opt.batchSize,opt.H,opt.W])\n\t\t# get the integer sampling coordinates\n\t\tXfloor,Xceil = tf.floor(Xwarp),tf.ceil(Xwarp)\n\t\tYfloor,Yceil = tf.floor(Ywarp),tf.ceil(Ywarp)\n\t\tXfloorInt,XceilInt = 
tf.to_int32(Xfloor),tf.to_int32(Xceil)\n\t\tYfloorInt,YceilInt = tf.to_int32(Yfloor),tf.to_int32(Yceil)\n\t\timageIdx = np.tile(np.arange(opt.batchSize).reshape([opt.batchSize,1,1]),[1,opt.H,opt.W])\n\t\timageVec = tf.reshape(imageFull,[-1,int(imageFull.shape[-1])])\n\t\timageVecOut = tf.concat([imageVec,tf.zeros([1,int(imageFull.shape[-1])])],axis=0)\n\t\tidxUL = (imageIdx*opt.fullH+YfloorInt)*opt.fullW+XfloorInt\n\t\tidxUR = (imageIdx*opt.fullH+YfloorInt)*opt.fullW+XceilInt\n\t\tidxBL = (imageIdx*opt.fullH+YceilInt)*opt.fullW+XfloorInt\n\t\tidxBR = (imageIdx*opt.fullH+YceilInt)*opt.fullW+XceilInt\n\t\tidxOutside = tf.fill([opt.batchSize,opt.H,opt.W],opt.batchSize*opt.fullH*opt.fullW)\n\t\tdef insideImage(Xint,Yint):\n\t\t\treturn (Xint>=0)&(Xint<opt.fullW)&(Yint>=0)&(Yint<opt.fullH)\n\t\tidxUL = tf.where(insideImage(XfloorInt,YfloorInt),idxUL,idxOutside)\n\t\tidxUR = tf.where(insideImage(XceilInt,YfloorInt),idxUR,idxOutside)\n\t\tidxBL = tf.where(insideImage(XfloorInt,YceilInt),idxBL,idxOutside)\n\t\tidxBR = tf.where(insideImage(XceilInt,YceilInt),idxBR,idxOutside)\n\t\t# bilinear interpolation\n\t\tXratio = tf.reshape(Xwarp-Xfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\tYratio = tf.reshape(Ywarp-Yfloor,[opt.batchSize,opt.H,opt.W,1])\n\t\timageUL = tf.to_float(tf.gather(imageVecOut,idxUL))*(1-Xratio)*(1-Yratio)\n\t\timageUR = tf.to_float(tf.gather(imageVecOut,idxUR))*(Xratio)*(1-Yratio)\n\t\timageBL = tf.to_float(tf.gather(imageVecOut,idxBL))*(1-Xratio)*(Yratio)\n\t\timageBR = tf.to_float(tf.gather(imageVecOut,idxBR))*(Xratio)*(Yratio)\n\t\timageWarp = imageUL+imageUR+imageBL+imageBR\n\treturn imageWarp\n"
  }
]