Repository: xcyan/nips16_PTN Branch: master Commit: bd17c6c5aa8c Files: 32 Total size: 98.4 KB Directory structure: gitextract_tsuro31j/ ├── .gitignore ├── LICENSE ├── README.md ├── demo_pretrain_singleclass.sh ├── demo_train_cnn_vol_singleclass.sh ├── demo_train_ptn_comb_singleclass.sh ├── demo_train_ptn_proj_singleclass.sh ├── download_models.sh ├── eval_models.sh ├── exp_multiclass.txt ├── exp_singleclass.txt ├── install_ptnbhwd.sh ├── prepare_data.sh ├── scripts/ │ ├── arch_PTN.lua │ ├── arch_rotatorRNN.lua │ ├── eval_quant_test.lua │ ├── train_PTN.lua │ ├── train_rotatorRNN_base.lua │ └── train_rotatorRNN_curriculum.lua └── utils/ ├── adam_v2.lua ├── data.lua ├── data_test.lua ├── data_val.lua ├── dataset.lua ├── model_utils.lua ├── testset_ptn.lua ├── trainset_ptn.lua ├── trainset_rotatorRNN_base.lua ├── trainset_rotatorRNN_curriculum.lua ├── valset_ptn.lua ├── valset_rotatorRNN_base.lua └── valset_rotatorRNN_curriculum.lua ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Folders models/ data/ # Temporal files *.t7 *.mat *.jpg *.png ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2016 Xinchen Yan, Jimei Yang, Ersin Yumer, Yijie Guo and Honglak Lee Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of 
the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Perspective Transformer Nets (PTN) This is the code for NIPS 2016 paper [Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision](https://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf) by Xinchen Yan, Jimei Yang, Ersin Yumer, Yijie Guo and Honglak Lee

Please follow the instructions to run the code. ## Requirements PTN requires or works with * Mac OS X or Linux * NVIDIA GPU ## Installing Dependency * Install [Torch](http://torch.ch) * Install [Mattorch](https://github.com/clementfarabet/lua---mattorch) * Install [Perspective Transformer Layer](https://github.com/xcyan/ptnbhwd.git) The following command installs the Perspective Transformer Layer: ``` ./install_ptnbhwd.sh ``` ## Dataset Downloading * Please run the command to download the pre-processed dataset (including rendered 2D views and 3D volumes): ``` ./prepare_data.sh ``` * Disclaimer: Please cite the [ShapeNet paper](https://arxiv.org/pdf/1512.03012.pdf) as well. ## Pre-trained Models Downloading (single-class experiment) PTN-Proj: ptn_proj.t7 PTN-Comb: ptn_comb.t7 CNN-Vol: cnn_vol.t7 * The following command downloads the pre-trained models: ``` ./download_models.sh ``` ## Testing using Pre-trained Models (single-class experiment) * The following command evaluates the pre-trained models: ``` ./eval_models.sh ``` ## Training (single-class experiment) * If you want to pre-train the view-point independent image encoder on single-class, please run the following command. Note that the pre-training could take a few days on a single TITAN X GPU. ``` ./demo_pretrain_singleclass.sh ``` * If you want to train PTN-Proj (unsupervised) on single-class based on pre-trained encoder, please run the command. ``` ./demo_train_ptn_proj_singleclass.sh ``` * If you want to train PTN-Comb (3D supervision) on single-class based on pre-trained encoder, please run the command. ``` ./demo_train_ptn_comb_singleclass.sh ``` * If you want to train CNN-Vol (3D supervision) on single-class based on pre-trained encoder, please run the command. ``` ./demo_train_cnn_vol_singleclass.sh ``` ## Using your own camera * In many cases, you want to implement your own camera matrix (e.g., intrinsic or extrinsic). 
Please feel free to modify [this function](https://github.com/xcyan/nips16_PTN/blob/master/scripts/train_PTN.lua#L207). * Before starting your own implementation, we recommend going through some basic camera geometry in [this computer vision textbook](http://szeliski.org/Book/drafts/SzeliskiBook_20100903_draft.pdf) written by Richard Szeliski (see Eq 2.59 at Page 53). * Note that in our voxel ray-tracing implementation, we used the inverse camera matrix. ## Third-party Implementation Besides our torch implementation, we also recommend the following third-party re-implementation: * [TensorFlow Implementation](https://github.com/tensorflow/models/tree/archive/research/ptn): This re-implementation was developed during Xinchen's Google internship. If you find a bug, please file a bug including @xcyan. ## Citation If you find this useful, please cite our work as follows: ``` @incollection{NIPS2016_6206, title = {Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision}, author = {Yan, Xinchen and Yang, Jimei and Yumer, Ersin and Guo, Yijie and Lee, Honglak}, booktitle = {Advances in Neural Information Processing Systems 29}, editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. 
Garnett}, pages = {1696--1704}, year = {2016}, publisher = {Curran Associates, Inc.}, url = {http://papers.nips.cc/paper/6206-perspective-transformer-nets-learning-single-view-3d-object-reconstruction-without-3d-supervision.pdf} } ``` ================================================ FILE: demo_pretrain_singleclass.sh ================================================ mkdir ./models th scripts/train_rotatorRNN_base.lua --gpu 1 --niter 160 --save_every 40 th scripts/train_rotatorRNN_curriculum.lua --gpu 1 --kstep 2 --batch_size 32 --adam 1 --niter 40 --save_every 20 th scripts/train_rotatorRNN_curriculum.lua --gpu 1 --kstep 4 --batch_size 32 --adam 2 --niter 40 --save_every 20 th scripts/train_rotatorRNN_curriculum.lua --gpu 1 --kstep 8 --batch_size 32 --adam 2 --niter 40 --save_every 20 th scripts/train_rotatorRNN_curriculum.lua --gpu 1 --kstep 12 --batch_size 16 --adam 2 --niter 40 --save_every 20 th scripts/train_rotatorRNN_curriculum.lua --gpu 1 --kstep 16 --batch_size 8 --adam 2 --niter 40 --save_every 20 ================================================ FILE: demo_train_cnn_vol_singleclass.sh ================================================ mkdir ./models th scripts/train_PTN.lua --gpu 3 --niter 100 --lambda_vox 1 --lambda_msk 0 ================================================ FILE: demo_train_ptn_comb_singleclass.sh ================================================ mkdir ./models th scripts/train_PTN.lua --gpu 2 --niter 100 --lambda_vox 1 --lambda_msk 1 ================================================ FILE: demo_train_ptn_proj_singleclass.sh ================================================ mkdir ./models th scripts/train_PTN.lua --gpu 1 --niter 100 --lambda_vox 0 --lambda_msk 1 ================================================ FILE: download_models.sh ================================================ mkdir ./models wget -O models/ptn_proj.t7 https://umich.box.com/shared/static/5z5oci8lwdhzszf88s5azbeq1nsa15ad.t7 wget -O models/ptn_comb.t7 
https://umich.box.com/shared/static/pq7axx8ypva8c0e7jm7ajbfsm972d1de.t7 wget -O models/cnn_vol.t7 https://umich.box.com/shared/static/2ur48aj419slj8cwkeobubhwnvripc0s.t7 ================================================ FILE: eval_models.sh ================================================ th scripts/eval_quant_test.lua --gpu 1 ================================================ FILE: exp_multiclass.txt ================================================ 02691156 02828884 02933112 02958343 03001627 03211117 03636649 03691459 04090263 04256520 04379243 04401088 04530566 ================================================ FILE: exp_singleclass.txt ================================================ 03001627 ================================================ FILE: install_ptnbhwd.sh ================================================ mkdir pt_layer git clone https://github.com/xcyan/ptnbhwd.git pt_layer cd pt_layer luarocks make ptnbhwd-scm-1.rockspec cd .. ================================================ FILE: prepare_data.sh ================================================ mkdir ./data wget -O data/all_ids.tar.gz https://umich.box.com/shared/static/4m1mr6aud793gwi7jn266t32w83ml25l.gz wget -O data/all_viewdata.tar.gz https://umich.box.com/shared/static/ckvihxh4berjzcgd3s8aiu87aurv3gms.gz wget -O data/all_voxdata.tar.gz https://umich.box.com/shared/static/bwyx8qsby2f38ju1ybcrp50a1q9uzenu.gz tar xf data/all_ids.tar.gz -C data/ echo "It may take a while, please be patient." 
tar xf data/all_viewdata.tar.gz -C data/
tar xf data/all_voxdata.tar.gz -C data/

================================================
FILE: scripts/arch_PTN.lua
================================================
-- Network builders for the Perspective Transformer Net: image encoder,
-- volumetric decoder and perspective projector.
local PTN = {}

--- Builds the three PTN sub-networks.
-- @param opt option table, forwarded unchanged to every builder
-- @return encoder, voxel decoder, projector (in that order)
function PTN.create(opt)
  local enc = PTN.create_encoder(opt)
  local vox_dec = PTN.create_voxel_dec(opt)
  local proj = PTN.create_projector(opt)
  return enc, vox_dec, proj
end

--- Builds the image encoder.
-- A convolutional trunk followed by two fully-connected layers feeds a
-- ConcatTable with two heads (identity unit, viewpoint unit), each producing
-- an opt.nz-dimensional code.
-- @param opt option table; reads opt.nz
-- @return nn.Sequential ending in the two-headed ConcatTable
function PTN.create_encoder(opt)
  local net = nn.Sequential()
    -- 64 x 64 x 3 --> 32 x 32 x 64
    :add(nn.SpatialConvolution(3, 64, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 32 x 32 x 64 --> 16 x 16 x 128
    :add(nn.SpatialConvolution(64, 128, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 16 x 16 x 128 --> 8 x 8 x 256
    :add(nn.SpatialConvolution(128, 256, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 8 x 8 x 256 --> 1024
    :add(nn.Reshape(8*8*256))
    :add(nn.Linear(8*8*256, 1024))
    :add(nn.ReLU())
    -- 1024 --> 1024
    :add(nn.Linear(1024, 1024))
    :add(nn.ReLU())

  -- identity unit
  local id_head = nn.Sequential()
    :add(nn.Linear(1024, opt.nz))
    :add(nn.ReLU())

  -- viewpoint unit
  local rot_head = nn.Sequential()
    :add(nn.Linear(1024, opt.nz))
    :add(nn.ReLU())

  net:add(nn.ConcatTable():add(id_head):add(rot_head))
  return net
end

--- Builds the volumetric decoder that maps an opt.nz code to a voxel grid.
-- @param opt option table; reads opt.nz
-- @return nn.Sequential ending in a Sigmoid
function PTN.create_voxel_dec(opt)
  local dec = nn.Sequential()
    :add(nn.Linear(opt.nz, 3*3*3*512))
    :add(nn.ReLU())
    :add(nn.Reshape(512, 3, 3, 3))
    -- 512 x 3 x 3 x 3 --> 256 x 6 x 6 x 6
    :add(nn.VolumetricFullConvolution(512, 256, 4, 4, 4, 1, 1, 1, 0, 0, 0))
    :add(nn.ReLU())
    -- 256 x 6 x 6 x 6 --> 96 x 15 x 15 x 15
    :add(nn.VolumetricFullConvolution(256, 96, 5, 5, 5, 2, 2, 2, 0, 0, 0))
    :add(nn.ReLU())
    -- 96 x 15 x 15 x 15 --> 1 x 32 x 32 x 32
    :add(nn.VolumetricFullConvolution(96, 1, 6, 6, 6, 2, 2, 2, 1, 1, 1))
    :add(nn.Sigmoid())
  return dec
end

--- Builds the perspective projector.
-- @param opt option table; reads opt.vox_size and opt.focal_length
function PTN.create_projector(opt)
  local grid_stream =
nn.PerspectiveGridGenerator(opt.vox_size, opt.vox_size, opt.vox_size, opt.focal_length)
  -- The volume is transposed before sampling and transposed back afterwards;
  -- the final Max reduces over dimension 4.
  local vox_stream = nn.Transpose({2,4},{4,5})
  local proj = nn.Sequential()
    :add(nn.ParallelTable():add(vox_stream):add(grid_stream))
    :add(nn.BilinearSamplerPerspective(opt.focal_length))
    :add(nn.Transpose({4,5}, {2,4})) -- B x c x Dim1 x Dim2 x Dim3
    :add(nn.Max(4))
  return proj
end

return PTN

================================================
FILE: scripts/arch_rotatorRNN.lua
================================================
-- Network builders for the recurrent rotator used to pre-train the encoder.
local rotatorRNN = {}

--- Builds all rotator sub-networks.
-- @param opt option table, forwarded unchanged to every builder
-- @return encoder, actor, mixer, mask decoder, image decoder (in that order)
function rotatorRNN.create(opt)
  local enc = rotatorRNN.create_encoder(opt)
  local act = rotatorRNN.create_actor(opt)
  local mix = rotatorRNN.create_mixer(opt)
  local dec_msk = rotatorRNN.create_decoder_msk(opt)
  local dec_im = rotatorRNN.create_decoder_im(opt)
  return enc, act, mix, dec_msk, dec_im
end

--- Builds the image encoder: convolutional trunk, two fully-connected layers
-- and a ConcatTable with an identity head and a viewpoint head, each of size
-- opt.nz.
-- @param opt option table; reads opt.nz
-- @return nn.Sequential ending in the two-headed ConcatTable
function rotatorRNN.create_encoder(opt)
  local net = nn.Sequential()
    -- 64 x 64 x 3 --> 32 x 32 x 64
    :add(nn.SpatialConvolution(3, 64, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 32 x 32 x 64 --> 16 x 16 x 128
    :add(nn.SpatialConvolution(64, 128, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 16 x 16 x 128 --> 8 x 8 x 256
    :add(nn.SpatialConvolution(128, 256, 5, 5, 2, 2, 2, 2))
    :add(nn.ReLU())
    -- 8 x 8 x 256 --> 1024
    :add(nn.Reshape(8*8*256))
    :add(nn.Linear(8*8*256, 1024))
    :add(nn.ReLU())
    -- 1024 --> 1024
    :add(nn.Linear(1024, 1024))
    :add(nn.ReLU())

  -- identity unit
  local id_head = nn.Sequential()
    :add(nn.Linear(1024, opt.nz))
    :add(nn.ReLU())

  -- viewpoint unit
  local rot_head = nn.Sequential()
    :add(nn.Linear(1024, opt.nz))
    :add(nn.ReLU())

  net:add(nn.ConcatTable():add(id_head):add(rot_head))
  return net
end

--- Builds the action unit: a bilinear layer over (opt.nz, opt.na) inputs
-- producing an opt.nz output, followed by a ReLU.
-- @param opt option table; reads opt.nz and opt.na
function rotatorRNN.create_actor(opt)
  -- h1, a --> h2
  local net = nn.Sequential()
    :add(nn.Bilinear(opt.nz, opt.na, opt.nz))
    :add(nn.ReLU())
  return net
end

function
rotatorRNN.create_mixer(opt)
  -- Joins the input table along dimension 2 (2 * opt.nz features) and passes
  -- it through two 1024-unit fully-connected layers.
  local net = nn.Sequential()
    :add(nn.JoinTable(2))
    :add(nn.Linear(opt.nz*2, 1024))
    :add(nn.ReLU())
    :add(nn.Linear(1024, 1024))
    :add(nn.ReLU())
  return net
end

--- Builds the mask decoder (1024 features --> single-channel map, Sigmoid).
-- @param opt option table (not read by this builder)
function rotatorRNN.create_decoder_msk(opt)
  local net = nn.Sequential()
    -- 1024 --> 8 x 8 x 128
    :add(nn.Linear(1024, 8*8*128))
    :add(nn.ReLU())
    :add(nn.Reshape(128, 8, 8))
    -- 8 x 8 x 128 --> 16 x 16 x 64
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(128, 64, 5, 5, 1, 1, 2, 2))
    :add(nn.ReLU())
    -- 16 x 16 x 64 --> 32 x 32 x 32
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(64, 32, 5, 5, 1, 1, 2, 2))
    :add(nn.ReLU())
    -- 32 x 32 x 32 --> 64 x 64 x 1
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(32, 1, 5, 5, 1, 1, 2, 2))
    :add(nn.Sigmoid())
  return net
end

--- Builds the image decoder (1024 features --> three-channel map, Tanh).
-- @param opt option table (not read by this builder)
function rotatorRNN.create_decoder_im(opt)
  local net = nn.Sequential()
    -- 1024 --> 8 x 8 x 256
    :add(nn.Linear(1024, 8*8*256))
    :add(nn.ReLU())
    :add(nn.Reshape(256, 8, 8))
    -- 8 x 8 x 256 --> 16 x 16 x 128
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(256, 128, 5, 5, 1, 1, 2, 2))
    :add(nn.ReLU())
    -- 16 x 16 x 128 --> 32 x 32 x 64
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(128, 64, 5, 5, 1, 1, 2, 2))
    :add(nn.ReLU())
    -- 32 x 32 x 64 --> 64 x 64 x 3
    :add(nn.SpatialUpSamplingNearest(2))
    :add(nn.SpatialConvolution(64, 3, 5, 5, 1, 1, 2, 2))
    :add(nn.Tanh())
  return net
end

return rotatorRNN

================================================
FILE: scripts/eval_quant_test.lua
================================================
require 'torch'
require 'nn'
require 'cunn'
require 'cudnn'
require 'ptn'
require 'nngraph'
require 'optim'
require 'image'
require 'mattorch'
model_utils = require 'utils.model_utils'
optim_utils = require
'utils.adam_v2'

-- Evaluation options, parsed from the command line by lapp.
-- NOTE(review): the option spec below sits on a single line in this copy of
-- the file; lapp presumably expects one option per line -- confirm against
-- the upstream source before running.
opt = lapp[[ --save_every (default 20) --print_every (default 1) --data_root (default 'data') --data_id_path (default 'data/shapenetcore_ids') --data_view_path (default 'data/shapenetcore_viewdata') --data_vox_path (default 'data/shapenetcore_voxdata') --dataset (default 'dataset_ptn') --gpu (default 0) --use_cudnn (default 1) --nz (default 512) --na (default 3) --nview (default 24) --nThreads (default 1) --niter (default 100) --display (default 1) --checkpoint_dir (default 'models/') --kstep (default 24) --batch_size (default 6) --adam (default 1) --arch_name (default 'arch_PTN') --weight_decay (default 0.001) --exp_list (default 'singleclass') --load_size (default 64) --vox_size (default 32) --thresh (default 0.5) ]]
opt.focal_length = math.sqrt(3)/2
opt.ntrain = math.huge

-- An environment variable with the same name as an option overrides it;
-- values that parse as numbers are stored as numbers, others as strings.
for k,v in pairs(opt) do opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] end
print(opt)
if opt.display == 0 then opt.display = false end

-- Select the GPU. The pcall results (ok, ok2) are not checked, so a failed
-- require surfaces at the setDevice call.
if opt.gpu > 0 then
  ok, cunn = pcall(require, 'cunn')
  ok2, cutorch = pcall(require, 'cutorch')
  cutorch.setDevice(opt.gpu)
end

-- Draw a seed, print it, then apply it.
opt.manualSeed = torch.random(1, 10000) -- fix seed
print("Random Seed: " .. opt.manualSeed)
torch.manualSeed(opt.manualSeed)
torch.setnumthreads(1)
torch.setdefaulttensortype('torch.FloatTensor')

-- create data loader
local TestLoader = require('utils/data_test.lua')

-- Load the three checkpoints. The encoder is taken from cnn_vol.t7 only;
-- every checkpoint contributes its own voxel decoder.
base_loader = torch.load(opt.checkpoint_dir .. 'cnn_vol.t7')
encoder = base_loader.encoder
base_voxel_dec = base_loader.voxel_dec
unsup_loader = torch.load(opt.checkpoint_dir .. 'ptn_proj.t7')
unsup_voxel_dec = unsup_loader.voxel_dec
sup_loader = torch.load(opt.checkpoint_dir ..
'ptn_comb.t7')
sup_voxel_dec = sup_loader.voxel_dec
collectgarbage()

local criterion_vox = nn.MSECriterion()
criterion_vox.sizeAverage = false
----------------------------------------------
-- Pre-allocated batch buffers: row (m-1)*kstep + k holds step k of batch
-- item m.
local batch_im_in = torch.Tensor(opt.batch_size * opt.kstep, 3, opt.load_size, opt.load_size)
local batch_vox = torch.Tensor(opt.batch_size * opt.kstep, 1, opt.vox_size, opt.vox_size, opt.vox_size)

local epoch_tm = torch.Timer()
local tm = torch.Timer()
local data_tm = torch.Timer()

if opt.gpu > 0 then
  batch_im_in = batch_im_in:cuda()
  batch_vox = batch_vox:cuda()
  encoder:cuda()
  base_voxel_dec:cuda()
  unsup_voxel_dec:cuda()
  sup_voxel_dec:cuda()
  criterion_vox:cuda()
end

-- Flattened parameter/gradient views; only the gradient buffers are used
-- below (they are zeroed every iteration).
paramEnc, gradEnc = encoder:getParameters()
base_params, base_grads = base_voxel_dec:getParameters()
unsup_params, unsup_grads = unsup_voxel_dec:getParameters()
sup_params, sup_grads = sup_voxel_dec:getParameters()

encoder:evaluate()
base_voxel_dec:evaluate()
sup_voxel_dec:evaluate()
unsup_voxel_dec:evaluate()

--LIST = {'airplane', 'bench', 'dresser', 'car', 'chair', 'display', 'lamp', 'loudspeaker', 'rifle', 'sofa', 'table', 'telephone', 'vessel'}
LIST = {'chair'}

for category_idx = 1, #LIST do
  -- load data
  opt.eval_list = LIST[category_idx]
  local data = TestLoader.new(opt.nThreads, opt.dataset, opt)
  -- Running IoU sums for CNN-Vol, PTN-Comb and PTN-Proj respectively.
  local base_iouVOX = 0
  local sup_iouVOX = 0
  local unsup_iouVOX = 0

  for i = 1, data:size() / opt.batch_size do
    xlua.progress(i, math.floor(data:size() / opt.batch_size))
    tm:reset()
    base_grads:zero()
    unsup_grads:zero()
    sup_grads:zero()
    gradEnc:zero()

    -- Fetch the next batch. The batch tensors are kept local: they used to
    -- leak into the global table together with a throw-away `_`.
    data_tm:reset(); data_tm:resume()
    local cur_ims, cur_vox = data:getBatch()
    data_tm:stop()

    -- Every step k of every batch item is evaluated. The random rotation
    -- direction that was drawn here before was never used and has been
    -- removed.
    for m = 1, opt.batch_size do
      for k = 1, opt.kstep do
        local row = (m-1)*opt.kstep+k
        -- Rescale the image in place: x --> 2x - 1.
        batch_im_in[row]:copy(cur_ims[m][k]:mul(2):add(-1))
        batch_vox[row]:copy(cur_vox[m])
      end
    end

    -- Only the first encoder output (identity code) feeds the decoders.
    local f_id = encoder:forward(batch_im_in)[1]:clone()
    local f_base_vox = base_voxel_dec:forward(f_id)
local f_unsup_vox = unsup_voxel_dec:forward(f_id)
    local f_sup_vox = sup_voxel_dec:forward(f_id)

    -- For each model: threshold the prediction at opt.thresh, take the
    -- element-wise product with the ground truth as the intersection, and
    -- mark voxels where prediction + ground truth exceeds 0.9 as the union.
    -- (assumes batch_vox holds 0/1 occupancies -- TODO confirm)
    local base_fg_thresh = torch.gt(f_base_vox, opt.thresh):double()
    local base_area_intersc = torch.cmul(base_fg_thresh, batch_vox:double())
    local base_area_union = (base_fg_thresh+batch_vox:double()):gt(0.9)

    local sup_fg_thresh = torch.gt(f_sup_vox, opt.thresh):double()
    local sup_area_intersc = torch.cmul(sup_fg_thresh, batch_vox:double())
    local sup_area_union = (sup_fg_thresh+batch_vox:double()):gt(0.9)

    local unsup_fg_thresh = torch.gt(f_unsup_vox, opt.thresh):double()
    local unsup_area_intersc = torch.cmul(unsup_fg_thresh, batch_vox:double())
    local unsup_area_union = (unsup_fg_thresh+batch_vox:double()):gt(0.9)

    -- Accumulate the per-sample IoU of every row in the batch.
    -- NOTE(review): a sample whose union is empty yields 0/0 here and would
    -- poison the running sum -- confirm this cannot happen for the dataset.
    for m = 1, opt.batch_size do
      for k = 1, opt.kstep do
        local base_curIOU = base_area_intersc[(m-1)*opt.kstep+k]:sum() / base_area_union[(m-1)*opt.kstep+k]:sum()
        local sup_curIOU = sup_area_intersc[(m-1)*opt.kstep+k]:sum() / sup_area_union[(m-1)*opt.kstep+k]:sum()
        local unsup_curIOU = unsup_area_intersc[(m-1)*opt.kstep+k]:sum() / unsup_area_union[(m-1)*opt.kstep+k]:sum()
        base_iouVOX = base_iouVOX + base_curIOU
        sup_iouVOX = sup_iouVOX + sup_curIOU
        unsup_iouVOX = unsup_iouVOX + unsup_curIOU
        --print(string.format('[%d, %d]: %.4f', m, k, unsup_curIOU))
      end
    end
  end

  -- Average over the samples actually processed: whole batches only, kstep
  -- rows per item.
  local dataSize = math.floor(data:size() / opt.batch_size) * opt.batch_size
  base_iouVOX = base_iouVOX / (dataSize * opt.kstep)
  sup_iouVOX = sup_iouVOX / (dataSize * opt.kstep)
  unsup_iouVOX = unsup_iouVOX / (dataSize * opt.kstep)
  print(string.format('cat [%s]:\tCNN-VOL IOU = %g\tPTN-COMB IOU = %g\tPTN-PROJ IOU = %g', LIST[category_idx], base_iouVOX, sup_iouVOX, unsup_iouVOX))
end
--------------------------------------------------

================================================
FILE: scripts/train_PTN.lua
================================================
require 'torch'
require 'nn'
require 'cunn'
--require 'cudnn'
require 'ptn'
require 'nngraph'
require 'optim'
require 'image'
require 'mattorch'
model_utils =
require 'utils.model_utils'
optim_utils = require 'utils.adam_v2'

-- Training options, parsed from the command line by lapp.
-- NOTE(review): the option spec below sits on a single line in this copy of
-- the file; lapp presumably expects one option per line -- confirm against
-- the upstream source before running.
opt = lapp[[ --save_every (default 20) --print_every (default 1) --data_root (default 'data') --data_id_path (default 'data/shapenetcore_ids') --data_view_path (default 'data/shapenetcore_viewdata') --data_vox_path (default 'data/shapenetcore_voxdata') --dataset (default 'dataset_ptn') --gpu (default 0) --use_cudnn (default 1) --nz (default 512) --na (default 3) --nview (default 24) --nThreads (default 4) --niter (default 100) --display (default 1) --checkpoint_dir (default 'models/') --lambda_msk (default 1) --lambda_vox (default 0) --kstep (default 24) --batch_size (default 6) --adam (default 1) --arch_name (default 'arch_PTN') --weight_decay (default 0.001) --exp_list (default 'singleclass') --load_size (default 64) --vox_size (default 32) ]]
opt.focal_length = math.sqrt(3)/2
opt.ntrain = math.huge

-- An environment variable with the same name as an option overrides it;
-- values that parse as numbers are stored as numbers, others as strings.
for k,v in pairs(opt) do opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] end
print(opt)
if opt.display == 0 then opt.display = false end

-- Select the GPU. The pcall results (ok, ok2) are not checked, so a failed
-- require surfaces at the setDevice call.
if opt.gpu > 0 then
  ok, cunn = pcall(require, 'cunn')
  ok2, cutorch = pcall(require, 'cutorch')
  cutorch.setDevice(opt.gpu)
end

-- Draw a seed, print it, then apply it.
opt.manualSeed = torch.random(1, 10000) -- fix seed
print("Random Seed: " .. opt.manualSeed)
torch.manualSeed(opt.manualSeed)
torch.setnumthreads(1)
torch.setdefaulttensortype('torch.FloatTensor')

-- create data loader
local TrainLoader = require('utils/data.lua')
local ValLoader = require('utils/data_val.lua')
local data = TrainLoader.new(opt.nThreads, opt.dataset, opt)
local data_val = ValLoader.new(opt.nThreads, opt.dataset, opt)
print("Dataset: " ..
opt.dataset, "train_size: ", data:size(), "val_size: ", data_val:size())

--- Custom initialiser, applied to every module through `net:apply`.
-- Convolution and Linear weights are drawn from U(-0.08, 0.08) and scaled by
-- sqrt(1 / fan_in), with biases set to zero. BatchNormalization weights are
-- drawn from N(1, 0.02), again with zero bias. Other modules are untouched.
--
-- The volumetric branch used to test for the misspelt pattern 'Volumeric',
-- which never matches, so VolumetricFullConvolution layers silently kept
-- Torch's default initialisation. With the spelling corrected they follow
-- the same rule as the other layers; models trained from scratch will
-- therefore start from different decoder weights than before.
-- @param m an nn module
local function weights_init(m)
  local name = torch.type(m)

  -- Shared rule for convolution / linear layers.
  local function scaled_uniform(nin)
    m.weight:uniform(-0.08, 0.08):mul(math.sqrt(1/nin))
    if m.bias then m.bias:fill(0) end
  end

  if name:find('Convolution') and name:find('Spatial') then
    scaled_uniform(m.nInputPlane*m.kH*m.kW)
  elseif name:find('Convolution') and name:find('Volumetric') then
    scaled_uniform(m.nInputPlane*m.kT*m.kH*m.kW)
  elseif name:find('Linear') then
    scaled_uniform(m.weight:size(2))
  elseif name:find('BatchNormalization') then
    if m.weight then m.weight:normal(1.0, 0.02) end
    if m.bias then m.bias:fill(0) end
  end
end

-- The run name encodes the main hyper-parameters; it doubles as the
-- checkpoint directory name below.
opt.model_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg(%g,%g)_ks%d_vs%d',
  opt.arch_name, opt.exp_list, opt.nview, opt.adam, opt.batch_size, opt.nz,
  opt.weight_decay, opt.lambda_msk, opt.lambda_vox, opt.kstep, opt.vox_size)

-- initialize parameters
init_models = dofile('scripts/' .. opt.arch_name .. '.lua')
encoder, voxel_dec, projector = init_models.create(opt)
encoder:apply(weights_init)
voxel_dec:apply(weights_init)
projector:apply(weights_init)

opt.model_path = opt.checkpoint_dir .. opt.model_name
if not paths.dirp(opt.model_path) then
  paths.mkdir(opt.model_path)
end

-- load encoder from RNN-16
-- (the checkpoint name is rebuilt from the pre-training hyper-parameters:
-- adam 2, batch size 8, lambda 10, kstep 16, epoch 20)
if opt.exp_list == 'singleclass' then
  opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', 'arch_rotatorRNN', opt.exp_list, opt.nview, 2, 8, opt.nz, opt.weight_decay, 10, 16)
  opt.basemodel_epoch = 20
--[[elseif opt.exp_list == 'multiclass' then
  opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', 'rotatorRNN1_64', opt.exp_list, opt.nview, 2, 8, opt.nz, opt.weight_decay, 10, 16)
  opt.basemodel_epoch = 20]]
  loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name ..
string.format('/net-epoch-%d.t7', opt.basemodel_epoch))
  encoder = loader.encoder
end
collectgarbage()

-- load model from previous iterations
-- Scan checkpoints from epoch opt.niter downwards in steps of
-- opt.save_every and resume from the first one found on disk.
prev_iter = 0
for i = opt.niter, 1, -opt.save_every do
  print(opt.model_path .. string.format('/net-epoch-%d.t7', i))
  if paths.filep(opt.model_path .. string.format('/net-epoch-%d.t7', i)) then
    prev_iter = i
    loader = torch.load(opt.model_path .. string.format('/net-epoch-%d.t7', i))
    state = torch.load(opt.model_path .. '/state.t7')
    print(string.format('resuming from epoch %d', i))
    break
  end
end

-- build nngraph
-- (when resuming, all three networks come from the checkpoint found above)
if prev_iter > 0 then
  encoder = loader.encoder
  voxel_dec = loader.voxel_dec
  projector = loader.projector
end

-- criterion
-- Both losses are summed squared errors (sizeAverage disabled).
local criterion_vox = nn.MSECriterion()
criterion_vox.sizeAverage = false
local criterion_msk = nn.MSECriterion()
criterion_msk.sizeAverage = false

-- hyperparams
-- Returns the Adam settings selected by opt.adam (1 or 2). For any other
-- value the returned table is empty. Note that `config` is assigned as a
-- global inside the function.
function getAdamParams(opt)
  config = {}
  if opt.adam == 1 then
    config.learningRate = 0.0001
    config.epsilon = 1e-8
    config.beta1 = 0.9
    config.beta2 = 0.999
    config.weightDecay = opt.weight_decay
  elseif opt.adam == 2 then
    config.learningRate = 0.1
    config.epsilon = 1e-8
    config.beta1 = 0.5
    config.beta2 = 0.999
    config.weightDecay = opt.weight_decay
  end
  return config
end

config = getAdamParams(opt)
print(config)
-------------------------------------------
-- Pre-allocated buffers. The encoder sees one image per batch item; the
-- decoder/projector buffers hold opt.kstep rows per batch item.
local batch_im_in = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
local batch_feat = torch.Tensor(opt.batch_size * opt.kstep, opt.nz)
local batch_vox = torch.Tensor(opt.batch_size * opt.kstep, 1, opt.vox_size, opt.vox_size, opt.vox_size)
local batch_proj = torch.Tensor(opt.batch_size * opt.kstep, 1, opt.vox_size, opt.vox_size)
local batch_trans = torch.Tensor(opt.batch_size * opt.kstep, 4, 4)
-- Per-item samples kept for the validation visualisation.
local tmp_gt_im = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
local tmp_pred_proj = torch.Tensor(opt.batch_size, 1, opt.vox_size, opt.vox_size)
local tmp_gt_proj = torch.Tensor(opt.batch_size, 1, opt.vox_size, opt.vox_size)

-- Latest loss values, written by the training/validation closures.
local errVOX, errMSK
local epoch_tm = torch.Timer()
local tm = torch.Timer()
local data_tm = torch.Timer()
--------------------------------------------
if opt.gpu > 0 then
  batch_im_in = batch_im_in:cuda()
  batch_feat = batch_feat:cuda()
  batch_vox = batch_vox:cuda()
  batch_proj = batch_proj:cuda()
  batch_trans = batch_trans:cuda()
  encoder:cuda()
  voxel_dec:cuda()
  projector:cuda()
  criterion_vox:cuda()
  criterion_msk:cuda()
end

-- Flattened parameter/gradient views. Only `params`/`grads` (the voxel
-- decoder) are handed to the optimiser.
params, grads = voxel_dec:getParameters()
paramEnc, gradEnc = encoder:getParameters()
paramProj, gradProj = projector:getParameters()

-- perspective projection
--------------------------------------------------
--- Builds the 4x4 transformation T = E * K for one viewpoint.
-- E is an identity matrix whose upper-left 3x3 block is the combined
-- rotation R_azi * R_ele and whose last column holds the negated, rotated
-- camera offset; K is an identity matrix with 1/focal_length at entries
-- (2,2) and (3,3).
--
-- The rotation matrices are now locals (they used to leak as globals), and
-- the `focal_length` argument is honoured instead of being silently replaced
-- by opt.focal_length. It falls back to opt.focal_length when omitted, so
-- the existing caller gets exactly the same matrix.
-- @param phi elevation angle in degrees
-- @param theta azimuth angle in degrees (negated before use)
-- @param focal_length focal length; defaults to opt.focal_length
-- @return 4x4 torch.Tensor
local specify_pers_transformation = function(phi, theta, focal_length)
  focal_length = focal_length or opt.focal_length

  local K = torch.Tensor(4, 4):eye(4)
  local E = torch.Tensor(4, 4):eye(4)

  local sin_phi = math.sin(phi*math.pi/180.0)
  local cos_phi = math.cos(phi*math.pi/180.0)
  local sin_theta = math.sin((-theta)*math.pi/180.0)
  local cos_theta = math.cos((-theta)*math.pi/180.0)

  -- rotation axis -- z
  local R_azi = torch.Tensor(3, 3):zero()
  R_azi[1][1] = cos_theta
  R_azi[3][3] = cos_theta
  R_azi[1][3] = -sin_theta
  R_azi[3][1] = sin_theta
  R_azi[2][2] = 1

  -- rotation axis -- x
  local R_ele = torch.Tensor(3, 3):zero()
  R_ele[1][1] = cos_phi
  R_ele[1][2] = sin_phi
  R_ele[2][1] = -sin_phi
  R_ele[2][2] = cos_phi
  R_ele[3][3] = 1

  local R_comb = R_azi * R_ele

  -- Camera offset along the first axis, rotated into place.
  local colR = torch.Tensor(3,1):zero()
  colR[1][1] = focal_length + math.sqrt(1)/2
  colR = R_comb * colR

  E[{{1,3}, {1,3}}] = R_comb:clone()
  E[{{1,3}, {4}}] = -colR:clone()

  K[3][3] = 1/focal_length
  K[2][2] = 1/focal_length

  return E * K
end

--- Transformation for view index `vid`: fixed 30 degree elevation, azimuth
-- of vid * 15 degrees.
local getTransMatrix = function(vid)
  local T = specify_pers_transformation(30, vid*15, opt.focal_length)
  return T
end
--------------------------------------------------
-- Objective closure handed to the optimiser: loads a training batch, runs
-- the forward/backward pass and returns the loss with the decoder gradients.
local opfunc = function(x)
  collectgarbage()
  if x ~= params then
    params:copy(x)
  end
  grads:zero()

  -- train
  data_tm:reset(); data_tm:resume()
  cur_train_ims, cur_train_vox, _ = data:getBatch()
  data_tm:stop()

  for m = 1, opt.batch_size do
local view_in = torch.random(opt.nview)
    -- Pick the rotation direction for this item: -1 or +1 with equal chance.
    local rng_rot = math.random(2)
    local delta
    if rng_rot == 1 then
      delta = -1
    elseif rng_rot == 2 then
      delta = 1
    end
    -- Rescale the chosen input view in place: x --> 2x - 1.
    batch_im_in[m]:copy(cur_train_ims[m][view_in]:mul(2):add(-1))

    -- Walk opt.kstep views away from the input view, wrapping around the
    -- range 1..opt.nview, and record the transformation of each one.
    local view_out = view_in
    for k = 1, opt.kstep do
      view_out = view_out + delta
      if view_out > opt.nview then view_out = 1 end
      if view_out < 1 then view_out = opt.nview end
      batch_vox[(m-1)*opt.kstep+k]:copy(cur_train_vox[m])
      batch_trans[(m-1)*opt.kstep+k]:copy(getTransMatrix(view_out))
    end
  end

  gradEnc:zero()
  -- Only the first encoder output (identity code) is used; it is repeated
  -- opt.kstep times so every target view shares the same code.
  local f_id = encoder:forward(batch_im_in)[1]:clone()
  for m = 1, opt.batch_size do
    for k = 1, opt.kstep do
      batch_feat[(m-1)*opt.kstep+k]:copy(f_id[m])
    end
  end

  gradProj:zero()
  -- Target masks: the ground-truth volume pushed through the projector.
  -- Cloned because the projector is run again right below (presumably it
  -- reuses its output buffer -- confirm).
  batch_proj = projector:forward({batch_vox, batch_trans}):clone()
  local f_vox = voxel_dec:forward(batch_feat)
  local f_proj = projector:forward({f_vox, batch_trans})

  -- Both losses are summed squared errors divided by 2 * batch_size * kstep;
  -- the gradients carry the matching lambda weight.
  errVOX = criterion_vox:forward(f_vox, batch_vox) / (2 * opt.batch_size * opt.kstep)
  local df_dVOX = criterion_vox:backward(f_vox, batch_vox):mul(opt.lambda_vox):div(2 * opt.batch_size * opt.kstep)

  errMSK = criterion_msk:forward(f_proj, batch_proj) / (2 * opt.batch_size * opt.kstep)
  local df_dMSK = criterion_msk:backward(f_proj, batch_proj):mul(opt.lambda_msk):div(2 * opt.batch_size * opt.kstep)

  -- Mask gradient goes back through the projector, is summed with the voxel
  -- gradient and then pushed through the decoder. The encoder receives no
  -- backward call here.
  local df_dproj = projector:backward({f_vox, batch_trans}, df_dMSK)
  local df_dvox = voxel_dec:backward(batch_feat, df_dproj[1]:clone() + df_dVOX:clone())

  local err = errVOX * opt.lambda_vox + errMSK * opt.lambda_msk
  return err, grads
end
--------------------------------------------------------
-- Validation pass: same batch construction and forward pass as the training
-- closure, on the validation loader and without any backward call.
local feedforward = function(x)
  collectgarbage()
  if x ~= params then
    params:copy(x)
  end
  grads:zero()

  -- val
  data_tm:reset(); data_tm:resume()
  cur_ims, cur_vox, _ = data_val:getBatch()
  data_tm:stop()

  for m = 1, opt.batch_size do
    local view_in = torch.random(opt.nview)
    local rng_rot = math.random(2)
    local delta
    if rng_rot == 1 then
      delta = -1
    elseif rng_rot == 2 then
      delta = 1
    end
    batch_im_in[m]:copy(cur_ims[m][view_in]:mul(2):add(-1))
    local
view_out = view_in
    -- Walk opt.kstep views away from the input view, wrapping around the
    -- range 1..opt.nview.
    for k = 1, opt.kstep do
      view_out = view_out + delta
      if view_out > opt.nview then view_out = 1 end
      if view_out < 1 then view_out = opt.nview end
      batch_trans[(m-1)*opt.kstep+k]:copy(getTransMatrix(view_out))
      batch_vox[(m-1)*opt.kstep+k]:copy(cur_vox[m])
    end
  end

  gradEnc:zero()
  -- Repeat each item's identity code opt.kstep times.
  local f_id = encoder:forward(batch_im_in)[1]:clone()
  for m = 1, opt.batch_size do
    for k = 1, opt.kstep do
      batch_feat[(m-1)*opt.kstep+k]:copy(f_id[m])
    end
  end

  gradProj:zero()
  batch_proj = projector:forward({batch_vox, batch_trans}):clone()
  local f_vox = voxel_dec:forward(batch_feat)
  local f_proj = projector:forward({f_vox, batch_trans}):clone()

  errVOX = criterion_vox:forward(f_vox, batch_vox) / (2 * opt.batch_size * opt.kstep)
  errMSK = criterion_msk:forward(f_proj, batch_proj) / (2 * opt.batch_size * opt.kstep)

  -- Keep one randomly chosen step per batch item for the visualisation.
  -- `k` is local now; it used to be written to the global table.
  for m = 1, opt.batch_size do
    local k = torch.random(opt.kstep)
    tmp_gt_im[m] = batch_im_in[m]:float():clone()
    tmp_pred_proj[m] = f_proj[(m-1)*opt.kstep+k]:float():clone()
    tmp_gt_proj[m] = batch_proj[(m-1)*opt.kstep+k]:float():clone()
  end

  -- The combined loss is returned so that the caller's
  -- `local err = feedforward(params)` receives a value; it used to be
  -- computed and dropped.
  return errVOX * opt.lambda_vox + errMSK * opt.lambda_msk
end
------------------------------------------------------------
-- The encoder stays in evaluation mode for the whole run.
encoder:evaluate()

-- train & val
for epoch = prev_iter + 1, opt.niter do
  epoch_tm:reset()
  local counter = 0
  -- train
  voxel_dec:training()
  projector:training()
  for i = 1, math.min(data:size() / (opt.batch_size), opt.ntrain) do
    tm:reset()
    -- NOTE(review): `state` is only assigned when resuming from a
    -- checkpoint; confirm adam_v2 accepts nil and where it keeps its state.
    optim_utils.adam_v2(opfunc, params, config, state)
    counter = counter + 1
    print(string.format('Epoch: [%d][%8d / %8d]\t Time: %.3f DataTime: %.3f ' ..
' Err_Vox: %.4f, Err_Msk: %.4f', epoch, i-1, math.min(data:size() / (opt.batch_size), opt.ntrain), tm:time().real, data_tm:time().real, errVOX and errVOX or -1, errMSK and errMSK or -1)) end -- val voxel_dec:evaluate() projector:evaluate() --for i = 1, 1 do tm:reset() local to_plot = {} for i = 1, 24 / opt.batch_size do local err = feedforward(params) for j = 1, opt.batch_size do local res = tmp_gt_im[j]:float():clone() res = torch.squeeze(res) res:add(1):mul(0.5) to_plot[#to_plot+1] = res:clone() local res = tmp_pred_proj[j]:float():clone() res = torch.squeeze(res) res = res:repeatTensor(3, 1, 1) res = image.vflip(res) res = image.scale(res, opt.load_size, opt.load_size) res:mul(-1):add(1) to_plot[#to_plot+1] = res:clone() local res = tmp_gt_proj[j]:float():clone() res = torch.squeeze(res) res = res:repeatTensor(3, 1, 1) res = image.vflip(res) res = image.scale(res, opt.load_size, opt.load_size) res:mul(-1):add(1) to_plot[#to_plot+1] = res:clone() end end local formatted = image.toDisplayTensor({input=to_plot, nrow = 12}) formatted = formatted:double() formatted:mul(255) formatted = formatted:byte() image.save(opt.model_path .. string.format('/sample-%03d.jpg', epoch), formatted) if epoch % opt.save_every == 0 then torch.save((opt.model_path .. string.format('/net-epoch-%d.t7', epoch)), {encoder = encoder, voxel_dec = voxel_dec, projector = projector}) torch.save((opt.model_path .. 
'/state.t7'), state) end end ================================================ FILE: scripts/train_rotatorRNN_base.lua ================================================ -- torch reimplementation of deepRotator: https://github.com/jimeiyang/deepRotator.git require 'torch' require 'nn' require 'cunn' --require 'cudnn' require 'nngraph' require 'optim' require 'image' model_utils = require 'utils.model_utils' optim_utils = require 'utils.adam_v2' opt = lapp[[ --save_every (default 40) --print_every (default 1) --data_root (default 'data') --data_id_path (default 'data/shapenetcore_ids') --data_view_path (default 'data/shapenetcore_viewdata') --dataset (default 'dataset_rotatorRNN_base') --gpu (default 0) --nz (default 512) --na (default 3) --nview (default 24) --nThreads (default 4) --niter (default 160) --display (default 1) --checkpoint_dir (default 'models/') --lambda (default 10) --kstep (default 1) --batch_size (default 32) --adam (default 1) --arch_name (default 'arch_rotatorRNN') --weight_decay (default 0.001) --exp_list (default 'singleclass') --load_size (default 64) ]] opt.ntrain = math.huge for k,v in pairs(opt) do opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] end print(opt) if opt.display == 0 then opt.display = false end if opt.gpu > 0 then ok, cunn = pcall(require, 'cunn') ok2, cutorch = pcall(require, 'cutorch') cutorch.setDevice(opt.gpu) end opt.manualSeed = torch.random(1, 10000) -- fix seed print("Random Seed: " .. opt.manualSeed) torch.manualSeed(opt.manualSeed) torch.setnumthreads(1) torch.setdefaulttensortype('torch.FloatTensor') -- create data loader local TrainLoader = require 'utils/data.lua' local ValLoader = require 'utils/data_val.lua' local data = TrainLoader.new(opt.nThreads, opt.dataset, opt) local data_val = ValLoader.new(opt.nThreads, opt.dataset, opt) print("dataset: " .. 
opt.dataset, "train size: ", data:size(), "val size: ", data_val:size())
----------------------------------------------------------------
--- Initialise one module's parameters in place (used with `module:apply`).
-- Spatial/volumetric convolutions and linear layers get weights drawn from
-- U(-0.08, 0.08) scaled by sqrt(1/fan_in); batch-normalisation layers get
-- weights drawn from N(1, 0.02). Biases are zeroed when the module has one.
-- Modules of any other type are left untouched.
-- @param m the nn module to initialise
local function weights_init(m)
  local name = torch.type(m)

  if name:find('BatchNormalization') and not name:find('Convolution')
     and not name:find('Linear') then
    if m.weight then m.weight:normal(1.0, 0.02) end
    if m.bias then m.bias:fill(0) end
    return
  end

  -- fan-in of the layer, or nil when the module is not a weighted layer
  local nin
  if name:find('Convolution') and name:find('Spatial') then
    nin = m.nInputPlane*m.kH*m.kW
  elseif name:find('Convolution') and name:find('Volumetric') then
    nin = m.nInputPlane*m.kT*m.kH*m.kW
  elseif name:find('Linear') then
    nin = m.weight:size(2)
  end

  if nin then
    m.weight:uniform(-0.08, 0.08):mul(math.sqrt(1/nin))
    -- layers built without a bias have m.bias == nil
    if m.bias then m.bias:fill(0) end
  end
end

opt.model_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d',
  opt.arch_name, opt.exp_list, opt.nview, opt.adam, opt.batch_size,
  opt.nz, opt.weight_decay, opt.lambda, opt.kstep)

-- initialize parameters
init_models = dofile('scripts/' .. opt.arch_name .. '.lua')
encoder, actor, mixer, decoder_msk, decoder_im = init_models.create(opt)
encoder:apply(weights_init)
actor:apply(weights_init)
mixer:apply(weights_init)
decoder_msk:apply(weights_init)
decoder_im:apply(weights_init)

opt.model_path = opt.checkpoint_dir .. opt.model_name
if not paths.dirp(opt.model_path) then
  paths.mkdir(opt.model_path)
end

prev_iter = 0
-- load model from previous iterations
-- Checkpoints are written when epoch % save_every == 0, so start the scan at
-- the largest multiple of save_every that does not exceed niter; starting at
-- niter itself misses every checkpoint when niter is not such a multiple.
for i = math.floor(opt.niter / opt.save_every) * opt.save_every, 1, -opt.save_every do
  print(opt.model_path .. string.format('/net-epoch-%d.t7', i))
  if paths.filep(opt.model_path .. string.format('/net-epoch-%d.t7', i)) then
    prev_iter = i
    loader = torch.load(opt.model_path .. string.format('/net-epoch-%d.t7', i))
    state = torch.load(opt.model_path ..
'/state.t7')
    print(string.format('resuming from epoch %d', i))
    break
  end
end

-- build nngraph
-- When resuming, replace the freshly initialised modules by the saved ones.
if prev_iter > 0 then
  encoder = loader.encoder
  actor = loader.actor
  mixer = loader.mixer
  decoder_msk = loader.decoder_msk
  decoder_im = loader.decoder_im
end

-- criterion
local criterion_im = nn.MSECriterion()
criterion_im.sizeAverage = false
local criterion_msk = nn.MSECriterion()
criterion_msk.sizeAverage = false

-- hyperparams
--- Build the optimiser configuration for the selected Adam preset.
-- @param opt option table; reads `opt.adam` and `opt.weight_decay`
-- @return a new table with learningRate/epsilon/beta1/beta2/weightDecay when
--         `opt.adam == 1`, otherwise an empty table (the optimiser then falls
--         back to its own defaults)
function getAdamParams(opt)
  -- local: the original wrote to the global `config` as a side effect
  local config = {}
  if opt.adam == 1 then
    config.learningRate = 0.0001
    config.epsilon = 1e-8
    config.beta1 = 0.9
    config.beta2 = 0.999
    config.weightDecay = opt.weight_decay
  end
  return config
end

config = getAdamParams(opt)
print(config)
-------------------------------------------------
-- Pre-allocated batch buffers. For every step k, slot 2*k-1 holds a
-- 3-channel tensor and slot 2*k a 1-channel tensor.
local batch_im_in = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
local batch_rot = torch.Tensor(opt.batch_size, opt.na):zero()
local batch_outputs = {}
for k = 1, opt.kstep do
  batch_outputs[2*k-1] = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
  batch_outputs[2*k] = torch.Tensor(opt.batch_size, 1, opt.load_size, opt.load_size)
end
local preds = {}
for k = 1, opt.kstep do
  preds[2*k-1] = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
  preds[2*k] = torch.Tensor(opt.batch_size, 1, opt.load_size, opt.load_size)
end

local errIM, errMSK
local epoch_tm = torch.Timer()
local tm = torch.Timer()
local data_tm = torch.Timer()
------------------------------------------------
if opt.gpu > 0 then
  batch_im_in = batch_im_in:cuda()
  batch_rot = batch_rot:cuda()
  for k = 1, opt.kstep do
    batch_outputs[2*k-1] = batch_outputs[2*k-1]:cuda()
    batch_outputs[2*k] = batch_outputs[2*k]:cuda()
  end
  encoder:cuda()
  actor:cuda()
  mixer:cuda()
  decoder_msk:cuda()
  decoder_im:cuda()
  criterion_im:cuda()
  criterion_msk:cuda()
end

-- Wire the modules into one graph: {image, rotation} -> {image, mask}.
local inputs = {nn.Identity()(), nn.Identity()()}
local h_enc_id, h_enc_rot = encoder(inputs[1]):split(2)
local outputs = {}
local h_dec_rot = actor({h_enc_rot, inputs[2]})
local h_mix = mixer({h_enc_id, h_dec_rot})
local
h_dec_msk = decoder_msk(h_mix) local h_dec_im = decoder_im(h_mix) table.insert(outputs, h_dec_im) table.insert(outputs, h_dec_msk) rotatorRNN = nn.gModule(inputs, outputs) params, grads = rotatorRNN:getParameters() local opfunc = function(x) collectgarbage() if x ~= params then params:copy(x) end grads:zero() -- train data_tm:reset(); data_tm:resume() cur_im_in, cur_outputs, cur_rot, _ = data:getBatch() data_tm:stop() batch_im_in:copy(cur_im_in:mul(2):add(-1)) for k = 1, opt.kstep do batch_outputs[k*2-1]:copy(cur_outputs[k*2-1]:mul(2):add(-1)) batch_outputs[k*2]:copy(cur_outputs[k*2]) end batch_rot:copy(cur_rot) local f = rotatorRNN:forward({batch_im_in, batch_rot}) errIM = 0 errMSK = 0 local df_dw = {} for k = 1, opt.kstep do -- fast forward (actor, mixer, decoder) errIM = errIM + criterion_im:forward(f[2*k-1], batch_outputs[2*k-1]) / (8 * opt.batch_size) errMSK = errMSK + criterion_msk:forward(f[2*k], batch_outputs[2*k]) / (2 * opt.batch_size) local df_dIM = criterion_im:backward(f[2*k-1], batch_outputs[2*k-1]):mul(opt.lambda):div(8 * opt.batch_size) local df_dMSK = criterion_msk:backward(f[2*k], batch_outputs[2*k]):div(2 * opt.batch_size) df_dw[2*k-1] = df_dIM:clone() df_dw[2*k] = df_dMSK:clone() end rotatorRNN:backward({batch_im_in, batch_rot}, df_dw) local err = errIM * opt.lambda + errMSK return err, grads end ------------------------------------------------- local feedforward = function(x) collectgarbage() if x ~= params then params:copy(x) end grads:zero() -- val data_tm:reset(); data_tm:resume() cur_im_in, cur_outputs, cur_rot, _ = data_val:getBatch() data_tm:stop() batch_im_in:copy(cur_im_in:mul(2):add(-1)) for k = 1, opt.kstep do batch_outputs[k*2-1]:copy(cur_outputs[k*2-1]:mul(2):add(-1)) batch_outputs[k*2]:copy(cur_outputs[k*2]) end batch_rot:copy(cur_rot) local f = rotatorRNN:forward({batch_im_in, batch_rot}) errIM = 0 errMSK = 0 for k = 1, opt.kstep do errIM = errIM + criterion_im:forward(f[2*k-1], batch_outputs[2*k-1]) / (8 * opt.batch_size) errMSK 
= errMSK + criterion_msk:forward(f[2*k], batch_outputs[2*k]) / (2 * opt.batch_size)
    -- keep float copies of the predictions for the sample sheet
    preds[2*k-1] = f[2*k-1]:float():clone()
    preds[2*k] = f[2*k]:float():clone()
  end
  local err = errIM * opt.lambda + errMSK
  return err
end
--------------------------------------------------
-- train & val
for epoch = prev_iter + 1, opt.niter do
  epoch_tm:reset()
  local counter = 0

  -- train
  rotatorRNN:training()
  for i = 1, math.min(data:size() * opt.nview / 2 , opt.ntrain), opt.batch_size do
    tm:reset()
    optim_utils.adam_v2(opfunc, params, config, state)
    counter = counter + 1
    print(string.format('Epoch: [%d][%8d / %8d]\t Time: %.3f DataTime: %.3f ' ..
      ' Err_Im: %.4f , Err_Msk: %.4f',
      epoch, ((i-1) / opt.batch_size),
      math.floor(math.min(data:size() * opt.nview / 2, opt.ntrain) / opt.batch_size),
      tm:time().real, data_tm:time().real,
      errIM and errIM or -1, errMSK and errMSK or -1))
  end

  -- val
  rotatorRNN:evaluate()
  for i = 1, opt.batch_size do
    tm:reset()
    local err = feedforward(params)
  end

  -- plot
  -- Per sample and step: input image, predicted mask (inverted, replicated to
  -- 3 channels), predicted image, and the target image. Images are mapped
  -- from [-1, 1] back to [0, 1].
  local to_plot = {}
  -- The buffers hold opt.batch_size samples; the original fixed bound of 32
  -- indexed out of range for smaller batches.
  local n_plot = math.min(32, opt.batch_size)
  for i = 1, n_plot do
    for k = 1, opt.kstep do
      local im_in = batch_im_in[i]:float():clone()
      im_in = torch.squeeze(im_in)
      im_in:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_in:clone()

      local msk_pred = preds[2*k][i]:float()
      msk_pred = torch.squeeze(msk_pred)
      msk_pred = msk_pred:repeatTensor(3, 1, 1)
      msk_pred:mul(-1):add(1)
      to_plot[#to_plot+1] = msk_pred:clone()

      local im_pred = preds[2*k-1][i]:float()
      im_pred = torch.squeeze(im_pred)
      im_pred:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_pred:clone()

      local im_gt = batch_outputs[2*k-1][i]:float():clone()
      im_gt = torch.squeeze(im_gt)
      im_gt:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_gt:clone()
    end
  end

  local formatted = image.toDisplayTensor({input=to_plot, nrow = 16})
  formatted = formatted:double()
  formatted:mul(255)
  formatted = formatted:byte()
  image.save(opt.model_path .. string.format('/sample-%03d.jpg', epoch), formatted)

  if epoch % opt.save_every == 0 then
    torch.save((opt.model_path ..
string.format('/net-epoch-%d.t7', epoch)), {encoder = encoder, actor = actor, mixer = mixer, decoder_msk = decoder_msk, decoder_im = decoder_im}) torch.save((opt.model_path .. '/state.t7'), state) end end ================================================ FILE: scripts/train_rotatorRNN_curriculum.lua ================================================ -- torch reimplementation of deepRotator: https://github.com/jimeiyang/deepRotator.git require 'torch' require 'nn' require 'cunn' -- require 'cudnn' require 'nngraph' require 'optim' require 'image' model_utils = require 'utils.model_utils' optim_utils = require 'utils.adam_v2' opt = lapp[[ --save_every (default 20) --print_every (default 1) --data_root (default 'data') --data_id_path (default 'data/shapenetcore_ids') --data_view_path (default 'data/shapenetcore_viewdata') --dataset (default 'dataset_rotatorRNN_curriculum') --gpu (default 0) --use_cudnn (default 1) --nz (default 512) --na (default 3) --nview (default 24) --nThreads (default 4) --niter (default 40) --display (default 1) --checkpoint_dir (default 'models/') --lambda (default 10) --kstep (default 2) --batch_size (default 32) --adam (default 1) --arch_name (default 'arch_rotatorRNN') --weight_decay (default 0.001) --exp_list (default 'singleclass') --load_size (default 64) ]] opt.ntrain = math.huge for k,v in pairs(opt) do opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] end print(opt) if opt.display == 0 then opt.display = false end if opt.gpu > 0 then ok, cunn = pcall(require, 'cunn') ok2, cutorch = pcall(require, 'cutorch') cutorch.setDevice(opt.gpu) end opt.manualSeed = torch.random(1, 10000) -- fix seed print("Random Seed: " .. 
opt.manualSeed) torch.manualSeed(opt.manualSeed) torch.setnumthreads(1) torch.setdefaulttensortype('torch.FloatTensor') -- create data loader local TrainLoader = require('utils/data.lua') local ValLoader = require('utils/data_val.lua') local data = TrainLoader.new(opt.nThreads, opt.dataset, opt) local data_val = ValLoader.new(opt.nThreads, opt.dataset, opt) print("Dataset: " .. opt.dataset, "train_size: ", data:size(), "val_size: ", data_val:size()) ------------------------------------------------ if opt.exp_list == 'singleclass' then if opt.kstep == 2 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 1, 32, opt.nz, opt.weight_decay, opt.lambda, 1) opt.basemodel_epoch = 160 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 4 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 1, 32, opt.nz, opt.weight_decay, opt.lambda, 2) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 8 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 2, 32, opt.nz, opt.weight_decay, opt.lambda, 4) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 12 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 2, 32, opt.nz, opt.weight_decay, opt.lambda, 8) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. 
string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 16 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 2, 16, opt.nz, opt.weight_decay, opt.lambda, 12) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) end --[[elseif opt.exp_list == 'multiclass' then if opt.kstep == 2 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 1, 32, opt.nz, opt.weight_decay, opt.lambda, 1) opt.basemodel_epoch = 160 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 4 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 1, 32, opt.nz, opt.weight_decay, opt.lambda, 2) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 8 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 2, 8, opt.nz, opt.weight_decay, opt.lambda, 4) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 12 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 3, 4, opt.nz, opt.weight_decay, opt.lambda, 8) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. 
string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) elseif opt.kstep == 16 then opt.basemodel_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, 3, 3, opt.nz, opt.weight_decay, opt.lambda, 12) opt.basemodel_epoch = 40 loader = torch.load(opt.checkpoint_dir .. opt.basemodel_name .. string.format('/net-epoch-%d.t7', opt.basemodel_epoch)) end]] end opt.model_name = string.format('%s_%s_nv%d_adam%d_bs%d_nz%d_wd%g_lbg%g_ks%d', opt.arch_name, opt.exp_list, opt.nview, opt.adam, opt.batch_size, opt.nz, opt.weight_decay, opt.lambda, opt.kstep) opt.model_path = opt.checkpoint_dir .. opt.model_name if not paths.dirp(opt.model_path) then paths.mkdir(opt.model_path) end prev_iter = 0 -- load model from previous iterations for i = opt.niter, 1, -opt.save_every do print(opt.model_path .. string.format('/net-epoch-%d.t7', i)) if paths.filep(opt.model_path .. string.format('/net-epoch-%d.t7', i)) then prev_iter = i loader = torch.load(opt.model_path .. string.format('/net-epoch-%d.t7', i)) state = torch.load(opt.model_path .. 
'/state.t7')
    print(string.format('resuming from epoch %d', i))
    break
  end
end

-- build nngraph
-- `loader` is set either by the base-model selection above or by the resume
-- scan; fail with a readable message instead of "attempt to index a nil value"
-- when neither produced a model.
assert(loader, string.format(
  'no base model or checkpoint loaded (exp_list=%s, kstep=%s)',
  tostring(opt.exp_list), tostring(opt.kstep)))
encoder = loader.encoder
actor = loader.actor
mixer = loader.mixer
decoder_msk = loader.decoder_msk
decoder_im = loader.decoder_im

-- criterion
local criterion_im = nn.MSECriterion()
criterion_im.sizeAverage = false
local criterion_msk = nn.MSECriterion()
criterion_msk.sizeAverage = false

-- hyperparams
--- Build the optimiser configuration for the selected Adam preset.
-- Presets 1, 2 and 3 differ only in learning rate.
-- @param opt option table; reads `opt.adam` and `opt.weight_decay`
-- @return a new table with learningRate/epsilon/beta1/beta2/weightDecay for a
--         known preset, otherwise an empty table (the optimiser then falls
--         back to its own defaults)
function getAdamParams(opt)
  local learning_rates = { [1] = 0.0001, [2] = 0.00001, [3] = 0.000003 }
  -- local: the original wrote to the global `config` as a side effect
  local config = {}
  local lr = learning_rates[opt.adam]
  if lr then
    config.learningRate = lr
    config.epsilon = 1e-8
    config.beta1 = 0.9
    config.beta2 = 0.999
    config.weightDecay = opt.weight_decay
  end
  return config
end

config = getAdamParams(opt)
print(config)
--------------------------------------------------------
-- Pre-allocated batch buffers. For every step k, slot 2*k-1 holds a
-- 3-channel tensor and slot 2*k a 1-channel tensor.
local batch_im_in = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
local batch_rot = torch.Tensor(opt.batch_size, opt.na):zero()
local batch_outputs = {}
for k = 1, opt.kstep do
  batch_outputs[2*k-1] = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
  batch_outputs[2*k] = torch.Tensor(opt.batch_size, 1, opt.load_size, opt.load_size)
end
local preds = {}
for k = 1, opt.kstep do
  preds[2*k-1] = torch.Tensor(opt.batch_size, 3, opt.load_size, opt.load_size)
  preds[2*k] = torch.Tensor(opt.batch_size, 1, opt.load_size, opt.load_size)
end

local errIM, errMSK
local epoch_tm = torch.Timer()
local tm = torch.Timer()
local data_tm = torch.Timer()
--------------------------------------------------------
if opt.gpu > 0 then
  batch_im_in = batch_im_in:cuda()
  batch_rot = batch_rot:cuda()
  for k = 1, opt.kstep do
    batch_outputs[k*2-1] = batch_outputs[k*2-1]:cuda()
    batch_outputs[k*2] = batch_outputs[k*2]:cuda()
  end
  encoder:cuda()
  actor:cuda()
  mixer:cuda()
decoder_msk:cuda() decoder_im:cuda() criterion_im:cuda() criterion_msk:cuda() end params, grads = model_utils.combine_all_parameters(encoder, actor, mixer, decoder_msk, decoder_im) clone_actor = model_utils.clone_many_times(actor, opt.kstep) nelem = opt.batch_size * opt.kstep ------------------------------------------- local opfunc = function(x) collectgarbage() if x ~= params then params:copy(x) end grads:zero() -- train data_tm:reset(); data_tm:resume() cur_im_in, cur_outputs, cur_rot, _ = data:getBatch() data_tm:stop() batch_im_in:copy(cur_im_in:mul(2):add(-1)) for k = 1, opt.kstep do batch_outputs[k*2-1]:copy(cur_outputs[k*2-1]:mul(2):add(-1)) batch_outputs[k*2]:copy(cur_outputs[k*2]) end batch_rot:copy(cur_rot) local f_enc = encoder:forward(batch_im_in) errIM = 0 errMSK = 0 local df_enc_id = f_enc[1]:clone():zero() local df_enc_view = f_enc[2]:clone():zero() rnn_state = {f_enc[2]:clone()} drnn_state = {} for k = 1, opt.kstep do -- fast forward (actor, mixer, decoder) local f_act = clone_actor[k]:forward({rnn_state[k], batch_rot}) table.insert(rnn_state, f_act:clone()) local f_mix = mixer:forward({f_enc[1]:clone(), f_act}) local f_dec_im = decoder_im:forward(f_mix) local f_dec_msk = decoder_msk:forward(f_mix) errIM = errIM + criterion_im:forward(f_dec_im, batch_outputs[k*2-1]) / (8 * nelem) errMSK = errMSK + criterion_msk:forward(f_dec_msk, batch_outputs[k*2]) / (2 * nelem) local df_dIM = criterion_im:backward(f_dec_im, batch_outputs[k*2-1]):mul(opt.lambda):div(2 * nelem) local df_dMSK = criterion_msk:backward(f_dec_msk, batch_outputs[k*2]):div(2 * nelem) -- backward (decoder_mixer) local df_dec_im = decoder_im:backward(f_mix, df_dIM) local df_dec_msk = decoder_msk:backward(f_mix, df_dMSK) local df_dec = df_dec_im + df_dec_msk local df_mix = mixer:backward({f_enc[1]:clone(), f_act}, df_dec) df_enc_id = df_enc_id + df_mix[1]:clone() table.insert(drnn_state, df_mix[2]:clone()) end -- backward (actor) local sum_df_actor = drnn_state[opt.kstep]:clone():zero() for k 
= opt.kstep, 1, -1 do local tmp = clone_actor[k]:backward({rnn_state[k], batch_rot}, sum_df_actor+drnn_state[k]) sum_df_actor = tmp[1]:clone() end df_enc_view = df_enc_view + sum_df_actor local df_enc = encoder:backward(batch_im_in, {df_enc_id, df_enc_view}) local err = errIM * opt.lambda + errMSK return err, grads end ------------------------------------------- local feedforward = function(x) collectgarbage() if x ~= params then params:copy(x) end grads:zero() -- val data_tm:reset(); data_tm:resume() cur_im_in, cur_outputs, cur_rot, _ = data_val:getBatch() data_tm:stop() batch_im_in:copy(cur_im_in:mul(2):add(-1)) for k = 1, opt.kstep do batch_outputs[k*2-1]:copy(cur_outputs[k*2-1]:mul(2):add(-1)) batch_outputs[k*2]:copy(cur_outputs[k*2]) end batch_rot:copy(cur_rot) local f_enc = encoder:forward(batch_im_in) errIM = 0 errMSK = 0 rnn_state = {f_enc[2]:clone()} for k = 1, opt.kstep do -- fast forward (actor, mixer, decoder) local f_act = clone_actor[k]:forward({rnn_state[k], batch_rot}) table.insert(rnn_state, f_act:clone()) local f_mix = mixer:forward({f_enc[1]:clone(), f_act}) local f_dec_im = decoder_im:forward(f_mix) local f_dec_msk = decoder_msk:forward(f_mix) errIM = errIM + criterion_im:forward(f_dec_im, batch_outputs[k*2-1]) / (8 * nelem) errMSK = errMSK + criterion_msk:forward(f_dec_msk, batch_outputs[k*2]) / (2 * nelem) preds[k*2-1] = f_dec_im:float():clone() preds[k*2] = f_dec_msk:float():clone() end local err = errIM * opt.lambda + errMSK return err end ------------------------------------------- -- train & val for epoch = prev_iter + 1, opt.niter do epoch_tm:reset() local counter = 0 -- train encoder:training() mixer:training() decoder_msk:training() decoder_im:training() for k = 1, opt.kstep do clone_actor[k]:training() end for i = 1, math.min(data:size() / 5, opt.ntrain) do tm:reset() optim_utils.adam_v2(opfunc, params, config, state) counter = counter + 1 print(string.format('Epoch: [%d][%8d / %8d]\t Time: %.3f DataTime: %.3f ' .. 
' Err_Im: %.4f, Err_Msk: %.4f',
      epoch, i-1,
      -- floor: the iteration count may be fractional and is printed with %8d
      math.floor(math.min(data:size() / 5, opt.ntrain)),
      tm:time().real, data_tm:time().real,
      errIM and errIM or -1, errMSK and errMSK or -1))
  end

  -- val
  encoder:evaluate()
  mixer:evaluate()
  decoder_msk:evaluate()
  decoder_im:evaluate()
  for k = 1, opt.kstep do
    clone_actor[k]:evaluate()
  end
  for i = 1, math.ceil(32/opt.kstep) do
    tm:reset()
    local err = feedforward(params)
  end

  -- plot
  -- Per sample and step: input image, predicted mask (inverted, replicated to
  -- 3 channels), predicted image, and the target image. Images are mapped
  -- from [-1, 1] back to [0, 1].
  local to_plot = {}
  -- Never index past the batch: ceil(32/kstep) exceeds opt.batch_size when
  -- kstep is small and the batch is smaller than 32.
  local n_plot = math.min(math.ceil(32/opt.kstep), opt.batch_size)
  for i = 1, n_plot do
    for k = 1, opt.kstep do
      local im_in = batch_im_in[i]:float():clone()
      im_in = torch.squeeze(im_in)
      im_in:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_in:clone()

      local msk_pred = preds[k*2][i]:float():clone()
      msk_pred = torch.squeeze(msk_pred)
      msk_pred = msk_pred:repeatTensor(3, 1, 1)
      msk_pred:mul(-1):add(1)
      to_plot[#to_plot+1] = msk_pred:clone()

      local im_pred = preds[k*2-1][i]:float():clone()
      im_pred = torch.squeeze(im_pred)
      im_pred:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_pred:clone()

      local im_gt = batch_outputs[k*2-1][i]:float():clone()
      im_gt = torch.squeeze(im_gt)
      im_gt:add(1):mul(0.5)
      to_plot[#to_plot+1] = im_gt:clone()
    end
  end

  local formatted = image.toDisplayTensor({input=to_plot, nrow = 16})
  formatted = formatted:double()
  formatted:mul(255)
  formatted = formatted:byte()
  image.save(opt.model_path .. string.format('/sample-%03d.jpg', epoch), formatted)

  if epoch % opt.save_every == 0 then
    torch.save((opt.model_path .. string.format('/net-epoch-%d.t7', epoch)),
      {encoder = encoder, actor = actor, mixer = mixer,
       decoder_msk = decoder_msk, decoder_im = decoder_im})
    torch.save((opt.model_path ..
'/state.t7'),
      -- adam_v2 keeps its moments in `config` when it is given no state table,
      -- so saving a nil `state` would silently drop the optimiser state.
      state or config)
  end
end

================================================
FILE: utils/adam_v2.lua
================================================
local optim2 = {}

--- One Adam update step, applied to `x` in place.
-- @param opfunc function(x) returning the loss and the gradient tensor dfdx
-- @param x      parameter tensor, updated in place
-- @param config optional table: learningRate, weightDecay, beta1, beta2, epsilon
-- @param state  optional table holding t, m, v, denom between calls; when
--               omitted, these fields are stored in `config` instead
-- @return x and a one-element table holding the loss returned by opfunc
--
-- NOTE(review): the fallback values beta1 = 0.1 and beta2 = 0.001 are used
-- below as the decay factors of m and v, which is far from the customary
-- 0.9 / 0.999 — confirm before relying on the defaults.
function optim2.adam_v2(opfunc, x, config, state)
  local config = config or {}
  local state = state or config
  local lr = config.learningRate or 0.001
  local wd = config.weightDecay or 0.004
  local beta1 = config.beta1 or 0.1
  local beta2 = config.beta2 or 0.001
  local epsilon = config.epsilon or 1e-8

  local fx, dfdx = opfunc(x)
  -- weight decay is added to the gradient tensor returned by opfunc, in place
  if wd ~= 0 then
    dfdx:add(wd, x)
  end

  -- lazily create the step counter and the moment buffers
  state.t = state.t or 0
  state.m = state.m or x.new(dfdx:size()):zero()
  state.v = state.v or x.new(dfdx:size()):zero()
  state.denom = state.denom or x.new(dfdx:size()):zero()

  state.t = state.t + 1

  -- exponential moving averages of the gradient and of its square
  state.m:mul(beta1):add(1-beta1, dfdx)
  state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)
  state.denom:copy(state.v):sqrt():add(epsilon)

  -- bias correction is only applied during the first 9999 steps
  if state.t < 10000 then
    local biasCorrection1 = 1 - beta1^state.t
    local biasCorrection2 = 1 - beta2^state.t
    lr = lr * math.sqrt(biasCorrection2)/biasCorrection1
  end

  x:addcdiv(-lr, state.m, state.denom)
  return x, {fx}
end

return optim2

================================================
FILE: utils/data.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
local Threads = require 'threads'
Threads.serialization('threads.sharedserialize')

local data = {}
local result = {}
local unpack = unpack and unpack or table.unpack

--- Create a training-set loader backed by `n` worker threads (or by the
-- calling thread when n is 0).
-- @param n            number of worker threads
-- @param dataset_name one of 'dataset_rotatorRNN_base',
--                     'dataset_rotatorRNN_curriculum', 'dataset_ptn'
-- @param opt_         option table made available to the workers as `opt`
function data.new(n, dataset_name, opt_)
  opt_ = opt_ or {}
  local self = {}
  for k,v in pairs(data) do
    self[k] = v
  end

  -- script that defines the per-thread `dataLoader`
  local donkey_file
  if dataset_name == 'dataset_rotatorRNN_base' then
    donkey_file = 'trainset_rotatorRNN_base.lua'
  elseif dataset_name == 'dataset_rotatorRNN_curriculum' then
    donkey_file = 'trainset_rotatorRNN_curriculum.lua'
  elseif dataset_name == 'dataset_ptn' then
    donkey_file = 'trainset_ptn.lua'
  end

  if n > 0 then
    local options = opt_
    self.threads = Threads(n,
      function()
require 'torch'
      end,
      function(idx)
        -- `opt` and `tid` are deliberately global inside each worker thread
        opt = options
        tid = idx
        local seed = (opt.manualSeed and opt.manualSeed or 0) + idx
        torch.manualSeed(seed)
        torch.setnumthreads(1)
        print(string.format('Starting donkey with id: %d seed %d', tid, seed))
        assert(options, 'options not found')
        assert(opt, 'opts not given')
        paths.dofile(donkey_file)
      end
    )
  else
    -- single-threaded fallback: run jobs synchronously in the caller
    if donkey_file then paths.dofile(donkey_file) end
    self.threads = {}
    function self.threads:addjob(f1, f2) f2(f1()) end
    function self.threads:dojob() end
    function self.threads:synchronize() end
  end

  local nSamples = 0
  self.threads:addjob(function() return dataLoader:size() end,
                      function(c) nSamples = c end)
  self.threads:synchronize()
  self._size = nSamples

  -- queue one batch per worker so that data is ready on the first getBatch
  for i = 1, n do
    self.threads:addjob(self._getFromThreads, self._pushResult)
  end

  return self
end

--- Worker-side job: sample one batch from the thread's `dataLoader`.
function data._getFromThreads()
  assert(opt.batch_size, 'opt.batch_size not found')
  return dataLoader:sample(opt.batch_size)
end

--- Main-thread callback: stash the values returned by a worker job.
-- The original also tested `res == nil`, which can never hold for a table
-- constructor, and that branch referenced an undefined `self`; it is removed.
function data._pushResult(...)
  result[1] = {...}
end

--- Queue a new job, wait for one finished job and return its batch.
-- @return the values produced by `dataLoader:sample`
function data:getBatch()
  self.threads:addjob(self._getFromThreads, self._pushResult)
  self.threads:dojob()
  local res = result[1]
  result[1] = nil
  if torch.type(res) == 'table' then
    return unpack(res)
  end
  return res
end

--- Size reported by `dataLoader:size()` when the loader was created.
function data:size()
  return self._size
end

return data

================================================
FILE: utils/data_test.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
local Threads = require 'threads'
Threads.serialization('threads.sharedserialize')

local data = {}
local result = {}
local unpack = unpack and unpack or table.unpack

--- Create a test-set loader backed by `n` worker threads (or by the calling
-- thread when n is 0).
-- @param n            number of worker threads
-- @param dataset_name one of 'dataset_rotatorRNN_base',
--                     'dataset_rotatorRNN_curriculum', 'dataset_ptn'
-- @param opt_         option table made available to the workers as `opt`
function data.new(n, dataset_name, opt_)
  opt_ = opt_ or {}
  local self = {}
  for k,v in pairs(data) do
    self[k] = v
  end

  -- script that defines the per-thread `dataLoader`
  local donkey_file
  if dataset_name == 'dataset_rotatorRNN_base' then
    donkey_file = 'testset_rotatorRNN_base.lua'
  elseif dataset_name == 'dataset_rotatorRNN_curriculum' then
    donkey_file =
'testset_rotatorRNN_curriculum.lua' elseif dataset_name == 'dataset_ptn' then donkey_file = 'testset_ptn.lua' end if n > 0 then local options = opt_ self.threads = Threads(n, function() require 'torch' end, function(idx) opt = options tid = idx local seed = (opt.manualSeed and opt.manualSeed or 0) + idx torch.manualSeed(seed) torch.setnumthreads(1) print(string.format('Starting donkey with id: %d seed %d', tid, seed)) assert(options, 'options not found') assert(opt, 'opts not given') paths.dofile(donkey_file) end ) else if donkey_file then paths.dofile(donkey_file) end self.threads = {} function self.threads:addjob(f1, f2) f2(f1()) end function self.threads:dojob() end function self.threads:synchronize() end end local nSamples = 0 self.threads:addjob(function() return dataLoader:size() end, function(c) nSamples = c end) self.threads:synchronize() self._size = nSamples for i = 1, n do self.threads:addjob(self._getFromThreads, self._pushResult) end return self end function data._getFromThreads() assert(opt.batch_size, 'opt.batch_size not found') return dataLoader:sample(opt.batch_size) end function data._pushResult(...) 
local res = {...} if res == nil then self.threads:synchronize() end result[1] = res end function data:getBatch() self.threads:addjob(self._getFromThreads, self._pushResult) self.threads:dojob() local res = result[1] result[1] = nil if torch.type(res) == 'table' then return unpack(res) end return res end function data:size() return self._size end return data ================================================ FILE: utils/data_val.lua ================================================ -- code adapted from https://github.com/soumith/dcgan.torch.git local Threads = require 'threads' Threads.serialization('threads.sharedserialize') local data = {} local result = {} local unpack = unpack and unpack or table.unpack function data.new(n, dataset_name, opt_) opt_ = opt_ or {} local self = {} for k,v in pairs(data) do self[k] = v end local donkey_file if dataset_name == 'dataset_rotatorRNN_base' then donkey_file = 'valset_rotatorRNN_base.lua' elseif dataset_name == 'dataset_rotatorRNN_curriculum' then donkey_file = 'valset_rotatorRNN_curriculum.lua' elseif dataset_name == 'dataset_ptn' then donkey_file = 'valset_ptn.lua' end if n > 0 then local options = opt_ self.threads = Threads(n, function() require 'torch' end, function(idx) opt = options tid = idx local seed = (opt.manualSeed and opt.manualSeed or 0) + idx torch.manualSeed(seed) torch.setnumthreads(1) print(string.format('Starting donkey with id: %d seed %d', tid, seed)) assert(options, 'options not found') assert(opt, 'opts not given') paths.dofile(donkey_file) end ) else if donkey_file then paths.dofile(donkey_file) end self.threads = {} function self.threads:addjob(f1, f2) f2(f1()) end function self.threads:dojob() end function self.threads:synchronize() end end local nSamples = 0 self.threads:addjob(function() return dataLoader:size() end, function(c) nSamples = c end) self.threads:synchronize() self._size = nSamples for i = 1, n do self.threads:addjob(self._getFromThreads, self._pushResult) end return self end function 
data._getFromThreads()
  -- Worker-side job: sample one batch from the thread's `dataLoader`.
  assert(opt.batch_size, 'opt.batch_size not found')
  return dataLoader:sample(opt.batch_size)
end

--- Main-thread callback: stash the values returned by a worker job.
-- The original also tested `res == nil`, which can never hold for a table
-- constructor, and that branch referenced an undefined `self`; it is removed.
function data._pushResult(...)
  result[1] = {...}
end

--- Queue a new job, wait for one finished job and return its batch.
-- @return the values produced by `dataLoader:sample`
function data:getBatch()
  self.threads:addjob(self._getFromThreads, self._pushResult)
  self.threads:dojob()
  local res = result[1]
  result[1] = nil
  if torch.type(res) == 'table' then
    return unpack(res)
  end
  return res
end

--- Size reported by `dataLoader:size()` when the loader was created.
function data:size()
  return self._size
end

return data

================================================
FILE: utils/dataset.lua
================================================
--[[
    Copyright (c) 2015-present, Facebook, Inc.
    All rights reserved.

    This source code is licensed under the BSD-style license found in the
    LICENSE file in the root directory of this source tree. An additional grant
    of patent rights can be found in the PATENTS file in the same directory.
]]

require 'torch'
torch.setdefaulttensortype('torch.FloatTensor')
local ffi = require 'ffi'
local class = require('pl.class')
local dir = require 'pl.dir'
local tablex = require 'pl.tablex'
local argcheck = require 'argcheck'
require 'sys'
require 'xlua'
require 'image'

local dataset = torch.class('dataLoader')

local initcheck = argcheck{
   pack=true,
   help=[[
     A dataset class for images in a flat folder structure (folder-name is class-name).
     Optimized for extremely large datasets (upwards of 14 million images).
Tested only on Linux (as it uses command-line linux utilities to scale up) ]], {check=function(paths) local out = true; for k,v in ipairs(paths) do if type(v) ~= 'string' then print('paths can only be of string input'); out = false end end return out end, name="paths", type="table", help="Multiple paths of directories with images"}, {name="sampleSize", type="table", help="a consistent sample size to resize the images"}, {name="split", type="number", help="Percentage of split to go to Training" }, {name="samplingMode", type="string", help="Sampling mode: random | balanced ", default = "balanced"}, {name="verbose", type="boolean", help="Verbose mode during initialization", default = false}, {name="loadSize", type="table", help="a size to load the images to, initially", opt = true}, {name="forceClasses", type="table", help="If you want this loader to map certain classes to certain indices, " .. "pass a classes table that has {classname : classindex} pairs." .. " For example: {3 : 'dog', 5 : 'cat'}" .. "This function is very useful when you want two loaders to have the same " .. "class indices (trainLoader/testLoader for example)", opt = true}, {name="sampleHookTrain", type="function", help="applied to sample during training(ex: for lighting jitter). " .. "It takes the image path as input", opt = true}, {name="sampleHookTest", type="function", help="applied to sample during testing", opt = true}, } function dataset:__init(...) -- argcheck local args = initcheck(...) 
print(args)
   for k,v in pairs(args) do self[k] = v end

   -- fall back to the sample size when no separate load size was given
   if not self.loadSize then self.loadSize = self.sampleSize; end

   -- NOTE(review): no `defaultSampleHook` is defined in this file, so unless it
   -- is provided elsewhere both hooks stay nil when the caller omits them.
   if not self.sampleHookTrain then self.sampleHookTrain = self.defaultSampleHook end
   if not self.sampleHookTest then self.sampleHookTest = self.defaultSampleHook end

   -- find class names
   self.classes = {}
   local classPaths = {}
   if self.forceClasses then
      for k,v in pairs(self.forceClasses) do
         self.classes[k] = v
         classPaths[k] = {}
      end
   end

   -- return the key under which value `o` is stored in `t`, or nil
   local function tableFind(t, o)
      for k,v in pairs(t) do
         if v == o then return k end
      end
   end

   -- loop over each paths folder, get list of unique class names,
   -- also store the directory paths per class
   for _,path in ipairs(self.paths) do
      local dirs = dir.getdirectories(path);
      for _,dirpath in ipairs(dirs) do
         -- named className so the module-level `class` (pl.class) is not shadowed
         local className = paths.basename(dirpath)
         local idx = tableFind(self.classes, className)
         if not idx then
            table.insert(self.classes, className)
            idx = #self.classes
            classPaths[idx] = {}
         end
         if not tableFind(classPaths[idx], dirpath) then
            table.insert(classPaths[idx], dirpath);
         end
      end
   end

   -- reverse lookup: class name -> class index
   self.classIndices = {}
   for k,v in ipairs(self.classes) do
      self.classIndices[v] = k
   end

   -- define command-line tools, try your best to maintain OSX compatibility
   local wc = 'wc'
   local cut = 'cut'
   local find = 'find'
   -- `jit` only exists under LuaJIT; guard so plain Lua builds do not error here
   if jit and jit.os == 'OSX' then
      wc = 'gwc'
      cut = 'gcut'
      find = 'gfind'
   end
   ----------------------------------------------------------------------
   -- Options for the GNU find command
   local extensionList = {'jpg', 'png','JPG','PNG','JPEG', 'ppm', 'PPM', 'bmp', 'BMP'}
   local findOptions = ' -iname "*.' .. extensionList[1] .. '"'
   for i=2,#extensionList do
      findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"'
   end

   -- find the image path names
   self.imagePath = torch.CharTensor()  -- path to each image in dataset
   self.imageClass = torch.LongTensor() -- class index of each image (class index in self.classes)
   self.classList = {}                  -- index of imageList to each image of a particular class
   self.classListSample = self.classList -- the main list used when sampling data

   print('running "find" on each class directory, and concatenate all'
         .. ' those filenames into a single file containing all image paths for a given class')
   -- so, generates one file per class
   local classFindFiles = {}
   for i=1,#self.classes do
      classFindFiles[i] = os.tmpname()
   end
   local combinedFindList = os.tmpname();

   -- write all `find` invocations into one script and run it with bash
   local findScript = os.tmpname()
   local findHandle = assert(io.open(findScript, 'w'))
   -- iterate over classes
   for i, _ in ipairs(self.classes) do
      -- iterate over classPaths
      for _,path in ipairs(classPaths[i]) do
         local command = find .. ' "' .. path .. '" ' .. findOptions
            .. ' >>"' .. classFindFiles[i] .. '" \n'
         findHandle:write(command)
      end
   end
   findHandle:close()
   os.execute('bash ' .. findScript)
   os.execute('rm -f ' .. findScript)

   print('now combine all the files to a single large file')
   local catScript = os.tmpname()
   local catHandle = assert(io.open(catScript, 'w'))
   -- concat all finds to a single large file in the order of self.classes
   for i=1,#self.classes do
      local command = 'cat "' .. classFindFiles[i] .. '" >>' .. combinedFindList .. ' \n'
      catHandle:write(command)
   end
   catHandle:close()
   os.execute('bash ' .. catScript)
   os.execute('rm -f ' .. catScript)

   --==========================================================================
   print('load the large concatenated list of sample paths to self.imagePath')
   -- longest path (+1 for the terminating zero byte) and number of paths
   local maxPathLength = tonumber(sys.fexecute(wc .. " -L '"
                                                  .. combinedFindList .. "' |"
                                                  .. cut .. " -f1 -d' '")) + 1
   local length = tonumber(sys.fexecute(wc .. " -l '"
                                           .. combinedFindList .. "' |"
                                           .. cut .. " -f1 -d' '"))
   assert(length > 0, "Could not find any image file in the given input paths")
   assert(maxPathLength > 0, "paths of files are length 0?")
   -- one zero-padded row of maxPathLength chars per image path
   self.imagePath:resize(length, maxPathLength):fill(0)
   local s_data = self.imagePath:data()
   local count = 0
   for line in io.lines(combinedFindList) do
      ffi.copy(s_data, line)
      s_data = s_data + maxPathLength
      if self.verbose and count % 10000 == 0 then
         xlua.progress(count, length)
      end;
      count = count + 1
   end

   self.numSamples = self.imagePath:size(1)
   if self.verbose then print(self.numSamples ..  ' samples found.') end
   --==========================================================================
   print('Updating classList and imageClass appropriately')
   self.imageClass:resize(self.numSamples)
   local runningIndex = 0
   for i=1,#self.classes do
      if self.verbose then xlua.progress(i, #(self.classes)) end
      -- number of images found for class i
      local classLength = tonumber(sys.fexecute(wc .. " -l '"
                                                   .. classFindFiles[i] .. "' |"
                                                   .. cut .. " -f1 -d' '"))
      if classLength == 0 then
         error('Class has zero samples')
      else
         -- class i owns the contiguous index range (runningIndex, runningIndex + classLength]
         self.classList[i] = torch.linspace(runningIndex + 1, runningIndex + classLength, classLength):long()
         self.imageClass[{{runningIndex + 1, runningIndex + classLength}}]:fill(i)
      end
      runningIndex = runningIndex + classLength
   end

   --==========================================================================
   -- clean up temporary files
   print('Cleaning up temporary files')
   local tmpfilelistall = ''
   for i=1,#(classFindFiles) do
      tmpfilelistall = tmpfilelistall .. ' "' .. classFindFiles[i] .. '"'
      -- delete in chunks of 1000 files per rm call
      if i % 1000 == 0 then
         os.execute('rm -f ' .. tmpfilelistall)
         tmpfilelistall = ''
      end
   end
   os.execute('rm -f '  .. tmpfilelistall)
   os.execute('rm -f "' .. combinedFindList .. '"')
   --==========================================================================

   if self.split == 100 then
      self.testIndicesSize = 0
   else
      print('Splitting training and test sets to a ratio of '
               .. self.split .. '/' ..
(100-self.split))
      self.classListTrain = {}
      self.classListTest  = {}
      self.classListSample = self.classListTrain
      local totalTestSamples = 0
      -- split the classList into classListTrain and classListTest
      for i=1,#self.classes do
         local list = self.classList[i]
         local count = self.classList[i]:size(1)
         local splitidx = math.floor((count * self.split / 100) + 0.5) -- +round
         local perm = torch.randperm(count)
         self.classListTrain[i] = torch.LongTensor(splitidx)
         for j=1,splitidx do
            self.classListTrain[i][j] = list[perm[j]]
         end
         if splitidx == count then -- all samples were allocated to train set
            self.classListTest[i]  = torch.LongTensor()
         else
            self.classListTest[i]  = torch.LongTensor(count-splitidx)
            totalTestSamples = totalTestSamples + self.classListTest[i]:size(1)
            local idx = 1
            for j=splitidx+1,count do
               self.classListTest[i][idx] = list[perm[j]]
               idx = idx + 1
            end
         end
      end
      -- Now combine classListTest into a single tensor
      self.testIndices = torch.LongTensor(totalTestSamples)
      self.testIndicesSize = totalTestSamples
      local tdata = self.testIndices:data()
      local tidx = 0
      for i=1,#self.classes do
         local list = self.classListTest[i]
         if list:dim() ~= 0 then
            local ldata = list:data()
            -- raw data pointers are 0-based
            for j=0,list:size(1)-1 do
               tdata[tidx] = ldata[j]
               tidx = tidx + 1
            end
         end
      end
   end
end

--- size(), size(class)
-- @param class nil for the total sample count, or a class name / class index
-- @param list  per-class index lists to consult (defaults to self.classList)
-- @return number of samples; nil when `class` is neither string nor number
function dataset:size(class, list)
   list = list or self.classList
   if not class then
      return self.numSamples
   elseif type(class) == 'string' then
      return list[self.classIndices[class]]:size(1)
   elseif type(class) == 'number' then
      return list[class]:size(1)
   end
end

--- Draw one random sample of the given class index through sampleHookTrain.
-- @param class class index into self.classListSample
-- @return whatever self:sampleHookTrain(imgpath) returns
function dataset:getByClass(class)
   local candidates = self.classListSample[class]
   -- torch.uniform() draws from [0, 1), so the product can be exactly 0;
   -- clamp to 1 because index 0 is not a valid tensor index
   local index = math.max(1, math.ceil(torch.uniform() * candidates:nElement()))
   local imgpath = ffi.string(torch.data(self.imagePath[candidates[index]]))
   return self:sampleHookTrain(imgpath)
end

-- converts a table of samples (and corresponding labels) to a clean tensor
-- dataTable:   list of 3-dimensional tensors, each copied into one row of `data`
-- scalarTable: list of class indices, one per sample
local function tableToOutput(self, dataTable, scalarTable)
   local data, scalarLabels
   local quantity = #scalarTable
   assert(dataTable[1]:dim() == 3)
   data = torch.Tensor(quantity,
                       self.sampleSize[1], self.sampleSize[2], self.sampleSize[3])
   -- -1111 is a sentinel for label slots that are never filled
   scalarLabels = torch.LongTensor(quantity):fill(-1111)
   for i=1,#dataTable do
      data[i]:copy(dataTable[i])
      scalarLabels[i] = scalarTable[i]
   end
   return data, scalarLabels
end

--- sampler, samples from the training set.
-- Each sample picks a class uniformly, then an image of that class.
-- @param quantity number of samples to draw
-- @return data tensor, label tensor
function dataset:sample(quantity)
   assert(quantity)
   local dataTable = {}
   local scalarTable = {}
   for i=1,quantity do
      local class = torch.random(1, #self.classes)
      local out = self:getByClass(class)
      table.insert(dataTable, out)
      table.insert(scalarTable, class)
   end
   local data, scalarLabels = tableToOutput(self, dataTable, scalarTable)
   return data, scalarLabels
end

--- Load the samples with indices i1..i2 (inclusive) through sampleHookTest.
-- @return data tensor, label tensor
function dataset:get(i1, i2)
   local indices = torch.range(i1, i2);
   local quantity = i2 - i1 + 1;
   assert(quantity > 0)
   -- now that indices has been initialized, get the samples
   local dataTable = {}
   local scalarTable = {}
   for i=1,quantity do
      -- load the sample
      local imgpath = ffi.string(torch.data(self.imagePath[indices[i]]))
      local out = self:sampleHookTest(imgpath)
      table.insert(dataTable, out)
      table.insert(scalarTable, self.imageClass[indices[i]])
   end
   local data, scalarLabels = tableToOutput(self, dataTable, scalarTable)
   return data, scalarLabels
end

return dataset

================================================
FILE: utils/model_utils.lua
================================================
-- adapted from https://github.com/wojciechz/learning_to_execute
-- utilities for combining/flattening parameters in a model
-- the code in this script is more general than it needs to be, which is
-- why it is kind of large

require 'torch'
local model_utils = {}

function model_utils.combine_all_parameters(...)
--[[ like module:getParameters, but operates on many modules ]]--
    -- Arguments: any number of networks exposing :parameters().
    -- Returns: flatParameters, flatGradParameters. After the call every
    -- parameter / gradient tensor of the networks has been re-pointed (:set)
    -- at the storage of the corresponding flat tensor.

    -- get parameters
    local networks = {...}
    local parameters = {}
    local gradParameters = {}
    for i = 1, #networks do
        local net_params, net_grads = networks[i]:parameters()

        -- networks whose :parameters() returns nil are skipped
        if net_params then
            for _, p in pairs(net_params) do
                parameters[#parameters + 1] = p
            end
            for _, g in pairs(net_grads) do
                gradParameters[#gradParameters + 1] = g
            end
        end
    end

    -- look up `storage` (keyed by its pointer) in `set`; returns the offset
    -- recorded for it, or nil when the storage has not been seen yet
    -- NOTE(review): relies on the global `unpack` (Lua 5.1 / LuaJIT)
    local function storageInSet(set, storage)
        local storageAndOffset = set[torch.pointer(storage)]
        if storageAndOffset == nil then
            return nil
        end
        local _, offset = unpack(storageAndOffset)
        return offset
    end

    -- this function flattens arbitrary lists of parameters,
    -- even complex shared ones
    local function flatten(parameters)
        if not parameters or #parameters == 0 then
            return torch.Tensor()
        end
        local Tensor = parameters[1].new

        -- pass 1: give every distinct storage an offset in one big flat
        -- buffer; tensors sharing a storage share that offset
        local storages = {}
        local nParameters = 0
        for k = 1,#parameters do
            local storage = parameters[k]:storage()
            if not storageInSet(storages, storage) then
                storages[torch.pointer(storage)] = {storage, nParameters}
                nParameters = nParameters + storage:size()
            end
        end

        -- pass 2: fill the flat buffer with 1, point each parameter at its
        -- slot and zero it; elements still equal to 1 afterwards are "holes"
        -- (storage elements not covered by any parameter tensor)
        local flatParameters = Tensor(nParameters):fill(1)
        local flatStorage = flatParameters:storage()

        for k = 1,#parameters do
            local storageOffset = storageInSet(storages, parameters[k]:storage())
            parameters[k]:set(flatStorage,
                storageOffset + parameters[k]:storageOffset(),
                parameters[k]:size(),
                parameters[k]:stride())
            parameters[k]:zero()
        end

        -- mask is 0 at used elements, 1 at holes; its cumulative sum gives,
        -- for each position, the number of holes up to and including it
        local maskParameters=  flatParameters:float():clone()
        local cumSumOfHoles = flatParameters:float():cumsum(1)
        local nUsedParameters = nParameters - cumSumOfHoles[#cumSumOfHoles]
        local flatUsedParameters = Tensor(nUsedParameters)
        local flatUsedStorage = flatUsedParameters:storage()

        -- pass 3: re-point each parameter into the compacted buffer, shifting
        -- its offset left by the number of holes that precede it
        for k = 1,#parameters do
            local offset = cumSumOfHoles[parameters[k]:storageOffset()]
            parameters[k]:set(flatUsedStorage,
                parameters[k]:storageOffset() - offset,
                parameters[k]:size(),
                parameters[k]:stride())
        end

        -- copy the original storage contents (k = storage, v = its offset)
        -- back into the uncompacted flat buffer
        for _, storageAndOffset in pairs(storages) do
            local k, v =
unpack(storageAndOffset)
            flatParameters[{{v+1,v+k:size()}}]:copy(Tensor():set(k))
        end

        if cumSumOfHoles:sum() == 0 then
            -- no holes: the two buffers have identical layout
            flatUsedParameters:copy(flatParameters)
        else
            -- copy only the used elements, skipping holes
            local counter = 0
            for k = 1,flatParameters:nElement() do
                if maskParameters[k] == 0 then
                    counter = counter + 1
                    flatUsedParameters[counter] = flatParameters[counter+cumSumOfHoles[k]]
                end
            end
            assert (counter == nUsedParameters)
        end
        return flatUsedParameters
    end

    -- flatten parameters and gradients
    local flatParameters = flatten(parameters)
    local flatGradParameters = flatten(gradParameters)

    -- return new flat vector that contains all discrete parameters
    return flatParameters, flatGradParameters
end

-- Create T copies of `net` by serializing it once to an in-memory file and
-- deserializing it T times. When `net` has a `parameters` method, each clone's
-- parameter and gradient tensors are re-pointed (:set) at the tensors of
-- `net`; the same is done for `parametersNoGrad` when `net` provides it.
-- Returns the list of clones (indices 1..T).
function model_utils.clone_many_times(net, T)
    local clones = {}

    local params, gradParams
    if net.parameters then
        params, gradParams = net:parameters()
        if params == nil then
            -- keeps the `#params` loop below valid for parameter-less nets
            params = {}
        end
    end

    local paramsNoGrad
    if net.parametersNoGrad then
        paramsNoGrad = net:parametersNoGrad()
    end

    -- serialize the network once; every clone is read from this buffer
    local mem = torch.MemoryFile("w"):binary()
    mem:writeObject(net)

    for t = 1, T do
        -- We need to use a new reader for each clone.
        -- We don't want to use the pointers to already read objects.
        local reader = torch.MemoryFile(mem:storage(), "r"):binary()
        local clone = reader:readObject()
        reader:close()

        if net.parameters then
            local cloneParams, cloneGradParams = clone:parameters()
            local cloneParamsNoGrad
            -- point the clone's tensors at the original's tensors
            for i = 1, #params do
                cloneParams[i]:set(params[i])
                cloneGradParams[i]:set(gradParams[i])
            end
            if paramsNoGrad then
                cloneParamsNoGrad = clone:parametersNoGrad()
                for i =1,#paramsNoGrad do
                    cloneParamsNoGrad[i]:set(paramsNoGrad[i])
                end
            end
        end

        clones[t] = clone
        collectgarbage()
    end

    mem:close()
    return clones
end

return model_utils

================================================
FILE: utils/testset_ptn.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
-- Defines the global `dataLoader` for the test ids; reads settings from the
-- global `opt`.
require 'image'
require 'mattorch'
dir = require 'pl.dir'

dataLoader = {}
-- number of samples already handed out by dataLoader:sample
dataLoader.counter = 0

local categories = {}
local files = {}
local size = 0

-- one category name per line of exp_<opt.exp_list>.txt
for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat ..
'_testids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
------------------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

----------------------------------------------------
--- Read the next `quantity` test objects in id-list order.
-- The class of each sample is drawn at random, but the position inside that
-- class's id list is sequential (self.counter + n), and the counter advances
-- by `quantity` on every call.
-- @param quantity number of objects in the batch
-- @return batch_ims (table of quantity tensors, each opt.nview x 3 x H x W),
--         batch_vox (quantity x 1 x V x V x V), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_ims = {}
  for n = 1, quantity do
    batch_ims[n] = torch.Tensor(opt.nview, 3, load_size[2], load_size[2])
  end
  local batch_vox = torch.Tensor(quantity, 1, opt.vox_size, opt.vox_size, opt.vox_size)

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = self.counter + n
    -- fail with a clear message instead of a nil-concatenation error once the
    -- sequential index runs past the end of the id list
    assert(cls_files[file_idx], string.format(
      'test set exhausted: index %d is beyond the %d available ids',
      file_idx, #cls_files))

    local obj_list = opt.data_view_path .. '/' .. cls_files[file_idx]
    -- views are stored per azimuth angle, in steps of 360/opt.nview
    for k = 1, opt.nview do
      local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, k*(360/opt.nview)))
      batch_ims[n][k]:copy(img_in)
    end

    local vox_path = opt.data_vox_path .. '/' .. cls_files[file_idx]
    local vox_loader = mattorch.load(string.format('%s/model_%d.mat', vox_path, opt.vox_size))
    local vox_instance = vox_loader.voxel
    batch_vox[n]:copy(vox_instance)
  end

  self.counter = self.counter + quantity

  collectgarbage()
  return batch_ims, batch_vox, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE: utils/trainset_ptn.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
require 'mattorch'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' ..
cat .. '_trainids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
------------------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

----------------------------------------------------
--- Draw a random training batch of multi-view images and voxel grids.
-- For each sample a category and then an object of that category are picked
-- at random; all opt.nview views of the object are loaded.
-- @param quantity number of objects in the batch
-- @return batch_ims (table of quantity tensors, each opt.nview x 3 x H x W),
--         batch_vox (quantity x 1 x V x V x V), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_ims = {}
  for n = 1, quantity do
    batch_ims[n] = torch.Tensor(opt.nview, 3, load_size[2], load_size[2])
  end
  local batch_vox = torch.Tensor(quantity, 1, opt.vox_size, opt.vox_size, opt.vox_size)

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' .. cls_files[file_idx]
    -- views are stored per azimuth angle, in steps of 360/opt.nview
    for k = 1, opt.nview do
      local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, k*(360/opt.nview)))
      batch_ims[n][k]:copy(img_in)
    end

    local vox_path = opt.data_vox_path .. '/' .. cls_files[file_idx]
    local vox_loader = mattorch.load(string.format('%s/model_%d.mat', vox_path, opt.vox_size))
    local vox_instance = vox_loader.voxel
    batch_vox[n]:copy(vox_instance)
  end

  collectgarbage()
  return batch_ims, batch_vox, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE: utils/trainset_rotatorRNN_base.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat ..
'_trainids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
--------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

--- Draw a random batch of (input view, action, target view + mask) triples.
-- The action is one of three single-step view changes, one-hot encoded in
-- batch_rot: column 3 for delta -1, column 1 for delta +1, column 2 for 0.
-- @param quantity number of samples in the batch
-- @return batch_im_in (quantity x 3 x H x W),
--         batch_outputs {target images, target masks},
--         batch_rot (quantity x opt.na), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_im_in = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  local batch_rot = torch.Tensor(quantity, opt.na):zero()
  local batch_outputs = {}
  batch_outputs[1] = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  batch_outputs[2] = torch.Tensor(quantity, 1, load_size[2], load_size[2])

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' .. cls_files[file_idx]
    local view_in = torch.random(opt.nview)
    -- use torch's generator (not math.random) so the draw follows the
    -- torch.manualSeed set for each loader thread
    local rng_rot = torch.random(3)
    local delta
    if rng_rot == 1 then
      delta = -1
      batch_rot[n][3] = 1
    elseif rng_rot == 2 then
      delta = 1
      batch_rot[n][1] = 1
    elseif rng_rot == 3 then
      delta = 0
      batch_rot[n][2] = 1
    end

    -- wrap around the ring of views 1..opt.nview
    local view_out = view_in + delta
    if view_out > opt.nview then view_out = 1 end
    if view_out < 1 then view_out = opt.nview end

    local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_in*(360/opt.nview)))
    local img_out = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
    local msk_out = loadImage(string.format('%s/masks/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))

    batch_im_in[n]:copy(img_in)
    batch_outputs[1][n]:copy(img_out)
    -- the mask is loaded with 3 channels; only the first one is kept
    batch_outputs[2][n]:copy(msk_out[1])
  end

  collectgarbage()
  return batch_im_in, batch_outputs, batch_rot, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE:
utils/trainset_rotatorRNN_curriculum.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

-- one category name per line of exp_<opt.exp_list>.txt
for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat .. '_trainids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
-----------------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

--- Draw a random batch of opt.kstep-step view sequences.
-- One direction (delta -1 or +1) is drawn per sample and applied opt.kstep
-- times; it is one-hot encoded in batch_rot (column 3 for -1, column 1 for +1).
-- @param quantity number of samples in the batch
-- @return batch_im_in (quantity x 3 x H x W),
--         batch_outputs (for step k: [2k-1] target images, [2k] target masks),
--         batch_rot (quantity x opt.na), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_im_in = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  local batch_rot = torch.Tensor(quantity, opt.na):zero()
  local batch_outputs = {}
  for k = 1, opt.kstep do
    batch_outputs[k*2-1] = torch.Tensor(quantity, 3, load_size[2], load_size[2])
    batch_outputs[k*2] = torch.Tensor(quantity, 1, load_size[2], load_size[2])
  end

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' ..
cls_files[file_idx]
    local view_in = torch.random(opt.nview)
    -- use torch's generator (not math.random) so the draw follows the
    -- torch.manualSeed set for each loader thread
    local rng_rot = torch.random(2)
    local delta
    if rng_rot == 1 then
      delta = -1
      batch_rot[n][3] = 1
    elseif rng_rot == 2 then
      delta = 1
      batch_rot[n][1] = 1
    end

    local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_in*(360/opt.nview)))
    batch_im_in[n]:copy(img_in)

    local view_out = view_in
    for k = 1, opt.kstep do
      -- step once more in the same direction, wrapping around 1..opt.nview
      view_out = view_out + delta
      if view_out > opt.nview then view_out = 1 end
      if view_out < 1 then view_out = opt.nview end

      local img_out = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
      local msk_out = loadImage(string.format('%s/masks/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
      batch_outputs[k*2-1][n]:copy(img_out)
      -- the mask is loaded with 3 channels; only the first one is kept
      batch_outputs[k*2][n]:copy(msk_out[1])
    end
  end

  collectgarbage()
  return batch_im_in, batch_outputs, batch_rot, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE: utils/valset_ptn.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
require 'mattorch'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat ..
'_valids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
------------------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

----------------------------------------------------
--- Draw a random validation batch of multi-view images and voxel grids.
-- For each sample a category and then an object of that category are picked
-- at random; all opt.nview views of the object are loaded.
-- @param quantity number of objects in the batch
-- @return batch_ims (table of quantity tensors, each opt.nview x 3 x H x W),
--         batch_vox (quantity x 1 x V x V x V), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_ims = {}
  for n = 1, quantity do
    batch_ims[n] = torch.Tensor(opt.nview, 3, load_size[2], load_size[2])
  end
  local batch_vox = torch.Tensor(quantity, 1, opt.vox_size, opt.vox_size, opt.vox_size)

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' .. cls_files[file_idx]
    -- views are stored per azimuth angle, in steps of 360/opt.nview
    for k = 1, opt.nview do
      local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, k*(360/opt.nview)))
      batch_ims[n][k]:copy(img_in)
    end

    local vox_path = opt.data_vox_path .. '/' .. cls_files[file_idx]
    local vox_loader = mattorch.load(string.format('%s/model_%d.mat', vox_path, opt.vox_size))
    local vox_instance = vox_loader.voxel
    batch_vox[n]:copy(vox_instance)
  end

  collectgarbage()
  return batch_ims, batch_vox, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE: utils/valset_rotatorRNN_base.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat ..
'_valids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
--------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

--- Draw a random validation batch of (input view, action, target view + mask).
-- The action is one of three single-step view changes, one-hot encoded in
-- batch_rot: column 3 for delta -1, column 1 for delta +1, column 2 for 0.
-- @param quantity number of samples in the batch
-- @return batch_im_in (quantity x 3 x H x W),
--         batch_outputs {target images, target masks},
--         batch_rot (quantity x opt.na), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_im_in = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  local batch_rot = torch.Tensor(quantity, opt.na):zero()
  local batch_outputs = {}
  batch_outputs[1] = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  batch_outputs[2] = torch.Tensor(quantity, 1, load_size[2], load_size[2])

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' .. cls_files[file_idx]
    local view_in = torch.random(opt.nview)
    -- use torch's generator (not math.random) so the draw follows the
    -- torch.manualSeed set for each loader thread
    local rng_rot = torch.random(3)
    local delta
    if rng_rot == 1 then
      delta = -1
      batch_rot[n][3] = 1
    elseif rng_rot == 2 then
      delta = 1
      batch_rot[n][1] = 1
    elseif rng_rot == 3 then
      delta = 0
      batch_rot[n][2] = 1
    end

    -- wrap around the ring of views 1..opt.nview
    local view_out = view_in + delta
    if view_out > opt.nview then view_out = 1 end
    if view_out < 1 then view_out = opt.nview end

    local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_in*(360/opt.nview)))
    local img_out = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
    local msk_out = loadImage(string.format('%s/masks/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))

    batch_im_in[n]:copy(img_in)
    batch_outputs[1][n]:copy(img_out)
    -- the mask is loaded with 3 channels; only the first one is kept
    batch_outputs[2][n]:copy(msk_out[1])
  end

  collectgarbage()
  return batch_im_in, batch_outputs, batch_rot, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end

================================================
FILE:
utils/valset_rotatorRNN_curriculum.lua
================================================
-- code adapted from https://github.com/soumith/dcgan.torch.git
require 'image'
dir = require 'pl.dir'

dataLoader = {}

local categories = {}
local files = {}
local size = 0

-- one category name per line of exp_<opt.exp_list>.txt
for cat in io.lines('exp_' .. opt.exp_list .. '.txt') do
  print(cat)
  categories[#categories + 1] = cat
  local dirpath = opt.data_root .. '/' .. cat

  local list = opt.data_id_path .. '/' .. cat .. '_valids.txt'
  -- local: the original assigned an accidental global here
  local cls_files = {}
  for line in io.lines(list) do
    cls_files[#cls_files + 1] = line
    size = size + 1
  end
  files[#files + 1] = cls_files
end
-----------------------------------

local load_size = {3, opt.load_size}

-- load an image as a 3-channel float tensor scaled to load_size x load_size
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, load_size[2], load_size[2])
  return input
end

--- Draw a random validation batch of opt.kstep-step view sequences.
-- One direction (delta -1 or +1) is drawn per sample and applied opt.kstep
-- times; it is one-hot encoded in batch_rot (column 3 for -1, column 1 for +1).
-- @param quantity number of samples in the batch
-- @return batch_im_in (quantity x 3 x H x W),
--         batch_outputs (for step k: [2k-1] target images, [2k] target masks),
--         batch_rot (quantity x opt.na), class_idx_batch (quantity)
function dataLoader:sample(quantity)
  local class_idx_batch = torch.Tensor(quantity)
  for n = 1, quantity do
    class_idx_batch[n] = torch.randperm(#categories)[1]
  end

  local batch_im_in = torch.Tensor(quantity, 3, load_size[2], load_size[2])
  local batch_rot = torch.Tensor(quantity, opt.na):zero()
  local batch_outputs = {}
  for k = 1, opt.kstep do
    batch_outputs[k*2-1] = torch.Tensor(quantity, 3, load_size[2], load_size[2])
    batch_outputs[k*2] = torch.Tensor(quantity, 1, load_size[2], load_size[2])
  end

  for n = 1, quantity do
    local cls_files = files[class_idx_batch[n]]
    local file_idx = torch.randperm(#cls_files)[1]

    local obj_list = opt.data_view_path .. '/' ..
cls_files[file_idx]
    local view_in = torch.random(opt.nview)
    -- use torch's generator (not math.random) so the draw follows the
    -- torch.manualSeed set for each loader thread
    local rng_rot = torch.random(2)
    local delta
    if rng_rot == 1 then
      delta = -1
      batch_rot[n][3] = 1
    elseif rng_rot == 2 then
      delta = 1
      batch_rot[n][1] = 1
    end

    local img_in = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_in*(360/opt.nview)))
    batch_im_in[n]:copy(img_in)

    local view_out = view_in
    for k = 1, opt.kstep do
      -- step once more in the same direction, wrapping around 1..opt.nview
      view_out = view_out + delta
      if view_out > opt.nview then view_out = 1 end
      if view_out < 1 then view_out = opt.nview end

      local img_out = loadImage(string.format('%s/imgs/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
      local msk_out = loadImage(string.format('%s/masks/a%03d_e030.jpg', obj_list, view_out*(360/opt.nview)))
      batch_outputs[k*2-1][n]:copy(img_out)
      -- the mask is loaded with 3 channels; only the first one is kept
      batch_outputs[k*2][n]:copy(msk_out[1])
    end
  end

  collectgarbage()
  return batch_im_in, batch_outputs, batch_rot, class_idx_batch
end

--- Total number of ids read from all category lists.
function dataLoader:size()
  return size
end