Repository: phillipi/pix2pix Branch: master Commit: 89ff2a81ce44 Files: 27 Total size: 112.5 KB Directory structure: gitextract_rix8cnv2/ ├── .gitignore ├── LICENSE ├── README.md ├── data/ │ ├── data.lua │ ├── dataset.lua │ └── donkey_folder.lua ├── datasets/ │ ├── bibtex/ │ │ ├── cityscapes.tex │ │ ├── facades.tex │ │ ├── handbags.tex │ │ ├── shoes.tex │ │ └── transattr.tex │ └── download_dataset.sh ├── models/ │ └── download_model.sh ├── models.lua ├── scripts/ │ ├── combine_A_and_B.py │ ├── edges/ │ │ ├── PostprocessHED.m │ │ └── batch_hed.py │ ├── eval_cityscapes/ │ │ ├── caffemodel/ │ │ │ └── deploy.prototxt │ │ ├── cityscapes.py │ │ ├── download_fcn8s.sh │ │ ├── evaluate.py │ │ └── util.py │ └── receptive_field_sizes.m ├── test.lua ├── train.lua └── util/ ├── cudnn_convert_custom.lua └── util.lua ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # *~ *.DS_Store cache/ results/ checkpoints/ # luarocks build files *.src.rock *.zip *.tar.gz *.t7 # Object files *.o *.os *.ko *.obj *.elf # Precompiled Headers *.gch *.pch # Libraries *.lib *.a *.la *.lo *.def *.exp # Shared objects (inc. Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex ================================================ FILE: LICENSE ================================================ Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----------------------------- LICENSE FOR DCGAN -------------------------------- BSD License For dcgan.torch software Copyright (c) 2015, Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ # pix2pix [Project](https://phillipi.github.io/pix2pix/) | [Arxiv](https://arxiv.org/abs/1611.07004) | [PyTorch](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) Torch implementation for learning a mapping from input images to output images, for example: Image-to-Image Translation with Conditional Adversarial Networks [Phillip Isola](http://web.mit.edu/phillipi/), [Jun-Yan Zhu](https://www.cs.cmu.edu/~junyanz/), [Tinghui Zhou](https://people.eecs.berkeley.edu/~tinghuiz/), [Alexei A. Efros](https://people.eecs.berkeley.edu/~efros/) CVPR, 2017. On some tasks, decent results can be obtained fairly quickly and on small datasets. For example, to learn to generate facades (example shown above), we trained on just 400 images for about 2 hours (on a single Pascal Titan X GPU). However, for harder problems it may be important to train on far larger datasets, and for many hours or even days. **Note**: Please check out our [PyTorch](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix) implementation for pix2pix and CycleGAN. 
The PyTorch version is under active development and can produce results comparable to or better than this Torch version. ## Setup ### Prerequisites - Linux or OSX - NVIDIA GPU + CUDA CuDNN (CPU mode and CUDA without CuDNN may work with minimal modification, but untested) ### Getting Started - Install torch and dependencies from https://github.com/torch/distro - Install torch packages `nngraph` and `display` ```bash luarocks install nngraph luarocks install https://raw.githubusercontent.com/szym/display/master/display-scm-0.rockspec ``` - Clone this repo: ```bash git clone git@github.com:phillipi/pix2pix.git cd pix2pix ``` - Download the dataset (e.g., [CMP Facades](http://cmp.felk.cvut.cz/~tylecr1/facade/)): ```bash bash ./datasets/download_dataset.sh facades ``` - Train the model ```bash DATA_ROOT=./datasets/facades name=facades_generation which_direction=BtoA th train.lua ``` - (CPU only) The same training command without using a GPU or CUDNN. Setting the environment variables ```gpu=0 cudnn=0``` forces CPU only ```bash DATA_ROOT=./datasets/facades name=facades_generation which_direction=BtoA gpu=0 cudnn=0 batchSize=10 save_epoch_freq=5 th train.lua ``` - (Optionally) start the display server to view results as the model trains. (See [Display UI](#display-ui) for more details): ```bash th -ldisplay.start 8000 0.0.0.0 ``` - Finally, test the model: ```bash DATA_ROOT=./datasets/facades name=facades_generation which_direction=BtoA phase=val th test.lua ``` The test results will be saved to an html file here: `./results/facades_generation/latest_net_G_val/index.html`. ## Train ```bash DATA_ROOT=/path/to/data/ name=expt_name which_direction=AtoB th train.lua ``` Switch `AtoB` to `BtoA` to train translation in the opposite direction. Models are saved to `./checkpoints/expt_name` (can be changed by passing `checkpoint_dir=your_dir` in train.lua). See `opt` in train.lua for additional training options. 
## Test ```bash DATA_ROOT=/path/to/data/ name=expt_name which_direction=AtoB phase=val th test.lua ``` This will run the model named `expt_name` in direction `AtoB` on all images in `/path/to/data/val`. Result images, and a webpage to view them, are saved to `./results/expt_name` (can be changed by passing `results_dir=your_dir` in test.lua). See `opt` in test.lua for additional testing options. ## Datasets Download the datasets using the following script. Some of the datasets are collected by other researchers. Please cite their papers if you use the data. ```bash bash ./datasets/download_dataset.sh dataset_name ``` - `facades`: 400 images from [CMP Facades dataset](http://cmp.felk.cvut.cz/~tylecr1/facade/). [[Citation](datasets/bibtex/facades.tex)] - `cityscapes`: 2975 images from the [Cityscapes training set](https://www.cityscapes-dataset.com/). [[Citation](datasets/bibtex/cityscapes.tex)] - `maps`: 1096 training images scraped from Google Maps - `edges2shoes`: 50k training images from [UT Zappos50K dataset](http://vision.cs.utexas.edu/projects/finegrained/utzap50k/). Edges are computed by [HED](https://github.com/s9xie/hed) edge detector + post-processing. [[Citation](datasets/bibtex/shoes.tex)] - `edges2handbags`: 137K Amazon Handbag images from [iGAN project](https://github.com/junyanz/iGAN). Edges are computed by [HED](https://github.com/s9xie/hed) edge detector + post-processing. [[Citation](datasets/bibtex/handbags.tex)] - `night2day`: around 20K natural scene images from [Transient Attributes dataset](http://transattr.cs.brown.edu/) [[Citation](datasets/bibtex/transattr.tex)]. To train a `day2night` pix2pix model, you need to add `which_direction=BtoA`. ## Models Download the pre-trained models with the following script. You need to rename the model (e.g., `facades_label2image` to `/checkpoints/facades/latest_net_G.t7`) after the download has finished. 
```bash bash ./models/download_model.sh model_name ``` - `facades_label2image` (label -> facade): trained on the CMP Facades dataset. - `cityscapes_label2image` (label -> street scene): trained on the Cityscapes dataset. - `cityscapes_image2label` (street scene -> label): trained on the Cityscapes dataset. - `edges2shoes` (edge -> photo): trained on UT Zappos50K dataset. - `edges2handbags` (edge -> photo): trained on Amazon handbags images. - `day2night` (daytime scene -> nighttime scene): trained on around 100 [webcams](http://transattr.cs.brown.edu/). ## Setup Training and Test data ### Generating Pairs We provide a python script to generate training data in the form of pairs of images {A,B}, where A and B are two different depictions of the same underlying scene. For example, these might be pairs {label map, photo} or {bw image, color image}. Then we can learn to translate A to B or B to A: Create folder `/path/to/data` with subfolders `A` and `B`. `A` and `B` should each have their own subfolders `train`, `val`, `test`, etc. In `/path/to/data/A/train`, put training images in style A. In `/path/to/data/B/train`, put the corresponding images in style B. Repeat the same for other data splits (`val`, `test`, etc). Corresponding images in a pair {A,B} must be the same size and have the same filename, e.g., `/path/to/data/A/train/1.jpg` is considered to correspond to `/path/to/data/B/train/1.jpg`. Once the data is formatted this way, call: ```bash python scripts/combine_A_and_B.py --fold_A /path/to/data/A --fold_B /path/to/data/B --fold_AB /path/to/data ``` This will combine each pair of images (A,B) into a single image file, ready for training. ### Notes on Colorization No need to run `combine_A_and_B.py` for colorization. Instead, you need to prepare some natural images and set `preprocess=colorization` in the script. The program will automatically convert each RGB image into Lab color space, and create `L -> ab` image pair during the training. 
Also set `input_nc=1` and `output_nc=2`. ### Extracting Edges We provide python and Matlab scripts to extract coarse edges from photos. Run `scripts/edges/batch_hed.py` to compute [HED](https://github.com/s9xie/hed) edges. Run `scripts/edges/PostprocessHED.m` to simplify edges with additional post-processing steps. Check the code documentation for more details. ### Evaluating Labels2Photos on Cityscapes We provide scripts for running the evaluation of the Labels2Photos task on the Cityscapes **validation** set. We assume that you have installed `caffe` (and `pycaffe`) in your system. If not, see the [official website](http://caffe.berkeleyvision.org/installation.html) for installation instructions. Once `caffe` is successfully installed, download the pre-trained FCN-8s semantic segmentation model (512MB) by running ```bash bash ./scripts/eval_cityscapes/download_fcn8s.sh ``` Then make sure `./scripts/eval_cityscapes/` is in your system's python path. If not, run the following command to add it ```bash export PYTHONPATH=${PYTHONPATH}:./scripts/eval_cityscapes/ ``` Now you can run the following command to evaluate your predictions: ```bash python ./scripts/eval_cityscapes/evaluate.py --cityscapes_dir /path/to/original/cityscapes/dataset/ --result_dir /path/to/your/predictions/ --output_dir /path/to/output/directory/ ``` Images stored under `--result_dir` should contain your model predictions on the Cityscapes **validation** split, and have the original Cityscapes naming convention (e.g., `frankfurt_000001_038418_leftImg8bit.png`). The script will output a text file under `--output_dir` containing the metric. **Further notes**: Our pre-trained FCN model is **not** supposed to work on Cityscapes in the original resolution (1024x2048) as it was trained on 256x256 images that are then upsampled to 1024x2048 during training. 
The purpose of the resizing during training was to 1) keep the label maps in the original high resolution untouched and 2) avoid the need to change the standard FCN training code and the architecture for Cityscapes. During test time, you need to synthesize 256x256 results. Our test code will automatically upsample your results to 1024x2048 before feeding them to the pre-trained FCN model. The output is at 1024x2048 resolution and will be compared to 1024x2048 ground truth labels. You do not need to resize the ground truth labels. The best way to verify whether everything is correct is to reproduce the numbers for real images in the paper first. To achieve it, you need to resize the original/real Cityscapes images (**not** labels) to 256x256 and feed them to the evaluation code. ## Display UI Optionally, for displaying images during training and test, use the [display package](https://github.com/szym/display). - Install it with: `luarocks install https://raw.githubusercontent.com/szym/display/master/display-scm-0.rockspec` - Then start the server with: `th -ldisplay.start` - Open this URL in your browser: [http://localhost:8000](http://localhost:8000) By default, the server listens on localhost. Pass `0.0.0.0` to allow external connections on any interface: ```bash th -ldisplay.start 8000 0.0.0.0 ``` Then open `http://(hostname):(port)/` in your browser to load the remote desktop. L1 error is plotted to the display by default. Set the environment variable `display_plot` to a comma-separated list of values `errL1`, `errG` and `errD` to visualize the L1, generator, and discriminator error respectively. For example, to plot only the generator and discriminator errors to the display instead of the default L1 error, set `display_plot="errG,errD"`. 
## Citation If you use this code for your research, please cite our paper Image-to-Image Translation with Conditional Adversarial Networks: ``` @article{pix2pix2017, title={Image-to-Image Translation with Conditional Adversarial Networks}, author={Isola, Phillip and Zhu, Jun-Yan and Zhou, Tinghui and Efros, Alexei A}, journal={CVPR}, year={2017} } ``` ## Cat Paper Collection If you love cats, and love reading cool graphics, vision, and learning papers, please check out the Cat Paper Collection: [[Github]](https://github.com/junyanz/CatPapers) [[Webpage]](https://www.cs.cmu.edu/~junyanz/cat/cat_papers.html) ## Acknowledgments Code borrows heavily from [DCGAN](https://github.com/soumith/dcgan.torch). The data loader is modified from [DCGAN](https://github.com/soumith/dcgan.torch) and [Context-Encoder](https://github.com/pathak22/context-encoder). ================================================ FILE: data/data.lua ================================================ --[[ This data loader is a modified version of the one from dcgan.torch (see https://github.com/soumith/dcgan.torch/blob/master/data/data.lua). 
Copyright (c) 2016, Deepak Pathak [See LICENSE file for details]
]]--

local Threads = require 'threads'
Threads.serialization('threads.sharedserialize')

local data = {}

-- result[1] holds the most recent batch handed over by data._pushResult
-- until data:getBatch() consumes it.
local result = {}
local unpack = unpack and unpack or table.unpack

-- Creates a batch loader.
--   n    : number of worker threads; 0 runs the donkey file and every job
--          synchronously in the calling thread.
--   opt_ : options table (defaults to {}); each worker sees it as the global
--          `opt`, which is what donkey_folder.lua and _getFromThreads read.
-- Returns a table carrying every function of `data` plus `threads` and `_size`.
function data.new(n, opt_)
   opt_ = opt_ or {}
   local self = {}
   for k,v in pairs(data) do
      self[k] = v
   end

   local donkey_file = 'donkey_folder.lua'
   if n > 0 then
      local options = opt_
      self.threads = Threads(n,
                             function() require 'torch' end,
                             function(idx)
                                -- validate before the first dereference of opt
                                assert(options, 'options not found')
                                opt = options
                                assert(opt, 'opt not given')
                                tid = idx
                                -- one distinct seed per worker
                                local seed = (opt.manualSeed and opt.manualSeed or 0) + idx
                                torch.manualSeed(seed)
                                torch.setnumthreads(1)
                                print(string.format('Starting donkey with id: %d seed: %d', tid, seed))
                                print(opt)
                                paths.dofile(donkey_file)
                             end
      )
   else
      -- The donkey file and the job functions read the global `opt`; publish
      -- the supplied options if the caller has not already set that global.
      if opt == nil then opt = opt_ end
      if donkey_file then paths.dofile(donkey_file) end
      -- minimal synchronous stand-in for the thread pool interface used below
      self.threads = {}
      function self.threads:addjob(f1, f2) f2(f1()) end
      function self.threads:dojob() end
      function self.threads:synchronize() end
   end

   -- ask a worker for the dataset size and wait for the answer
   local nSamples = 0
   self.threads:addjob(function() return trainLoader:size() end,
                       function(c) nSamples = c end)
   self.threads:synchronize()
   self._size = nSamples

   -- pre-queue one batch job per worker
   for i = 1, n do
      self.threads:addjob(self._getFromThreads,
                          self._pushResult)
   end

   return self
end

-- Job body: samples one batch of opt.batchSize items from the global
-- trainLoader created by the donkey file.
function data._getFromThreads()
   assert(opt.batchSize, 'opt.batchSize not found')
   return trainLoader:sample(opt.batchSize)
end

-- End-callback passed to threads:addjob; stores the values returned by
-- data._getFromThreads so that data:getBatch() can pick them up.
function data._pushResult(...)
   -- {...} is always a table, so the former `res == nil` guard was unreachable
   -- (and referenced an undefined `self`); it has been removed.
   result[1] = {...}
end

-- Queues another batch job, runs one pending job, and returns the batch
-- tensor together with the list of image paths it was built from.
function data:getBatch()
   self.threads:addjob(self._getFromThreads, self._pushResult)
   self.threads:dojob()
   local res = result[1]
   assert(res, 'data loader produced no batch')

   -- res is {data, scalarLabels, samplePaths} as returned by dataLoader:sample
   local img_data = res[1]
   local img_paths = res[3]

   result[1] = nil
   if torch.type(img_data) == 'table' then
      img_data = unpack(img_data)
   end

   return img_data, img_paths
end

-- Number of samples reported by trainLoader:size() at construction time.
function data:size()
   return self._size
end

return data

================================================
FILE: data/dataset.lua
================================================
--[[
    Copyright (c) 2015-present, Facebook, Inc.
    All rights reserved.

    This source code is licensed under the BSD-style license found in the
    LICENSE file in the root directory of this source tree. An additional grant
    of patent rights can be found in the PATENTS file in the same directory.
]]--

require 'torch'
torch.setdefaulttensortype('torch.FloatTensor')
local ffi = require 'ffi'
local class = require('pl.class')
local dir = require 'pl.dir'
local tablex = require 'pl.tablex'
local argcheck = require 'argcheck'
require 'sys'
require 'xlua'
require 'image'

local dataset = torch.class('dataLoader')

-- Argument specification for dataLoader{...}; pack=true makes initcheck
-- return all arguments in a single table.
local initcheck = argcheck{
   pack=true,
   help=[[
     A dataset class for images in a flat folder structure (folder-name is class-name).
     Optimized for extremely large datasets (upwards of 14 million images).
     Tested only on Linux (as it uses command-line linux utilities to scale up)
]],
   {check=function(paths)
       local out = true;
       for k,v in ipairs(paths) do
          if type(v) ~= 'string' then
             print('paths can only be of string input');
             out = false
          end
       end
       return out
   end,
    name="paths",
    type="table",
    help="Multiple paths of directories with images"},

   {name="sampleSize",
    type="table",
    help="a consistent sample size to resize the images"},

   {name="split",
    type="number",
    help="Percentage of split to go to Training"
   },
   {name="serial_batches",
    type="number",
    help="if randomly sample training images"},

   {name="samplingMode",
    type="string",
    help="Sampling mode: random | balanced ",
    default = "balanced"},

   {name="verbose",
    type="boolean",
    help="Verbose mode during initialization",
    default = false},

   {name="loadSize",
    type="table",
    help="a size to load the images to, initially",
    opt = true},

   {name="forceClasses",
    type="table",
    help="If you want this loader to map certain classes to certain indices, "
       .. "pass a classes table that has {classname : classindex} pairs."
       .. " For example: {3 : 'dog', 5 : 'cat'}"
       .. "This function is very useful when you want two loaders to have the same "
    .. "class indices (trainLoader/testLoader for example)",
    opt = true},

   {name="sampleHookTrain",
    type="function",
    help="applied to sample during training(ex: for lighting jitter). "
       .. "It takes the image path as input",
    opt = true},

   {name="sampleHookTest",
    type="function",
    help="applied to sample during testing",
    opt = true},
}

-- Builds the image index: validates the arguments, derives one class per
-- input path (the path's basename), lists every image file with `find`,
-- stores the paths in self.imagePath (one zero-padded row per image) and,
-- when split ~= 100, partitions each class into train and test index lists.
function dataset:__init(...)

   -- argcheck
   local args =  initcheck(...)
   print(args)
   for k,v in pairs(args) do self[k] = v end

   if not self.loadSize then self.loadSize = self.sampleSize; end

   -- NOTE(review): defaultSampleHook is not defined in this file as shown, so
   -- these fall back to nil unless a hook is supplied or assigned afterwards.
   if not self.sampleHookTrain then self.sampleHookTrain = self.defaultSampleHook end
   if not self.sampleHookTest then self.sampleHookTest = self.defaultSampleHook end
   self.image_count = 1   -- running counter used by getByClass for serial batches
   -- find class names
   self.classes = {}
   local classPaths = {}
   if self.forceClasses then
      -- forceClasses is consumed as {classindex = classname}
      for k,v in pairs(self.forceClasses) do
         self.classes[k] = v
         classPaths[k] = {}
      end
   end
   -- returns the key under which value o is stored in t, or nil
   local function tableFind(t, o) for k,v in pairs(t) do if v == o then return k end end end
   -- loop over each paths folder, get list of unique class names,
   -- also store the directory paths per class
   -- for each class,
   for k,path in ipairs(self.paths) do
      local dirs = {} -- hack
      dirs[1] = path
      for k,dirpath in ipairs(dirs) do
         local class = paths.basename(dirpath)
         local idx = tableFind(self.classes, class)
         if not idx then
            table.insert(self.classes, class)
            idx = #self.classes
            classPaths[idx] = {}
         end
         if not tableFind(classPaths[idx], dirpath) then
            table.insert(classPaths[idx], dirpath);
         end
      end
   end

   -- reverse map: class name -> class index
   self.classIndices = {}
   for k,v in ipairs(self.classes) do
      self.classIndices[v] = k
   end

   -- define command-line tools, try your best to maintain OSX compatibility
   local wc = 'wc'
   local cut = 'cut'
   local find = 'find -H'  -- if folder name is symlink, do find inside it after dereferencing
   if ffi.os == 'OSX' then
      wc = 'gwc'
      cut = 'gcut'
      find = 'gfind'
   end
   ----------------------------------------------------------------------
   -- Options for the GNU find command
   local extensionList = {'jpg', 'png','JPG','PNG','JPEG', 'ppm', 'PPM', 'bmp', 'BMP'}
   local findOptions = ' -iname "*.' .. extensionList[1] .. '"'
   for i=2,#extensionList do
      findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"'
   end

   -- find the image path names
   self.imagePath = torch.CharTensor()  -- path to each image in dataset
   self.imageClass = torch.LongTensor() -- class index of each image (class index in self.classes)
   self.classList = {}                  -- index of imageList to each image of a particular class
   self.classListSample = self.classList -- the main list used when sampling data

   print('running "find" on each class directory, and concatenate all'
         .. ' those filenames into a single file containing all image paths for a given class')
   -- so, generates one file per class
   local classFindFiles = {}
   for i=1,#self.classes do
      classFindFiles[i] = os.tmpname()
   end
   local combinedFindList = os.tmpname();

   -- the find commands are written to a temporary script and run with bash
   local tmpfile = os.tmpname()
   local tmphandle = assert(io.open(tmpfile, 'w'))
   -- iterate over classes
   for i, class in ipairs(self.classes) do
      -- iterate over classPaths
      for j,path in ipairs(classPaths[i]) do
         local command = find .. ' "' .. path .. '" ' .. findOptions
            .. ' >>"' .. classFindFiles[i] .. '" \n'
         tmphandle:write(command)
      end
   end
   io.close(tmphandle)
   os.execute('bash ' .. tmpfile)
   os.execute('rm -f ' .. tmpfile)

   print('now combine all the files to a single large file')
   local tmpfile = os.tmpname()
   local tmphandle = assert(io.open(tmpfile, 'w'))
   -- concat all finds to a single large file in the order of self.classes
   for i=1,#self.classes do
      local command = 'cat "' .. classFindFiles[i] .. '" >>' .. combinedFindList .. ' \n'
      tmphandle:write(command)
   end
   io.close(tmphandle)
   os.execute('bash ' .. tmpfile)
   os.execute('rm -f ' .. tmpfile)

   --==========================================================================
   print('load the large concatenated list of sample paths to self.imagePath')
   local cmd = wc .. " -L '"
                  .. combinedFindList .. "' |"
                  .. cut .. " -f1 -d' '"
   print('cmd..' .. cmd)
   -- +1 keeps at least one trailing 0 byte per row (the tensor is zero-filled),
   -- which ffi.string relies on when the path is read back
   local maxPathLength = tonumber(sys.fexecute(cmd)) + 1
   local length = tonumber(sys.fexecute(wc .. " -l '"
                                           .. combinedFindList .. "' |"
                                           .. cut .. " -f1 -d' '"))
   assert(length > 0, "Could not find any image file in the given input paths")
   assert(maxPathLength > 0, "paths of files are length 0?")
   self.imagePath:resize(length, maxPathLength):fill(0)
   local s_data = self.imagePath:data()
   local count = 0
   for line in io.lines(combinedFindList) do
      ffi.copy(s_data, line)
      s_data = s_data + maxPathLength   -- advance to the next row
      if self.verbose and count % 10000 == 0 then
         xlua.progress(count, length)
      end;
      count = count + 1
   end

   self.numSamples = self.imagePath:size(1)
   if self.verbose then print(self.numSamples ..  ' samples found.') end
   --==========================================================================
   print('Updating classList and imageClass appropriately')
   self.imageClass:resize(self.numSamples)
   local runningIndex = 0
   for i=1,#self.classes do
      if self.verbose then xlua.progress(i, #(self.classes)) end
      local length = tonumber(sys.fexecute(wc .. " -l '"
                                              .. classFindFiles[i] .. "' |"
                                              .. cut .. " -f1 -d' '"))
      if length == 0 then
         error('Class has zero samples')
      else
         -- images of class i occupy a contiguous range of rows in imagePath
         self.classList[i] = torch.range(runningIndex + 1, runningIndex + length):long()
         self.imageClass[{{runningIndex + 1, runningIndex + length}}]:fill(i)
      end
      runningIndex = runningIndex + length
   end

   --==========================================================================
   -- clean up temporary files
   print('Cleaning up temporary files')
   local tmpfilelistall = ''
   for i=1,#(classFindFiles) do
      tmpfilelistall = tmpfilelistall .. ' "' .. classFindFiles[i] .. '"'
      -- delete in batches of 1000 names per rm invocation
      if i % 1000 == 0 then
         os.execute('rm -f ' .. tmpfilelistall)
         tmpfilelistall = ''
      end
   end
   os.execute('rm -f '  .. tmpfilelistall)
   os.execute('rm -f "' .. combinedFindList .. '"')
   --==========================================================================

   if self.split == 100 then
      self.testIndicesSize = 0
   else
      print('Splitting training and test sets to a ratio of '
               .. self.split .. '/' .. (100-self.split))
      self.classListTrain = {}
      self.classListTest  = {}
      self.classListSample = self.classListTrain
      local totalTestSamples = 0
      -- split the classList into classListTrain and classListTest
      for i=1,#self.classes do
         local list = self.classList[i]
         local count = self.classList[i]:size(1)
         local splitidx = math.floor((count * self.split / 100) + 0.5) -- +round
         local perm = torch.randperm(count)
         self.classListTrain[i] = torch.LongTensor(splitidx)
         for j=1,splitidx do
            self.classListTrain[i][j] = list[perm[j]]
         end
         if splitidx == count then -- all samples were allocated to train set
            self.classListTest[i]  = torch.LongTensor()
         else
            self.classListTest[i]  = torch.LongTensor(count-splitidx)
            totalTestSamples = totalTestSamples + self.classListTest[i]:size(1)
            local idx = 1
            for j=splitidx+1,count do
               self.classListTest[i][idx] = list[perm[j]]
               idx = idx + 1
            end
         end
      end
      -- Now combine classListTest into a single tensor
      self.testIndices = torch.LongTensor(totalTestSamples)
      self.testIndicesSize = totalTestSamples
      local tdata = self.testIndices:data()
      local tidx = 0
      for i=1,#self.classes do
         local list = self.classListTest[i]
         if list:dim() ~= 0 then
            local ldata = list:data()
            -- raw data pointers are 0-based
            for j=0,list:size(1)-1 do
               tdata[tidx] = ldata[j]
               tidx = tidx + 1
            end
         end
      end
   end
end

-- size(), size(class)
-- Without `class`: total number of samples. With a class name or class index:
-- number of entries of that class in `list` (defaults to self.classList).
function dataset:size(class, list)
   list = list or self.classList
   if not class then
      return self.numSamples
   elseif type(class) == 'string' then
      return list[self.classIndices[class]]:size(1)
   elseif type(class) == 'number' then
      return list[class]:size(1)
   end
end

-- getByClass
-- Picks one image of the given class index (sequentially when
-- self.serial_batches == 1, otherwise uniformly at random) and returns
-- self:sampleHookTrain(imgpath) followed by the image path.
function dataset:getByClass(class)
   local nImages = self.classListSample[class]:nElement()
   local index = 0
   if self.serial_batches == 1 then
      -- cycle through the class list in order, wrapping around at the end
      index = math.fmod(self.image_count-1, nImages)+1
      self.image_count = self.image_count +1
   else
      -- torch.uniform() may return exactly 0, which would yield the invalid
      -- index 0; clamp so the result always lies in 1..nImages
      index = math.max(1, math.ceil(torch.uniform() * nImages))
   end
   local imgpath = ffi.string(torch.data(self.imagePath[self.classListSample[class][index]]))
   return self:sampleHookTrain(imgpath),  imgpath
end

-- converts
-- a table of samples (and corresponding labels) to a clean tensor
-- Packs the per-sample tensors of dataTable into one batch tensor of shape
-- (#scalarTable, sampleSize[1], sampleSize[2], sampleSize[3]) and the class
-- indices of scalarTable into a LongTensor (unset slots stay at -1111).
local function tableToOutput(self, dataTable, scalarTable)
   local nLabels = #scalarTable
   assert(dataTable[1]:dim() == 3)
   local size = self.sampleSize
   local batch = torch.Tensor(nLabels, size[1], size[2], size[3])
   local labelTensor = torch.LongTensor(nLabels):fill(-1111)
   for n = 1, #dataTable do
      batch[n]:copy(dataTable[n])
      labelTensor[n] = scalarTable[n]
   end
   return batch, labelTensor
end

-- sampler, samples from the training set.
-- Draws `quantity` samples, each from a uniformly chosen class, and returns
-- the batch tensor, the class-index tensor and the list of image paths.
function dataset:sample(quantity)
   assert(quantity)
   local images, classIds, filePaths = {}, {}, {}
   local drawn = 0
   while drawn < quantity do
      drawn = drawn + 1
      local classId = torch.random(1, #self.classes)
      local img, imgPath = self:getByClass(classId)
      images[#images + 1] = img
      classIds[#classIds + 1] = classId
      filePaths[drawn] = imgPath
   end
   local batch, labelTensor = tableToOutput(self, images, classIds)
   return batch, labelTensor, filePaths
end

-- Loads the samples with indices i1..i2 (inclusive) through sampleHookTest
-- and returns the batch tensor plus the matching class-index tensor.
function dataset:get(i1, i2)
   local indexRange = torch.range(i1, i2)
   local nWanted = i2 - i1 + 1
   assert(nWanted > 0)
   local images, classIds = {}, {}
   for n = 1, nWanted do
      local row = indexRange[n]
      -- each row of imagePath holds one zero-terminated path
      local imgPath = ffi.string(torch.data(self.imagePath[row]))
      images[#images + 1] = self:sampleHookTest(imgPath)
      classIds[#classIds + 1] = self.imageClass[row]
   end
   local batch, labelTensor = tableToOutput(self, images, classIds)
   return batch, labelTensor
end

return dataset

================================================
FILE: data/donkey_folder.lua
================================================
--[[
    This data loader is a modified version of the one from dcgan.torch
    (see https://github.com/soumith/dcgan.torch/blob/master/data/donkey_folder.lua).

    Copyright (c) 2016, Deepak Pathak [See LICENSE file for details]

    Copyright (c) 2015-present, Facebook, Inc.
    All rights reserved.
This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree. An additional grant
of patent rights can be found in the PATENTS file in the same directory.
]]--

require 'image'
paths.dofile('dataset.lua')
-- This file contains the data-loading logic and details.
-- It is run by each data-loader thread.
------------------------------------------
-------- COMMON CACHES and PATHS
-- Check for existence of opt.data
local data_root = os.getenv('DATA_ROOT')
print(data_root)
-- fail with a clear message instead of an obscure error inside paths.concat
assert(data_root, 'DATA_ROOT environment variable is not set')
opt.data = paths.concat(data_root, opt.phase)

if not paths.dirp(opt.data) then
    error('Did not find directory: ' .. opt.data)
end

-- a cache file of the training metadata (if doesnt exist, will be created)
local cache = "cache"
local cache_prefix = opt.data:gsub('/', '_')
os.execute('mkdir -p cache')
local trainCache = paths.concat(cache, cache_prefix .. '_trainCache.t7')

--------------------------------------------------------------------------------------------
local input_nc = opt.input_nc -- input channels
local output_nc = opt.output_nc
local loadSize   = {input_nc, opt.loadSize}
local sampleSize = {input_nc, opt.fineSize}

-- Draws the random crop offset for one axis, or 0 when that axis already has
-- the target size. Because of the ceil over [1e-2, inSize-outSize) the drawn
-- offset is always at least 1.
local function randomCropOffset(inSize, outSize)
  if inSize == outSize then
    return 0
  end
  return math.ceil(torch.uniform(1e-2, inSize - outSize))
end

-- Applies the same jitter to both halves of a pair: scale to
-- loadSize x loadSize, reverse the channel order, map values with x*2-1,
-- take one shared random crop of sampleSize x sampleSize and, when
-- opt.flip == 1, flip both horizontally with probability 0.5.
local preprocessAandB = function(imA, imB)
  imA = image.scale(imA, loadSize[2], loadSize[2])
  imB = image.scale(imB, loadSize[2], loadSize[2])
  local perm = torch.LongTensor{3, 2, 1}
  imA = imA:index(1, perm)--:mul(256.0): brg, rgb
  imA = imA:mul(2):add(-1)
  imB = imB:index(1, perm)
  imB = imB:mul(2):add(-1)

  assert(imA:max()<=1,"A: badly scaled inputs")
  assert(imA:min()>=-1,"A: badly scaled inputs")
  assert(imB:max()<=1,"B: badly scaled inputs")
  assert(imB:min()>=-1,"B: badly scaled inputs")

  local oW = sampleSize[2]
  local oH = sampleSize[2]
  local iH = imA:size(2)
  local iW = imA:size(3)

  -- offsets are locals (they used to leak as globals); h1 is drawn before w1
  -- so the random stream is consumed in the original order
  local h1 = randomCropOffset(iH, oH)
  local w1 = randomCropOffset(iW, oW)
  if iH ~= oH or iW ~= oW then
    imA = image.crop(imA, w1, h1, w1 + oW, h1 + oH)
    imB = image.crop(imB, w1, h1, w1 + oW, h1 + oH)
  end

  if opt.flip == 1 and torch.uniform() > 0.5 then
    imA = image.hflip(imA)
    imB = image.hflip(imB)
  end

  return imA, imB
end

-- Colorization loader: loads an RGB image, scales, random-crops and
-- optionally flips it, converts it to Lab and returns a 3-channel tensor
-- whose first channel is L/50 - 1 and whose last two are the ab channels
-- divided by 110.
local function loadImageChannel(path)
  local input = image.load(path, 3, 'float')
  input = image.scale(input, loadSize[2], loadSize[2])

  local oW = sampleSize[2]
  local oH = sampleSize[2]
  local iH = input:size(2)
  local iW = input:size(3)

  local h1 = randomCropOffset(iH, oH)
  local w1 = randomCropOffset(iW, oW)
  if iH ~= oH or iW ~= oW then
    input = image.crop(input, w1, h1, w1 + oW, h1 + oH)
  end

  if opt.flip == 1 and torch.uniform() > 0.5 then
    input = image.hflip(input)
  end

  local input_lab = image.rgb2lab(input)
  local imA = input_lab[{{1}, {}, {} }]:div(50.0) - 1.0
  local imB = input_lab[{{2,3},{},{}}]:div(110.0)
  local imAB = torch.cat(imA, imB, 1)
  assert(imAB:max()<=1,"A: badly scaled inputs")
  assert(imAB:min()>=-1,"A: badly scaled inputs")

  return imAB
end

--local function loadImage

-- Loads a side-by-side pair image and returns its left half (A) and its
-- right half (B).
local function loadImage(path)
  local input = image.load(path, 3, 'float')
  local h = input:size(2)
  local w = input:size(3)

  local imA = image.crop(input, 0, 0, w/2, h)
  local imB = image.crop(input, w/2, 0, w, h)

  return imA, imB
end

-- Inpainting loader: B is the scaled, channel-reversed, cropped image mapped
-- with x*2-1; A is a copy of B whose central region is set to 1.0. Returns A
-- and B concatenated along the channel dimension.
local function loadImageInpaint(path)
  local imB = image.load(path, 3, 'float')
  imB = image.scale(imB, loadSize[2], loadSize[2])
  local perm = torch.LongTensor{3, 2, 1}
  imB = imB:index(1, perm)--:mul(256.0): brg, rgb
  imB = imB:mul(2):add(-1)
  -- these checks are on imB, so the messages now say "B"
  assert(imB:max()<=1,"B: badly scaled inputs")
  assert(imB:min()>=-1,"B: badly scaled inputs")

  local oW = sampleSize[2]
  local oH = sampleSize[2]
  local iH = imB:size(2)
  local iW = imB:size(3)

  local h1 = randomCropOffset(iH, oH)
  local w1 = randomCropOffset(iW, oW)
  if iH ~= oH or iW ~= oW then
    imB = image.crop(imB, w1, h1, w1 + oW, h1 + oH)
  end

  local imA = imB:clone()
  -- blank out the central half of the image in each spatial dimension
  imA[{{},{1 + oH/4, oH/2 + oH/4},{1 + oW/4, oW/2 + oW/4}}] = 1.0

  if opt.flip == 1 and torch.uniform() > 0.5 then
    imA = image.hflip(imA)
    imB = image.hflip(imB)
  end

  local imAB = torch.cat(imA, imB, 1)
  return imAB
end

-- channel-wise mean and std. Calculate or load them from disk later in the script.
local mean,std
--------------------------------------------------------------------------------
-- Hooks that are used for each image that is loaded

-- function to load the image, jitter it appropriately (random crops etc.)
-- Dispatches on opt.preprocess ('regular' | 'colorization' | 'inpaint') and
-- returns the A and B images concatenated along the channel dimension.
local trainHook = function(self, path)
  collectgarbage()
  local imAB   -- local: a global here could hand back a stale previous sample
  if opt.preprocess == 'regular' then
    local imA, imB = loadImage(path)
    imA, imB = preprocessAandB(imA, imB)
    imAB = torch.cat(imA, imB, 1)
  elseif opt.preprocess == 'colorization' then
    imAB = loadImageChannel(path)
  elseif opt.preprocess == 'inpaint' then
    imAB = loadImageInpaint(path)
  else
    error('unknown opt.preprocess: ' .. tostring(opt.preprocess))
  end
  return imAB
end

--------------------------------------
-- trainLoader
print('trainCache', trainCache)
print('Creating train metadata')
print('serial batch:, ', opt.serial_batches)
-- global on purpose: data.lua's jobs refer to trainLoader by name
trainLoader = dataLoader{
  paths = {opt.data},
  loadSize = {input_nc, loadSize[2], loadSize[2]},
  sampleSize = {input_nc+output_nc, sampleSize[2], sampleSize[2]},
  split = 100,
  serial_batches = opt.serial_batches,
  verbose = true
}

trainLoader.sampleHookTrain = trainHook
collectgarbage()

-- do some sanity checks on trainLoader
do
  local class = trainLoader.imageClass
  local nClasses = #trainLoader.classes
  assert(class:max() <= nClasses, "class logic has error")
  assert(class:min() >= 1, "class logic has error")
end

================================================
FILE: datasets/bibtex/cityscapes.tex
================================================
@inproceedings{Cordts2016Cityscapes,
title={The Cityscapes Dataset for Semantic Urban Scene Understanding},
author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt},
booktitle={Proc.
of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year={2016} } ================================================ FILE: datasets/bibtex/facades.tex ================================================ @INPROCEEDINGS{Tylecek13, author = {Radim Tyle{\v c}ek and Radim {\v S}{\' a}ra}, title = {Spatial Pattern Templates for Recognition of Objects with Regular Structure}, booktitle = {Proc. GCPR}, year = {2013}, address = {Saarbrucken, Germany}, } ================================================ FILE: datasets/bibtex/handbags.tex ================================================ @inproceedings{zhu2016generative, title={Generative Visual Manipulation on the Natural Image Manifold}, author={Zhu, Jun-Yan and Kr{\"a}henb{\"u}hl, Philipp and Shechtman, Eli and Efros, Alexei A.}, booktitle={Proceedings of European Conference on Computer Vision (ECCV)}, year={2016} } @InProceedings{xie15hed, author = {Xie, Saining and Tu, Zhuowen}, Title = {Holistically-Nested Edge Detection}, Booktitle = "Proceedings of IEEE International Conference on Computer Vision", Year = {2015}, } ================================================ FILE: datasets/bibtex/shoes.tex ================================================ @InProceedings{fine-grained, author = {A. Yu and K.
Grauman}, title = {{F}ine-{G}rained {V}isual {C}omparisons with {L}ocal {L}earning}, booktitle = {Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2014} } @InProceedings{xie15hed, author = {Xie, Saining and Tu, Zhuowen}, Title = {Holistically-Nested Edge Detection}, Booktitle = "Proceedings of IEEE International Conference on Computer Vision", Year = {2015}, } ================================================ FILE: datasets/bibtex/transattr.tex ================================================ @article {Laffont14, title = {Transient Attributes for High-Level Understanding and Editing of Outdoor Scenes}, author = {Pierre-Yves Laffont and Zhile Ren and Xiaofeng Tao and Chao Qian and James Hays}, journal = {ACM Transactions on Graphics (proceedings of SIGGRAPH)}, volume = {33}, number = {4}, year = {2014} }
================================================
FILE: datasets/download_dataset.sh
================================================
# Usage: download_dataset.sh <dataset-name>
# Downloads and unpacks one of the known datasets into ./datasets/.
FILE=$1

if [[ $FILE != "cityscapes" && $FILE != "night2day" && $FILE != "edges2handbags" && $FILE != "edges2shoes" && $FILE != "facades" && $FILE != "maps" ]]; then
    echo "Available datasets are cityscapes, night2day, edges2handbags, edges2shoes, facades, maps"
    exit 1
fi

echo "Specified [$FILE]"
URL=http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/$FILE.tar.gz
TAR_FILE=./datasets/$FILE.tar.gz
TARGET_DIR=./datasets/$FILE/
# expansions are quoted so an unexpected argument cannot be word-split
wget -N "$URL" -O "$TAR_FILE"
mkdir -p "$TARGET_DIR"
tar -zxvf "$TAR_FILE" -C ./datasets/
rm "$TAR_FILE"
================================================
FILE: models/download_model.sh
================================================
# Usage: download_model.sh <model-name>
# Fetches <model-name>.t7 into ./models/.
FILE=$1
URL=http://efrosgans.eecs.berkeley.edu/pix2pix/models/$FILE.t7
MODEL_FILE=./models/$FILE.t7
wget -N "$URL" -O "$MODEL_FILE"
================================================
FILE: models.lua
================================================
require 'nngraph'

-- Encoder-decoder generator (no skip connections), built as an nngraph
-- gModule. The size comments assume a 256 x 256 input.
--   input_nc:  number of input channels
--   output_nc: number of output channels
--   ngf:       number of filters in the first convolution
-- Eight stride-2 4x4 convolutions go down to 1 x 1, eight stride-2 full
-- convolutions go back up; the first three decoder stages use Dropout(0.5);
-- the output passes through Tanh.
function defineG_encoder_decoder(input_nc, output_nc, ngf)
    local netG = nil
    -- input is (nc) x 256 x 256
    local e1 = - nn.SpatialConvolution(input_nc, ngf, 4, 4, 2, 2, 1, 1)
    -- input is (ngf) x 128 x 128
    local e2 = e1 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 64 x 64
    local e3 = e2 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 2, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 32 x 32
    local e4 = e3 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 4, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 16 x 16
    local e5 = e4 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 8 x 8
    local e6 = e5 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 4 x 4
    local e7 = e6 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 2 x 2
    local e8 = e7 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1)
    -- input is (ngf * 8) x 1 x 1

    local d1 = e8 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 2 x 2
    local d2 = d1 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 4 x 4
    local d3 = d2 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 8 x 8
    local d4 = d3 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 16 x 16
    local d5 = d4 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 32 x 32
    local d6 = d5 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 4, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 64 x 64
    local d7 = d6 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 2, ngf, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf)
    -- input is (ngf) x128 x 128
    local d8 = d7 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf, output_nc, 4, 4, 2, 2, 1, 1)
    -- input is (nc) x 256 x 256

    local o1 = d8 - nn.Tanh()

    netG = nn.gModule({e1},{o1})

    return netG
end

-- U-Net generator: same encoder/decoder stack as defineG_encoder_decoder,
-- but each decoder stage output d{k}_ is joined (nn.JoinTable(2)) with the
-- mirrored encoder output before the next stage, which is why the decoder
-- full convolutions take twice as many input channels. The size comments
-- assume a 256 x 256 input and give the size of d{k}_ before the join.
function defineG_unet(input_nc, output_nc, ngf)
    local netG = nil
    -- input is (nc) x 256 x 256
    local e1 = - nn.SpatialConvolution(input_nc, ngf, 4, 4, 2, 2, 1, 1)
    -- input is (ngf) x 128 x 128
    local e2 = e1 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 64 x 64
    local e3 = e2 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 2, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 32 x 32
    local e4 = e3 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 4, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 16 x 16
    local e5 = e4 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 8 x 8
    local e6 = e5 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 4 x 4
    local e7 = e6 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 2 x 2
    local e8 = e7 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1)
    -- input is (ngf * 8) x 1 x 1

    local d1_ = e8 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 2 x 2
    local d1 = {d1_,e7} - nn.JoinTable(2)
    local d2_ = d1 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 4 x 4
    local d2 = {d2_,e6} - nn.JoinTable(2)
    local d3_ = d2 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 8 x 8
    local d3 = {d3_,e5} - nn.JoinTable(2)
    local d4_ = d3 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 16 x 16
    local d4 = {d4_,e4} - nn.JoinTable(2)
    local d5_ = d4 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 32 x 32
    local d5 = {d5_,e3} - nn.JoinTable(2)
    local d6_ = d5 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 4 * 2, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 64 x 64
    local d6 = {d6_,e2} - nn.JoinTable(2)
    local d7_ = d6 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 2 * 2, ngf, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf)
    -- input is (ngf) x128 x 128
    local d7 = {d7_,e1} - nn.JoinTable(2)
    local d8 = d7 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 2, output_nc, 4, 4, 2, 2, 1, 1)
    -- input is (nc) x 256 x 256

    local o1 = d8 - nn.Tanh()

    netG = nn.gModule({e1},{o1})

    --graph.dot(netG.fg,'netG')

    return netG
end

-- U-Net generator for 128 x 128 inputs: seven encoder and seven decoder
-- stages instead of eight. Decoder size comments give the size of d{k}_
-- before it is joined with the mirrored encoder output.
function defineG_unet_128(input_nc, output_nc, ngf)
    -- Two layer less than the default unet to handle 128x128 input
    local netG = nil
    -- input is (nc) x 128 x 128
    local e1 = - nn.SpatialConvolution(input_nc, ngf, 4, 4, 2, 2, 1, 1)
    -- input is (ngf) x 64 x 64
    local e2 = e1 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 32 x 32
    local e3 = e2 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 2, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 16 x 16
    local e4 = e3 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 4, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 8 x 8
    local e5 = e4 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 4 x 4
    local e6 = e5 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8)
    -- input is (ngf * 8) x 2 x 2
    local e7 = e6 - nn.LeakyReLU(0.2, true) - nn.SpatialConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1)
    -- input is (ngf * 8) x 1 x 1

    local d1_ = e7 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 2 x 2
    local d1 = {d1_,e6} - nn.JoinTable(2)
    local d2_ = d1 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 4 x 4
    local d2 = {d2_,e5} - nn.JoinTable(2)
    local d3_ = d2 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 8, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 8) - nn.Dropout(0.5)
    -- input is (ngf * 8) x 8 x 8
    local d3 = {d3_,e4} - nn.JoinTable(2)
    local d4_ = d3 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 8 * 2, ngf * 4, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 4)
    -- input is (ngf * 4) x 16 x 16
    local d4 = {d4_,e3} - nn.JoinTable(2)
    local d5_ = d4 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 4 * 2, ngf * 2, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf * 2)
    -- input is (ngf * 2) x 32 x 32
    local d5 = {d5_,e2} - nn.JoinTable(2)
    local d6_ = d5 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 2 * 2, ngf, 4, 4, 2, 2, 1, 1) - nn.SpatialBatchNormalization(ngf)
    -- input is (ngf) x 64 x 64
    local d6 = {d6_,e1} - nn.JoinTable(2)
    local d7 = d6 - nn.ReLU(true) - nn.SpatialFullConvolution(ngf * 2, output_nc, 4, 4, 2, 2, 1, 1)
    -- input is (nc) x 128 x 128

    local o1 = d7 - nn.Tanh()

    netG = nn.gModule({e1},{o1})

    --graph.dot(netG.fg,'netG')

    return netG
end

-- Default discriminator: defineD_n_layers with three layers.
function defineD_basic(input_nc, output_nc, ndf)
    -- local: this assignment used to create/overwrite a global `n_layers`
    local n_layers = 3
    return defineD_n_layers(input_nc, output_nc, ndf, n_layers)
end

-- rf=1
-- Per-pixel discriminator made only of 1x1 convolutions; it takes the
-- channel-wise concatenation of input and output images
-- (input_nc + output_nc channels) and ends in a Sigmoid.
function defineD_pixelGAN(input_nc, output_nc, ndf)
    local netD = nn.Sequential()

    -- input is (nc) x 256 x 256
    netD:add(nn.SpatialConvolution(input_nc+output_nc, ndf, 1, 1, 1, 1, 0, 0))
    netD:add(nn.LeakyReLU(0.2, true))
    -- state size: (ndf) x 256 x 256
    netD:add(nn.SpatialConvolution(ndf, ndf * 2, 1, 1, 1, 1, 0, 0))
    netD:add(nn.SpatialBatchNormalization(ndf * 2)):add(nn.LeakyReLU(0.2, true))
    -- state size: (ndf*2) x 256 x 256
    netD:add(nn.SpatialConvolution(ndf * 2, 1, 1, 1, 1, 1, 0, 0))
    -- state size: 1 x 256 x 256
    netD:add(nn.Sigmoid())
    -- state size: 1 x 256 x 256

    return netD
end

-- if n=0, then use pixelGAN (rf=1)
-- else rf is 16 if n=1
--            34 if n=2
--            70 if n=3
--            142 if n=4
--            286 if n=5
--            574 if n=6
function defineD_n_layers(input_nc, output_nc, ndf, n_layers)
    if n_layers==0 then
        return defineD_pixelGAN(input_nc, output_nc, ndf)
    else

        local netD = nn.Sequential()

        -- input is (nc) x 256 x 256
        netD:add(nn.SpatialConvolution(input_nc+output_nc, ndf, 4, 4, 2, 2, 1, 1))
        netD:add(nn.LeakyReLU(0.2, true))

        local nf_mult = 1
        local nf_mult_prev = 1
        for n = 1, n_layers-1 do
            nf_mult_prev = nf_mult
            nf_mult = math.min(2^n,8)
            netD:add(nn.SpatialConvolution(ndf * nf_mult_prev, ndf * nf_mult, 4, 4, 2, 2, 1, 1))
            netD:add(nn.SpatialBatchNormalization(ndf * nf_mult)):add(nn.LeakyReLU(0.2, true))
        end

        -- state size: (ndf*M) x N x N
        nf_mult_prev = nf_mult
        nf_mult =
math.min(2^n_layers,8) netD:add(nn.SpatialConvolution(ndf * nf_mult_prev, ndf * nf_mult, 4, 4, 1, 1, 1, 1)) netD:add(nn.SpatialBatchNormalization(ndf * nf_mult)):add(nn.LeakyReLU(0.2, true)) -- state size: (ndf*M*2) x (N-1) x (N-1) netD:add(nn.SpatialConvolution(ndf * nf_mult, 1, 4, 4, 1, 1, 1, 1)) -- state size: 1 x (N-2) x (N-2) netD:add(nn.Sigmoid()) -- state size: 1 x (N-2) x (N-2) return netD end end ================================================ FILE: scripts/combine_A_and_B.py ================================================ from pdb import set_trace as st import os import numpy as np import cv2 import argparse parser = argparse.ArgumentParser('create image pairs') parser.add_argument('--fold_A', dest='fold_A', help='input directory for image A', type=str, default='../dataset/50kshoes_edges') parser.add_argument('--fold_B', dest='fold_B', help='input directory for image B', type=str, default='../dataset/50kshoes_jpg') parser.add_argument('--fold_AB', dest='fold_AB', help='output directory', type=str, default='../dataset/test_AB') parser.add_argument('--num_imgs', dest='num_imgs', help='number of images',type=int, default=1000000) parser.add_argument('--use_AB', dest='use_AB', help='if true: (0001_A, 0001_B) to (0001_AB)',action='store_true') args = parser.parse_args() for arg in vars(args): print('[%s] = ' % arg, getattr(args, arg)) splits = filter( lambda f: not f.startswith('.'), os.listdir(args.fold_A)) # ignore hidden folders like .DS_Store for sp in splits: img_fold_A = os.path.join(args.fold_A, sp) img_fold_B = os.path.join(args.fold_B, sp) img_list = filter( lambda f: not f.startswith('.'), os.listdir(img_fold_A)) # ignore hidden folders like .DS_Store img_list = list(img_list) if args.use_AB: img_list = [img_path for img_path in img_list if '_A.' 
in img_path] num_imgs = min(args.num_imgs, len(img_list)) print('split = %s, use %d/%d images' % (sp, num_imgs, len(img_list))) img_fold_AB = os.path.join(args.fold_AB, sp) if not os.path.isdir(img_fold_AB): os.makedirs(img_fold_AB) print('split = %s, number of images = %d' % (sp, num_imgs)) for n in range(num_imgs): name_A = img_list[n] path_A = os.path.join(img_fold_A, name_A) if args.use_AB: name_B = name_A.replace('_A.', '_B.') else: name_B = name_A path_B = os.path.join(img_fold_B, name_B) if os.path.isfile(path_A) and os.path.isfile(path_B): name_AB = name_A if args.use_AB: name_AB = name_AB.replace('_A.', '.') # remove _A path_AB = os.path.join(img_fold_AB, name_AB) im_A = cv2.imread(path_A, cv2.IMREAD_COLOR) im_B = cv2.imread(path_B, cv2.IMREAD_COLOR) im_AB = np.concatenate([im_A, im_B], 1) cv2.imwrite(path_AB, im_AB) ================================================ FILE: scripts/edges/PostprocessHED.m ================================================ %%% Prerequisites % You need to get the cpp file edgesNmsMex.cpp from https://raw.githubusercontent.com/pdollar/edges/master/private/edgesNmsMex.cpp % and compile it in Matlab: mex edgesNmsMex.cpp % You also need to download and install Piotr's Computer Vision Matlab Toolbox: https://pdollar.github.io/toolbox/ %%% parameters % hed_mat_dir: the hed mat file directory (the output of 'batch_hed.py') % edge_dir: the output HED edges directory % image_width: resize the edge map to [image_width, image_width] % threshold: threshold for image binarization (default 25.0/255.0) % small_edge: remove small edges (default 5) function [] = PostprocessHED(hed_mat_dir, edge_dir, image_width, threshold, small_edge) if ~exist(edge_dir, 'dir') mkdir(edge_dir); end fileList = dir(fullfile(hed_mat_dir, '*.mat')); nFiles = numel(fileList); fprintf('find %d mat files\n', nFiles); for n = 1 : nFiles if mod(n, 1000) == 0 fprintf('process %d/%d images\n', n, nFiles); end fileName = fileList(n).name; filePath = fullfile(hed_mat_dir, 
fileName); jpgName = strrep(fileName, '.mat', '.jpg'); edge_path = fullfile(edge_dir, jpgName); if ~exist(edge_path, 'file') E = GetEdge(filePath); E = imresize(E,[image_width,image_width]); E_simple = SimpleEdge(E, threshold, small_edge); E_simple = uint8(E_simple*255); imwrite(E_simple, edge_path, 'Quality',100); end end end function [E] = GetEdge(filePath) load(filePath); E = 1-edge_predict; end function [E4] = SimpleEdge(E, threshold, small_edge) if nargin <= 1 threshold = 25.0/255.0; end if nargin <= 2 small_edge = 5; end if ndims(E) == 3 E = E(:,:,1); end E1 = 1 - E; E2 = EdgeNMS(E1); E3 = double(E2>=max(eps,threshold)); E3 = bwmorph(E3,'thin',inf); E4 = bwareaopen(E3, small_edge); E4=1-E4; end function [E_nms] = EdgeNMS( E ) E=single(E); [Ox,Oy] = gradient2(convTri(E,4)); [Oxx,~] = gradient2(Ox); [Oxy,Oyy] = gradient2(Oy); O = mod(atan(Oyy.*sign(-Oxy)./(Oxx+1e-5)),pi); E_nms = edgesNmsMex(E,O,1,5,1.01,1); end ================================================ FILE: scripts/edges/batch_hed.py ================================================ # HED batch processing script; modified from https://github.com/s9xie/hed/blob/master/examples/hed/HED-tutorial.ipynb # Step 1: download the hed repo: https://github.com/s9xie/hed # Step 2: download the models and protoxt, and put them under {caffe_root}/examples/hed/ # Step 3: put this script under {caffe_root}/examples/hed/ # Step 4: run the following script: # python batch_hed.py --images_dir=/data/to/path/photos/ --hed_mat_dir=/data/to/path/hed_mat_files/ # The code sometimes crashes after computation is done. Error looks like "Check failed: ... driver shutting down". You can just kill the job. # For large images, it will produce gpu memory issue. Therefore, you better resize the images before running this script. 
# Step 5: run the MATLAB post-processing script "PostprocessHED.m" import scipy.io as sio import caffe import sys import numpy as np from PIL import Image import os import argparse def parse_args(): parser = argparse.ArgumentParser(description='batch proccesing: photos->edges') parser.add_argument('--caffe_root', dest='caffe_root', help='caffe root', default='../../', type=str) parser.add_argument('--caffemodel', dest='caffemodel', help='caffemodel', default='./hed_pretrained_bsds.caffemodel', type=str) parser.add_argument('--prototxt', dest='prototxt', help='caffe prototxt file', default='./deploy.prototxt', type=str) parser.add_argument('--images_dir', dest='images_dir', help='directory to store input photos', type=str) parser.add_argument('--hed_mat_dir', dest='hed_mat_dir', help='directory to store output hed edges in mat file', type=str) parser.add_argument('--border', dest='border', help='padding border', type=int, default=128) parser.add_argument('--gpu_id', dest='gpu_id', help='gpu id', type=int, default=1) args = parser.parse_args() return args args = parse_args() for arg in vars(args): print('[%s] =' % arg, getattr(args, arg)) # Make sure that caffe is on the python path: caffe_root = args.caffe_root # this file is expected to be in {caffe_root}/examples/hed/ sys.path.insert(0, caffe_root + 'python') if not os.path.exists(args.hed_mat_dir): print('create output directory %s' % args.hed_mat_dir) os.makedirs(args.hed_mat_dir) imgList = os.listdir(args.images_dir) nImgs = len(imgList) print('#images = %d' % nImgs) caffe.set_mode_gpu() caffe.set_device(args.gpu_id) # load net net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) # pad border border = args.border for i in range(nImgs): if i % 500 == 0: print('processing image %d/%d' % (i, nImgs)) im = Image.open(os.path.join(args.images_dir, imgList[i])) in_ = np.array(im, dtype=np.float32) in_ = np.pad(in_, ((border, border), (border, border), (0, 0)), 'reflect') in_ = in_[:, :, 0:3] in_ = in_[:, :, 
::-1] in_ -= np.array((104.00698793, 116.66876762, 122.67891434)) in_ = in_.transpose((2, 0, 1)) # remove the following two lines if testing with cpu # shape for input (data blob is N x C x H x W), set data net.blobs['data'].reshape(1, *in_.shape) net.blobs['data'].data[...] = in_ # run net and take argmax for prediction net.forward() fuse = net.blobs['sigmoid-fuse'].data[0][0, :, :] # get rid of the border fuse = fuse[(border+35):(-border+35), (border+35):(-border+35)] # save hed file to the disk name, ext = os.path.splitext(imgList[i]) sio.savemat(os.path.join(args.hed_mat_dir, name + '.mat'), {'edge_predict': fuse}) ================================================ FILE: scripts/eval_cityscapes/caffemodel/deploy.prototxt ================================================ layer { name: "data" type: "Input" top: "data" input_param { shape { dim: 1 dim: 3 dim: 500 dim: 500 } } } layer { name: "conv1_1" type: "Convolution" bottom: "data" top: "conv1_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 100 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1_1" type: "ReLU" bottom: "conv1_1" top: "conv1_1" } layer { name: "conv1_2" type: "Convolution" bottom: "conv1_1" top: "conv1_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu1_2" type: "ReLU" bottom: "conv1_2" top: "conv1_2" } layer { name: "pool1" type: "Pooling" bottom: "conv1_2" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv2_1" type: "Convolution" bottom: "pool1" top: "conv2_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 stride: 1 weight_filler { 
type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu2_1" type: "ReLU" bottom: "conv2_1" top: "conv2_1" } layer { name: "conv2_2" type: "Convolution" bottom: "conv2_1" top: "conv2_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu2_2" type: "ReLU" bottom: "conv2_2" top: "conv2_2" } layer { name: "pool2" type: "Pooling" bottom: "conv2_2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv3_1" type: "Convolution" bottom: "pool2" top: "conv3_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3_1" type: "ReLU" bottom: "conv3_1" top: "conv3_1" } layer { name: "conv3_2" type: "Convolution" bottom: "conv3_1" top: "conv3_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3_2" type: "ReLU" bottom: "conv3_2" top: "conv3_2" } layer { name: "conv3_3" type: "Convolution" bottom: "conv3_2" top: "conv3_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu3_3" type: "ReLU" bottom: "conv3_3" top: "conv3_3" } layer { name: "pool3" type: "Pooling" bottom: "conv3_3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4_1" type: "Convolution" bottom: "pool3" top: "conv4_1" param { lr_mult: 1 
decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu4_1" type: "ReLU" bottom: "conv4_1" top: "conv4_1" } layer { name: "conv4_2" type: "Convolution" bottom: "conv4_1" top: "conv4_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu4_2" type: "ReLU" bottom: "conv4_2" top: "conv4_2" } layer { name: "conv4_3" type: "Convolution" bottom: "conv4_2" top: "conv4_3" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu4_3" type: "ReLU" bottom: "conv4_3" top: "conv4_3" } layer { name: "pool4" type: "Pooling" bottom: "conv4_3" top: "pool4" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv5_1" type: "Convolution" bottom: "pool4" top: "conv5_1" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu5_1" type: "ReLU" bottom: "conv5_1" top: "conv5_1" } layer { name: "conv5_2" type: "Convolution" bottom: "conv5_1" top: "conv5_2" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu5_2" type: "ReLU" bottom: "conv5_2" top: "conv5_2" } layer { name: "conv5_3" type: "Convolution" bottom: "conv5_2" top: "conv5_3" param 
{ lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu5_3" type: "ReLU" bottom: "conv5_3" top: "conv5_3" } layer { name: "pool5" type: "Pooling" bottom: "conv5_3" top: "pool5" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "fc6_cs" type: "Convolution" bottom: "pool5" top: "fc6_cs" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 4096 pad: 0 kernel_size: 7 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu6_cs" type: "ReLU" bottom: "fc6_cs" top: "fc6_cs" } layer { name: "fc7_cs" type: "Convolution" bottom: "fc6_cs" top: "fc7_cs" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 4096 pad: 0 kernel_size: 1 stride: 1 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" value: 0 } } } layer { name: "relu7_cs" type: "ReLU" bottom: "fc7_cs" top: "fc7_cs" } layer { name: "score_fr" type: "Convolution" bottom: "fc7_cs" top: "score_fr" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 20 pad: 0 kernel_size: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "upscore2" type: "Deconvolution" bottom: "score_fr" top: "upscore2" param { lr_mult: 1 } convolution_param { num_output: 20 bias_term: false kernel_size: 4 stride: 2 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "score_pool4" type: "Convolution" bottom: "pool4" top: "score_pool4" param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0 } convolution_param { num_output: 20 pad: 0 kernel_size: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: 
class cityscapes:
    """Reader for the Cityscapes dataset layout used by the FCN evaluation.

    Provides the class list, the BGR mean used for preprocessing, the
    raw-id -> train-id and train-id -> color mappings taken from the
    Cityscapes `labels.py` helper, and loaders for images and labels.
    """

    def __init__(self, data_path):
        """
        data_path: root of the Cityscapes dataset, something like
        /data2/cityscapes. It must contain `scripts/helpers/labels.py`.
        """
        self.dir = data_path
        self.classes = ['road', 'sidewalk', 'building', 'wall', 'fence',
                        'pole', 'traffic light', 'traffic sign', 'vegetation', 'terrain',
                        'sky', 'person', 'rider', 'car', 'truck',
                        'bus', 'train', 'motorcycle', 'bicycle']
        self.mean = np.array((72.78044, 83.21195, 73.45286), dtype=np.float32)
        # import cityscapes label helper and set up label mappings
        sys.path.insert(0, '{}/scripts/helpers/'.format(self.dir))
        labels = __import__('labels')
        # dictionary mapping from raw IDs to train IDs
        self.id2trainId = {label.id: label.trainId for label in labels.labels}
        # dictionary mapping train IDs to colors as 3-tuples
        self.trainId2color = {label.trainId: label.color for label in labels.labels}

    def get_dset(self, split):
        '''
        List images as (city, id) for the specified split.

        Any split other than 'train' reads the 'val' list.

        TODO(shelhamer) generate splits from cityscapes itself, instead of
        relying on these separately made text files.
        '''
        if split == 'train':
            list_file = '{}/ImageSets/segFine/train.txt'.format(self.dir)
        else:
            list_file = '{}/ImageSets/segFine/val.txt'.format(self.dir)
        # context manager so the handle is closed even if reading fails
        with open(list_file) as f:
            dataset = f.read().splitlines()
        return [(item.split('/')[0], item.split('/')[1]) for item in dataset]

    def load_image(self, split, city, idx):
        """Open the image `idx` of `city` in `split` and return it as a PIL Image."""
        im = Image.open('{}/leftImg8bit_sequence/{}/{}/{}_leftImg8bit.png'.format(self.dir, split, city, idx))
        return im

    def assign_trainIds(self, label):
        """
        Map the given label IDs to the train IDs appropriate for training.
        Use the label mapping provided in labels.py from the cityscapes scripts.

        Returns a new float32 array; the input is not modified.
        """
        raw = np.array(label, dtype=np.float32)
        mapped = raw.copy()
        # Compare against the untouched copy: remapping in place would let a
        # pixel that was already mapped to value v be remapped again when a
        # later entry happens to have raw id v.
        for k, v in self.id2trainId.items():
            mapped[raw == k] = v
        return mapped

    def load_label(self, split, city, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        label = Image.open('{}/gtFine/{}/{}/{}_gtFine_labelIds.png'.format(self.dir, split, city, idx))
        label = self.assign_trainIds(label)  # get proper labels for eval
        label = np.array(label, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label

    def preprocess(self, im):
        """
        Preprocess loaded image (by load_image) for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:, :, ::-1]
        in_ -= self.mean
        in_ = in_.transpose((2, 0, 1))
        return in_

    def palette(self, label):
        '''
        Map trainIds to colors as specified in labels.py.

        `label` is height x width, or 1 x height x width (the leading axis is
        dropped). Returns a height x width x 3 float array. Pixels whose value
        has no entry in the color table are left black.
        '''
        if label.ndim == 3:
            label = label[0]
        # zeros rather than empty: unmapped labels must not expose
        # uninitialised memory in the rendered image
        color = np.zeros((label.shape[0], label.shape[1], 3))
        for k, v in self.trainId2color.items():
            color[label == k, :] = v
        return color

    @staticmethod
    def make_boundaries(label, thickness=None):
        """
        Input is an image label, output is a numpy array mask encoding the
        boundaries of the objects.
        Extract pixels at the true boundary by dilation - erosion of label.
        Don't just pick the void label as it is not exclusive to the boundaries.

        Declared as a staticmethod because it takes no `self`; this makes it
        callable both on the class and on an instance.
        """
        assert(thickness is not None)
        import skimage.morphology as skm
        void = 255
        mask = np.logical_and(label > 0, label != void)[0]
        selem = skm.disk(thickness)
        boundaries = np.logical_xor(skm.dilation(mask, selem),
                                    skm.erosion(mask, selem))
        return boundaries

    def list_label_frames(self, split):
        """
        Select labeled frames from a split for evaluation.

        Returns a list of frame IDs, each a "city_shot_frame" string, ordered
        by city name and then by file name.
        """
        def file2idx(f):
            """Helper to convert file path into frame ID"""
            city, shot, frame = (os.path.basename(f).split('_')[:3])
            return "_".join([city, shot, frame])
        frames = []
        # sorted so the frame order does not depend on directory listing order
        cities = sorted(os.path.basename(f)
                        for f in glob.glob('{}/gtFine/{}/*'.format(self.dir, split)))
        for c in cities:
            files = sorted(glob.glob('{}/gtFine/{}/{}/*labelIds.png'.format(self.dir, split, c)))
            frames.extend([file2idx(f) for f in files])
        return frames

    def collect_frame_sequence(self, split, idx, length):
        """
        Collect sequence of frames preceding (and including) a labeled frame
        as a list of Images. `idx` is a "city_shot_frame" ID and `length` is
        the number of preceding frames to include.

        Note: 19 preceding frames are provided for each labeled frame.
        """
        city, shot, frame = idx.split('_')
        frame = int(frame)
        frame_seq = []
        for i in range(frame - length, frame + 1):
            # use the requested split instead of a hard-coded 'val' directory
            frame_path = '{0}/leftImg8bit_sequence/{1}/{2}/{2}_{3}_{4:0>6d}_leftImg8bit.png'.format(
                self.dir, split, city, shot, i)
            frame_seq.append(Image.open(frame_path))
        return frame_seq
def get_out_scoremap(net):
    """Return the per-pixel argmax over the first axis of the first item in
    the net's 'score' blob, as a uint8 array."""
    return net.blobs['score'].data[0].argmax(axis=0).astype(np.uint8)


def feed_net(net, in_):
    """
    Load prepared input into net.

    The 'data' blob is reshaped to a batch of one with the shape of `in_`
    before the values are copied in.
    """
    net.blobs['data'].reshape(1, *in_.shape)
    net.blobs['data'].data[...] = in_


def segrun(net, in_):
    """Feed `in_` to the net, run a forward pass and return the label map
    produced by get_out_scoremap."""
    feed_net(net, in_)
    net.forward()
    return get_out_scoremap(net)


def fast_hist(a, b, n):
    """
    Build the n x n confusion matrix of two flat label arrays.

    a: ground-truth labels (rows of the result); entries outside [0, n) are
       ignored together with the matching entries of b.
    b: predicted labels (columns of the result).
    n: number of classes.

    Returns an n x n integer array. If a prediction falls outside [0, n) so
    that the counts do not fit an n x n matrix, the whole example is ignored
    and an all-zero n x n array is returned (previously the scalar 0), so the
    return type is the same on every path.
    """
    k = np.where((a >= 0) & (a < n))[0]
    # cast both operands so bincount always receives integer indices
    bc = np.bincount(n * a[k].astype(int) + b[k].astype(int), minlength=n**2)
    if len(bc) != n**2:
        # ignore this example if dimension mismatch
        return np.zeros((n, n), dtype=bc.dtype)
    return bc.reshape(n, n)


def get_scores(hist):
    """
    Derive accuracy metrics from a confusion matrix (rows: ground truth,
    columns: prediction).

    Returns (pixel accuracy, mean class accuracy, mean class IoU,
    per-class accuracy, per-class IoU). A small epsilon in each denominator
    avoids division by zero for classes that never occur.
    """
    # Mean pixel accuracy
    acc = np.diag(hist).sum() / (hist.sum() + 1e-12)

    # Per class accuracy
    cl_acc = np.diag(hist) / (hist.sum(1) + 1e-12)

    # Per class IoU
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + 1e-12)

    return acc, np.nanmean(cl_acc), np.nanmean(iu), cl_acc, iu
function receptive_field_sizes()
% RECEPTIVE_FIELD_SIZES  Print the receptive field size of the n=1..5 discriminators.
%
% Each discriminator is treated as a stack of 4x4 convolutions: n layers with
% stride 2 (the first one reads the input) followed by two layers with
% stride 1. The output size is fixed to 1 and the input extent that feeds
% that single output unit is derived layer by layer, walking from the output
% back to the input.

% compute input size from a given output size
f = @(output_size, ksize, stride) (output_size - 1) * stride + ksize;

ksize = 4;           % every layer uses a 4x4 kernel
n_stride1_layers = 2; % the two layers closest to the output use stride 1

for n = 1:5
  % fix the output size to 1 and derive the receptive field in the input
  out = 1;

  % last two convolutions (stride 1)
  for layer = 1:n_stride1_layers
    out = f(out, ksize, 1);
  end

  % n stride-2 convolutions, ending with input -> conv1
  for layer = 1:n
    out = f(out, ksize, 2);
  end

  fprintf('n=%d discriminator receptive field size: %d\n', n, out);
end
gpu=X is GPU mode on GPU X how_many = 'all', -- how many test images to run (set to all to run on every image found in the data/phase folder) which_direction = 'AtoB', -- AtoB or BtoA phase = 'val', -- train, val, test ,etc preprocess = 'regular', -- for special purpose preprocessing, e.g., for colorization, change this (selects preprocessing functions in util.lua) aspect_ratio = 1.0, -- aspect ratio of result images name = '', -- name of experiment, selects which model to run, should generally should be passed on command line input_nc = 3, -- # of input image channels output_nc = 3, -- # of output image channels serial_batches = 1, -- if 1, takes images in order to make batches, otherwise takes them randomly serial_batch_iter = 1, -- iter into serial image list cudnn = 1, -- set to 0 to not use cudnn (untested) checkpoints_dir = './checkpoints', -- loads models from here results_dir='./results/', -- saves results here which_epoch = 'latest', -- which epoch to test? set to 'latest' to use latest cached model } -- one-line argument parser. parses enviroment variables to override the defaults for k,v in pairs(opt) do opt[k] = tonumber(os.getenv(k)) or os.getenv(k) or opt[k] end opt.nThreads = 1 -- test only works with 1 thread... print(opt) if opt.display == 0 then opt.display = false end opt.manualSeed = torch.random(1, 10000) -- set seed print("Random Seed: " .. opt.manualSeed) torch.manualSeed(opt.manualSeed) torch.setdefaulttensortype('torch.FloatTensor') opt.netG_name = opt.name .. '/' .. opt.which_epoch .. '_net_G' local data_loader = paths.dofile('data/data.lua') print('#threads...' .. 
-- Appends the array part of t2 (indices 1..#t2) to the end of t1.
-- t1 is extended in place and is also the return value; t2 is not modified.
function TableConcat(t1,t2)
    local count = #t2
    local idx = 1
    while idx <= count do
        table.insert(t1, t2[idx])
        idx = idx + 1
    end
    return t1
end
n) local data_curr, filepaths_curr = data:getBatch() filepaths_curr = util.basename_batch(filepaths_curr) print('filepaths_curr: ', filepaths_curr) input = data_curr[{ {}, idx_A, {}, {} }] target = data_curr[{ {}, idx_B, {}, {} }] if opt.gpu > 0 then input = input:cuda() end if opt.preprocess == 'colorization' then local output_AB = netG:forward(input):float() local input_L = input:float() output = util.deprocessLAB_batch(input_L, output_AB) local target_AB = target:float() target = util.deprocessLAB_batch(input_L, target_AB) input = util.deprocessL_batch(input_L) else output = util.deprocess_batch(netG:forward(input)) input = util.deprocess_batch(input):float() output = output:float() target = util.deprocess_batch(target):float() end paths.mkdir(paths.concat(opt.results_dir, opt.netG_name .. '_' .. opt.phase)) local image_dir = paths.concat(opt.results_dir, opt.netG_name .. '_' .. opt.phase, 'images') paths.mkdir(image_dir) paths.mkdir(paths.concat(image_dir,'input')) paths.mkdir(paths.concat(image_dir,'output')) paths.mkdir(paths.concat(image_dir,'target')) for i=1, opt.batchSize do image.save(paths.concat(image_dir,'input',filepaths_curr[i]), image.scale(input[i],input[i]:size(2),input[i]:size(3)/opt.aspect_ratio)) image.save(paths.concat(image_dir,'output',filepaths_curr[i]), image.scale(output[i],output[i]:size(2),output[i]:size(3)/opt.aspect_ratio)) image.save(paths.concat(image_dir,'target',filepaths_curr[i]), image.scale(target[i],target[i]:size(2),target[i]:size(3)/opt.aspect_ratio)) end print('Saved images to: ', image_dir) if opt.display then if opt.preprocess == 'regular' then disp = require 'display' disp.image(util.scaleBatch(input,100,100),{win=opt.display_id, title='input'}) disp.image(util.scaleBatch(output,100,100),{win=opt.display_id+1, title='output'}) disp.image(util.scaleBatch(target,100,100),{win=opt.display_id+2, title='target'}) print('Displayed images') end end filepaths = TableConcat(filepaths, filepaths_curr) end -- make webpage 
io.output(paths.concat(opt.results_dir,opt.netG_name .. '_' .. opt.phase, 'index.html')) io.write('') io.write('') for i=1, #filepaths do io.write('') io.write('') io.write('') io.write('') io.write('') io.write('') end io.write('
Image #InputOutputGround Truth
' .. filepaths[i] .. '
') ================================================ FILE: train.lua ================================================ -- usage example: DATA_ROOT=/path/to/data/ which_direction=BtoA name=expt1 th train.lua -- -- code derived from https://github.com/soumith/dcgan.torch -- require 'torch' require 'nn' require 'optim' util = paths.dofile('util/util.lua') require 'image' require 'models' opt = { DATA_ROOT = '', -- path to images (should have subfolders 'train', 'val', etc) batchSize = 1, -- # images in batch loadSize = 286, -- scale images to this size fineSize = 256, -- then crop to this size ngf = 64, -- # of gen filters in first conv layer ndf = 64, -- # of discrim filters in first conv layer input_nc = 3, -- # of input image channels output_nc = 3, -- # of output image channels niter = 200, -- # of iter at starting learning rate lr = 0.0002, -- initial learning rate for adam beta1 = 0.5, -- momentum term of adam ntrain = math.huge, -- # of examples per epoch. math.huge for full dataset flip = 1, -- if flip the images for data argumentation display = 1, -- display samples while training. 0 = false display_id = 10, -- display window id. display_plot = 'errL1', -- which loss values to plot over time. Accepted values include a comma seperated list of: errL1, errG, and errD gpu = 1, -- gpu = 0 is CPU mode. 
gpu=X is GPU mode on GPU X name = '', -- name of the experiment, should generally be passed on the command line which_direction = 'AtoB', -- AtoB or BtoA phase = 'train', -- train, val, test, etc preprocess = 'regular', -- for special purpose preprocessing, e.g., for colorization, change this (selects preprocessing functions in util.lua) nThreads = 2, -- # threads for loading data save_epoch_freq = 50, -- save a model every save_epoch_freq epochs (does not overwrite previously saved models) save_latest_freq = 5000, -- save the latest model every latest_freq sgd iterations (overwrites the previous latest model) print_freq = 50, -- print the debug information every print_freq iterations display_freq = 100, -- display the current results every display_freq iterations save_display_freq = 5000, -- save the current display of results every save_display_freq_iterations continue_train=0, -- if continue training, load the latest model: 1: true, 0: false serial_batches = 0, -- if 1, takes images in order to make batches, otherwise takes them randomly serial_batch_iter = 1, -- iter into serial image list checkpoints_dir = './checkpoints', -- models are saved here cudnn = 1, -- set to 0 to not use cudnn condition_GAN = 1, -- set to 0 to use unconditional discriminator use_GAN = 1, -- set to 0 to turn off GAN term use_L1 = 1, -- set to 0 to turn off L1 term which_model_netD = 'basic', -- selects model to use for netD which_model_netG = 'unet', -- selects model to use for netG n_layers_D = 0, -- only used if which_model_netD=='n_layers' lambda = 100, -- weight on L1 term in objective } -- one-line argument parser. 
-- Initialises the parameters of a single module; intended to be passed to
-- net:apply(), which is how defineG and defineD use it.
--   * type name contains 'Convolution':        weight ~ normal(0.0, 0.02), bias = 0
--   * type name contains 'BatchNormalization': weight ~ normal(1.0, 0.02), bias = 0
-- Modules of any other type are left untouched. A parameter the module does
-- not have (nil weight or bias) is skipped in both branches, so a convolution
-- created without a bias term no longer raises an error here.
local function weights_init(m)
   local name = torch.type(m)
   if name:find('Convolution') then
      if m.weight then m.weight:normal(0.0, 0.02) end
      if m.bias then m.bias:fill(0) end
   elseif name:find('BatchNormalization') then
      if m.weight then m.weight:normal(1.0, 0.02) end
      if m.bias then m.bias:fill(0) end
   end
end
channels end if opt.which_model_netD == "basic" then netD = defineD_basic(input_nc_tmp, output_nc, ndf) elseif opt.which_model_netD == "n_layers" then netD = defineD_n_layers(input_nc_tmp, output_nc, ndf, opt.n_layers_D) else error("unsupported netD model") end netD:apply(weights_init) return netD end -- load saved models and finetune if opt.continue_train == 1 then print('loading previously trained netG...') netG = util.load(paths.concat(opt.checkpoints_dir, opt.name, 'latest_net_G.t7'), opt) print('loading previously trained netD...') netD = util.load(paths.concat(opt.checkpoints_dir, opt.name, 'latest_net_D.t7'), opt) else print('define model netG...') netG = defineG(input_nc, output_nc, ngf) print('define model netD...') netD = defineD(input_nc, output_nc, ndf) end print(netG) print(netD) local criterion = nn.BCECriterion() local criterionAE = nn.AbsCriterion() --------------------------------------------------------------------------- optimStateG = { learningRate = opt.lr, beta1 = opt.beta1, } optimStateD = { learningRate = opt.lr, beta1 = opt.beta1, } ---------------------------------------------------------------------------- local real_A = torch.Tensor(opt.batchSize, input_nc, opt.fineSize, opt.fineSize) local real_B = torch.Tensor(opt.batchSize, output_nc, opt.fineSize, opt.fineSize) local fake_B = torch.Tensor(opt.batchSize, output_nc, opt.fineSize, opt.fineSize) local real_AB = torch.Tensor(opt.batchSize, output_nc + input_nc*opt.condition_GAN, opt.fineSize, opt.fineSize) local fake_AB = torch.Tensor(opt.batchSize, output_nc + input_nc*opt.condition_GAN, opt.fineSize, opt.fineSize) local errD, errG, errL1 = 0, 0, 0 local epoch_tm = torch.Timer() local tm = torch.Timer() local data_tm = torch.Timer() ---------------------------------------------------------------------------- if opt.gpu > 0 then print('transferring to gpu...') require 'cunn' cutorch.setDevice(opt.gpu) real_A = real_A:cuda(); real_B = real_B:cuda(); fake_B = fake_B:cuda(); real_AB = 
-- Closure handed to optim.adam for the generator: evaluates the generator
-- loss and fills gradParametersG. The argument x is not used; the function
-- works on the tensors prepared by createRealFake (real_A, real_B, fake_B,
-- fake_AB) and updates the script-level errG and errL1.
-- Returns errG and the flattened generator gradients.
local fGx = function(x)
    -- zero the bias of every convolution in both networks
    local function zero_conv_bias(m)
        if torch.type(m):find('Convolution') then
            m.bias:zero()
        end
    end
    netD:apply(zero_conv_bias)
    netG:apply(zero_conv_bias)

    gradParametersG:zero()

    local on_gpu = opt.gpu > 0

    -- all-zero gradient with the shape of fake_B, moved to the GPU if needed
    local function zero_grad_like_fake_B()
        local g = torch.zeros(fake_B:size())
        if on_gpu then
            g = g:cuda()
        end
        return g
    end

    -- GAN term
    local grad_gan = zero_grad_like_fake_B()
    if opt.use_GAN ~= 1 then
        errG = 0
    else
        -- netD:forward{input_A,input_B} was already executed in fDx, so reuse its output
        local d_out = netD.output
        -- fake labels are real for generator cost
        local target = torch.FloatTensor(d_out:size()):fill(real_label)
        if on_gpu then
            target = target:cuda()
        end
        errG = criterion:forward(d_out, target)
        local grad_d_out = criterion:backward(d_out, target)
        -- keep only the gradient of the last output_nc channels of fake_AB
        local first_channel = fake_AB:size(2) - output_nc + 1
        grad_gan = netD:updateGradInput(fake_AB, grad_d_out):narrow(2, first_channel, output_nc)
    end

    -- unary (L1) term
    local grad_l1 = zero_grad_like_fake_B()
    if opt.use_L1 ~= 1 then
        errL1 = 0
    else
        errL1 = criterionAE:forward(fake_B, real_B)
        grad_l1 = criterionAE:backward(fake_B, real_B)
    end

    -- the L1 gradient is weighted by opt.lambda before being added to the GAN gradient
    netG:backward(real_A, grad_gan + grad_l1:mul(opt.lambda))

    return errG, gradParametersG
end
opt.name) -- save opt file = torch.DiskFile(paths.concat(opt.checkpoints_dir, opt.name, 'opt.txt'), 'w') file:writeObject(opt) file:close() -- parse diplay_plot string into table opt.display_plot = string.split(string.gsub(opt.display_plot, "%s+", ""), ",") for k, v in ipairs(opt.display_plot) do if not util.containsValue({"errG", "errD", "errL1"}, v) then error(string.format('bad display_plot value "%s"', v)) end end -- display plot config local plot_config = { title = "Loss over time", labels = {"epoch", unpack(opt.display_plot)}, ylabel = "loss", } -- display plot vars local plot_data = {} local plot_win local counter = 0 for epoch = 1, opt.niter do epoch_tm:reset() for i = 1, math.min(data:size(), opt.ntrain), opt.batchSize do tm:reset() -- load a batch and run G on that batch createRealFake() -- (1) Update D network: maximize log(D(x,y)) + log(1 - D(x,G(x))) if opt.use_GAN==1 then optim.adam(fDx, parametersD, optimStateD) end -- (2) Update G network: maximize log(D(x,G(x))) + L1(y,G(x)) optim.adam(fGx, parametersG, optimStateG) -- display counter = counter + 1 if counter % opt.display_freq == 0 and opt.display then createRealFake() if opt.preprocess == 'colorization' then local real_A_s = util.scaleBatch(real_A:float(),100,100) local fake_B_s = util.scaleBatch(fake_B:float(),100,100) local real_B_s = util.scaleBatch(real_B:float(),100,100) disp.image(util.deprocessL_batch(real_A_s), {win=opt.display_id, title=opt.name .. ' input'}) disp.image(util.deprocessLAB_batch(real_A_s, fake_B_s), {win=opt.display_id+1, title=opt.name .. ' output'}) disp.image(util.deprocessLAB_batch(real_A_s, real_B_s), {win=opt.display_id+2, title=opt.name .. ' target'}) else disp.image(util.deprocess_batch(util.scaleBatch(real_A:float(),100,100)), {win=opt.display_id, title=opt.name .. ' input'}) disp.image(util.deprocess_batch(util.scaleBatch(fake_B:float(),100,100)), {win=opt.display_id+1, title=opt.name .. 
' output'}) disp.image(util.deprocess_batch(util.scaleBatch(real_B:float(),100,100)), {win=opt.display_id+2, title=opt.name .. ' target'}) end end -- write display visualization to disk -- runs on the first batchSize images in the opt.phase set if counter % opt.save_display_freq == 0 and opt.display then local serial_batches=opt.serial_batches opt.serial_batches=1 opt.serial_batch_iter=1 local image_out = nil local N_save_display = 10 local N_save_iter = torch.max(torch.Tensor({1, torch.floor(N_save_display/opt.batchSize)})) for i3=1, N_save_iter do createRealFake() print('save to the disk') if opt.preprocess == 'colorization' then for i2=1, fake_B:size(1) do if image_out==nil then image_out = torch.cat(util.deprocessL(real_A[i2]:float()),util.deprocessLAB(real_A[i2]:float(), fake_B[i2]:float()),3)/255.0 else image_out = torch.cat(image_out, torch.cat(util.deprocessL(real_A[i2]:float()),util.deprocessLAB(real_A[i2]:float(), fake_B[i2]:float()),3)/255.0, 2) end end else for i2=1, fake_B:size(1) do if image_out==nil then image_out = torch.cat(util.deprocess(real_A[i2]:float()),util.deprocess(fake_B[i2]:float()),3) else image_out = torch.cat(image_out, torch.cat(util.deprocess(real_A[i2]:float()),util.deprocess(fake_B[i2]:float()),3), 2) end end end end image.save(paths.concat(opt.checkpoints_dir, opt.name , counter .. '_train_res.png'), image_out) opt.serial_batches=serial_batches end -- logging and display plot if counter % opt.print_freq == 0 then local loss = {errG=errG and errG or -1, errD=errD and errD or -1, errL1=errL1 and errL1 or -1} local curItInBatch = ((i-1) / opt.batchSize) local totalItInBatch = math.floor(math.min(data:size(), opt.ntrain) / opt.batchSize) print(('Epoch: [%d][%8d / %8d]\t Time: %.3f DataTime: %.3f ' .. 
' Err_G: %.4f Err_D: %.4f ErrL1: %.4f'):format( epoch, curItInBatch, totalItInBatch, tm:time().real / opt.batchSize, data_tm:time().real / opt.batchSize, errG, errD, errL1)) local plot_vals = { epoch + curItInBatch / totalItInBatch } for k, v in ipairs(opt.display_plot) do if loss[v] ~= nil then plot_vals[#plot_vals + 1] = loss[v] end end -- update display plot if opt.display then table.insert(plot_data, plot_vals) plot_config.win = plot_win plot_win = disp.plot(plot_data, plot_config) end end -- save latest model if counter % opt.save_latest_freq == 0 then print(('saving the latest model (epoch %d, iters %d)'):format(epoch, counter)) torch.save(paths.concat(opt.checkpoints_dir, opt.name, 'latest_net_G.t7'), netG:clearState()) torch.save(paths.concat(opt.checkpoints_dir, opt.name, 'latest_net_D.t7'), netD:clearState()) end end parametersD, gradParametersD = nil, nil -- nil them to avoid spiking memory parametersG, gradParametersG = nil, nil if epoch % opt.save_epoch_freq == 0 then torch.save(paths.concat(opt.checkpoints_dir, opt.name, epoch .. '_net_G.t7'), netG:clearState()) torch.save(paths.concat(opt.checkpoints_dir, opt.name, epoch .. 
'_net_D.t7'), netD:clearState()) end print(('End of epoch %d / %d \t Time Taken: %.3f'):format( epoch, opt.niter, epoch_tm:time().real)) parametersD, gradParametersD = netD:getParameters() -- reflatten the params and get them parametersG, gradParametersG = netG:getParameters() end ================================================ FILE: util/cudnn_convert_custom.lua ================================================ -- modified from https://github.com/NVIDIA/torch-cudnn/blob/master/convert.lua -- removed error on nngraph -- modules that can be converted to nn seamlessly local layer_list = { 'BatchNormalization', 'SpatialBatchNormalization', 'SpatialConvolution', 'SpatialCrossMapLRN', 'SpatialFullConvolution', 'SpatialMaxPooling', 'SpatialAveragePooling', 'ReLU', 'Tanh', 'Sigmoid', 'SoftMax', 'LogSoftMax', 'VolumetricBatchNormalization', 'VolumetricConvolution', 'VolumetricFullConvolution', 'VolumetricMaxPooling', 'VolumetricAveragePooling', } -- goes over a given net and converts all layers to dst backend -- for example: net = cudnn_convert_custom(net, cudnn) -- same as cudnn.convert with gModule check commented out function cudnn_convert_custom(net, dst, exclusion_fn) return net:replace(function(x) --if torch.type(x) == 'nn.gModule' then -- io.stderr:write('Warning: cudnn.convert does not work with nngraph yet. Ignoring nn.gModule') -- return x --end local y = 0 local src = dst == nn and cudnn or nn local src_prefix = src == nn and 'nn.' or 'cudnn.' local dst_prefix = dst == nn and 'nn.' or 'cudnn.' 
local function convert(v)
    -- Build the dst-backend counterpart of module `x` for layer name `v`
    -- (e.g. 'SpatialConvolution'). `x`, `src`, `dst` and `dst_prefix` are
    -- upvalues of the enclosing replace-callback.
    local y = {}
    torch.setmetatable(y, dst_prefix..v)
    if v == 'ReLU' then y = dst.ReLU() end -- because parameters
    -- copy every field of the source module onto the converted one
    for k,u in pairs(x) do y[k] = u end
    if src == cudnn and x.clearDesc then x.clearDesc(y) end
    if src == cudnn and v == 'SpatialAveragePooling' then
        y.divide = true
        -- Read the pooling mode from the source module `x`. The previous code
        -- indexed the layer-name string `v`, which always yields nil, so
        -- count_include_pad was unconditionally false.
        y.count_include_pad = x.mode == 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
    end
    if src == nn and string.find(v, 'Convolution') then
        y.groups = 1
    end
    return y
end

-- modules rejected by the optional exclusion predicate are left untouched
if exclusion_fn and exclusion_fn(x) then
    return x
end

local t = torch.typename(x)
if t == 'nn.SpatialConvolutionMM' then
    y = convert('SpatialConvolution')
elseif t == 'inn.SpatialCrossResponseNormalization' then
    y = convert('SpatialCrossMapLRN')
else
    -- generic case: same layer name in the source and destination backends
    for _, v in ipairs(layer_list) do
        if t == src_prefix..v then
            y = convert(v)
            break -- layer names are unique, so at most one entry matches
        end
    end
end

-- y is still the sentinel 0 when no conversion applied: keep the original module
return y == 0 and x or y
end) -- closes the function passed to net:replace
end -- closes cudnn_convert_custom

================================================
FILE: util/util.lua
================================================
--
-- code derived from https://github.com/soumith/dcgan.torch
--

local util = {}

require 'torch'

-- Rescale an image to the range 0 .. 1.
-- Returns a new FloatTensor; the input tensor is not modified.
-- A constant image (max == min) maps to all zeros instead of NaN.
function util.normalize(img)
    local lo = img:min()
    local hi = img:max()
    local range = hi - lo

    img = torch.FloatTensor(img:size()):copy(img)
    if range > 0 then
        img:add(-lo):mul(1/range)
    else
        -- avoid 0 * (1/0) = NaN on constant images
        img:zero()
    end
    return img
end

-- Normalize every entry of `batch` (indexed along dimension 1) to 0 .. 1.
-- Writes the results back into `batch` and returns it.
function util.normalizeBatch(batch)
    for i = 1, batch:size(1) do
        batch[i] = util.normalize(batch[i]:squeeze())
    end
    return batch
end

-- Replace every path in the array `batch` by its basename, in place,
-- and return the same table.
function util.basename_batch(batch)
    for i = 1, #batch do
        batch[i] = paths.basename(batch[i])
    end
    return batch
end

-- default preprocessing
--
-- Preprocesses an image before passing it to a net
-- Converts from RGB to BGR and rescales from [0,1] to [-1,1]
function util.preprocess(img)
    -- RGB to BGR
    local perm = torch.LongTensor{3, 2, 1}
    img = img:index(1, perm)

    -- [0,1] to [-1,1]
    img = img:mul(2):add(-1)

    -- check that input is in expected range
    assert(img:max()<=1,"badly scaled inputs")
    assert(img:min()>=-1,"badly scaled inputs")

    return img
end

-- Undo the above preprocessing.
function util.deprocess(img)
    -- Inverse of util.preprocess: reverses the channel order and maps
    -- values from [-1,1] back to [0,1].

    -- BGR to RGB
    local perm = torch.LongTensor{3, 2, 1}
    img = img:index(1, perm)

    -- [-1,1] to [0,1]
    img = img:add(1):div(2)

    return img
end

-- Apply util.preprocess to every entry of `batch` (indexed along dim 1).
-- Results are written back into `batch`, which is also returned.
function util.preprocess_batch(batch)
    for i = 1, batch:size(1) do
        batch[i] = util.preprocess(batch[i]:squeeze())
    end
    return batch
end

-- Apply util.deprocess to every entry of `batch` (indexed along dim 1).
-- Results are written back into `batch`, which is also returned.
function util.deprocess_batch(batch)
    for i = 1, batch:size(1) do
        batch[i] = util.deprocess(batch[i]:squeeze())
    end
    return batch
end

-- preprocessing specific to colorization
--
-- Combine a lightness tensor L and a chroma tensor AB (both network-scaled)
-- into an RGB image with values clamped to [0,1].
-- Works on copies, so neither L nor AB is modified.
function util.deprocessLAB(L, AB)
    local L2 = torch.Tensor(L:size()):copy(L)
    if L2:dim() == 3 then
        -- keep only the first channel of a 3D lightness tensor
        L2 = L2[{1, {}, {} }]
    end
    local AB2 = torch.Tensor(AB:size()):copy(AB)
    AB2 = torch.clamp(AB2, -1.0, 1.0)

    -- undo the network scaling: L -> (L+1)*50, AB -> AB*110
    L2 = L2:add(1):mul(50.0)
    AB2 = AB2:mul(110.0)

    L2 = L2:reshape(1, L2:size(1), L2:size(2))

    -- These two used to be assigned without `local`, which leaked them
    -- into the global environment on every call.
    local im_lab = torch.cat(L2, AB2, 1)
    local im_rgb = torch.clamp(image.lab2rgb(im_lab):mul(255.0), 0.0, 255.0)/255.0
    return im_rgb
end

-- Turn a network-scaled lightness tensor into a 3-channel grayscale image:
-- values are mapped with (L+1)*255/2, replicated to 3 channels, then
-- divided by 255. Works on a copy of L.
function util.deprocessL(L)
    local L2 = torch.Tensor(L:size()):copy(L)
    L2 = L2:add(1):mul(255.0/2.0)

    if L2:dim()==2 then
        L2 = L2:reshape(1,L2:size(1),L2:size(2))
    end
    -- NOTE(review): L2 is passed both as receiver and as first argument;
    -- kept exactly as in the original call.
    L2 = L2:repeatTensor(L2,3,1,1)/255.0

    return L2
end

-- Apply util.deprocessL to every entry of `batch`; returns a Lua table of images.
function util.deprocessL_batch(batch)
    local batch_new = {}
    for i = 1, batch:size(1) do
        batch_new[i] = util.deprocessL(batch[i]:squeeze())
    end
    return batch_new
end

-- Apply util.deprocessLAB pairwise to the entries of batchL and batchAB;
-- returns a Lua table of RGB images.
function util.deprocessLAB_batch(batchL, batchAB)
    local batch = {}
    for i = 1, batchL:size(1) do
        batch[i] = util.deprocessLAB(batchL[i]:squeeze(), batchAB[i]:squeeze())
    end
    return batch
end

-- Rescale every image of `batch` and return a new tensor of size
-- batch:size(1) x batch:size(2) x s1 x s2.
-- s1 is therefore the output height and s2 the output width.
function util.scaleBatch(batch,s1,s2)
    local scaled_batch = torch.Tensor(batch:size(1),batch:size(2),s1,s2)
    for i = 1, batch:size(1) do
        -- image.scale takes (src, width, height). The earlier call passed
        -- (s1, s2), which only matched the allocation above for square sizes
        -- and silently scrambled the pixels otherwise.
        scaled_batch[i] = image.scale(batch[i],s2,s1):squeeze()
    end
    return scaled_batch
end

-- Wrap a 3D tensor into a batch of size one (1 x d1 x d2 x d3).
function util.toTrivialBatch(input)
    return input:reshape(1,input:size(1),input:size(2),input:size(3))
end

-- Return the first entry of a batch.
function util.fromTrivialBatch(input)
    return input[1]
end

-- Resize `input` to loadSize x loadSize, first replicating a
-- single-channel image to 3 channels.
function util.scaleImage(input, loadSize)
    -- replicate bw images to 3 channels
    if input:size(1)==1 then
        input = torch.repeatTensor(input,3,1,1)
    end

    input = image.scale(input, loadSize, loadSize)

    return input
end

-- Load the image at `path` and return size(3)/size(2) of the loaded tensor.
function util.getAspectRatio(path)
    local input = image.load(path, 3, 'float')
    local ar = input:size(3)/input:size(2)
    return ar
end

-- Load the image at `path` as a 3-channel float tensor, scale it to
-- loadSize x loadSize and run util.preprocess on it.
-- When nc == 1 only the first channel is kept (as a 1 x H x W tensor).
function util.loadImage(path, loadSize, nc)
    local input = image.load(path, 3, 'float')
    input = util.preprocess(util.scaleImage(input, loadSize))

    if nc == 1 then
        input = input[{{1}, {}, {}}]
    end

    return input
end

-- TO DO: loading code is rather hacky; clean it up and make sure it works on all types of nets / cpu/gpu configurations
--
-- Load a serialized network from `filename`.
--   opt.cudnn > 0 : require 'cudnn' before deserializing
--   opt.gpu   > 0 : require 'cunn' and move the net to CUDA, otherwise
--                   convert it to float
-- Gradient buffers are re-created as zeroed clones of the parameters.
function util.load(filename, opt)
    if opt.cudnn>0 then
        require 'cudnn'
    end

    if opt.gpu > 0 then
        require 'cunn'
    end

    local net = torch.load(filename)

    if opt.gpu > 0 then
        net:cuda()

        -- calling cuda on cudnn saved nngraphs doesn't change all variables to cuda, so do it below
        if net.forwardnodes then
            for i=1,#net.forwardnodes do
                local node_module = net.forwardnodes[i].data.module
                if node_module then
                    node_module:cuda()
                end
            end
        end
    else
        net:float()
    end

    net:apply(function(m)
        if m.weight then
            m.gradWeight = m.weight:clone():zero()
            -- modules may carry a weight without a bias; the unguarded
            -- m.bias:clone() used to raise on those
            if m.bias then
                m.gradBias = m.bias:clone():zero()
            end
        end
    end)

    return net
end

-- Convert `net` to the cudnn backend using the custom converter
-- from util/cudnn_convert_custom.
function util.cudnn(net)
    require 'cudnn'
    require 'util/cudnn_convert_custom'
    return cudnn_convert_custom(net, cudnn)
end

-- Return true when some value stored in `table` equals `value`, false otherwise.
function util.containsValue(table, value)
    for _, v in pairs(table) do
        if v == value then
            return true
        end
    end
    return false
end

return util