Repository: hbilen/WSDDN
Branch: master
Commit: bfdaa3f9ffed
Files: 21
Total size: 67.3 KB

Directory structure:
gitextract_kzv54tvv/
├── .gitignore
├── README.md
├── core/
│   ├── wsddn_demo.m
│   ├── wsddn_get_batch.m
│   ├── wsddn_init.m
│   ├── wsddn_test.m
│   └── wsddn_train.m
├── matlab/
│   └── +dagnn/
│       ├── BiasSamples.m
│       ├── LayerAP.m
│       ├── LossTopBoxSmoothProb.m
│       ├── SoftMax2.m
│       ├── SumOverDim.m
│       └── Times.m
├── pascal/
│   ├── nms.m
│   ├── setup_voc07_eb.m
│   ├── setup_voc07_ssw.m
│   ├── wsddnVOCap.m
│   ├── wsddnVOCevaldet.m
│   ├── wsddnVOChash_init.m
│   └── wsddnVOChash_lookup.m
└── setup_WSDDN.m

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*~

================================================
FILE: README.md
================================================
# Weakly Supervised Deep Detection Networks (WSDDN)

## Installation

1. Download and install [MatConvNet](http://www.vlfeat.org/matconvnet/install/)
2. Install this module with the package manager of MatConvNet [`vl_contrib`](http://www.vlfeat.org/matconvnet/mfiles/vl_contrib/#notes):

```
vl_contrib('install', 'WSDDN') ;
vl_contrib('setup', 'WSDDN') ;
```

3. If you want to train a WSDDN model, `wsddn_train` will automatically download the items below:

    a. [PASCAL VOC 2007 devkit and dataset](http://host.robots.ox.ac.uk/pascal/VOC/) under `data` folder
    b. Pre-computed edge-boxes for [trainval](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/EdgeBoxesVOC2007trainval.mat) and [test](http://groups.inf.ed.ac.uk/hbilen-data/WSDDN/EdgeBoxesVOC2007test.mat) splits
    c. Pre-trained network from [MatConvNet website](http://www.vlfeat.org/matconvnet/models)

4. You can also download the pre-trained WSDDN model ([VGGF-EB-BoxSc-SpReg](http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat)).
Note that it gives slightly different performance than reported in the paper (34.4% mAP instead of 34.5% mAP).

## Demo

After completing the installation and downloading the required files, you are ready for the demo

```matlab
cd core;
opts.modelPath = '....' ;
opts.imdbPath = '....' ;
opts.gpu = .... ;
wsddn_demo(opts) ;
```

## Test

```matlab
addpath core;
opts.modelPath = '....' ;
opts.imdbPath = '....' ;
opts.gpu = .... ;
opts.vis = true ; % visualize
wsddn_test(opts) ;
```

## Train

Download an ImageNet pre-trained model from [http://www.vlfeat.org/matconvnet/pretrained/](http://www.vlfeat.org/matconvnet/pretrained/)

```matlab
addpath core;
opts.modelPath = '....' ;
opts.imdbPath = '....' ;
opts.train.gpus = .... ;
[net,info] = wsddn_train(opts) ;
```

## Citing WSDDN

If you find the code useful, please cite:

```latex
@inproceedings{Bilen16,
  author = "Bilen, H. and Vedaldi, A.",
  title = "Weakly Supervised Deep Detection Networks",
  booktitle = "Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition",
  year = "2016"
}
```

## Acknowledgement

Many thanks to Sam Albanie for his help with the contrib package manager, and to the other nameless heroes who diligently found my bugs.

### License

The analysis work performed with the program(s) must be non-proprietary work. Licensee and its contract users must be or be affiliated with an academic facility. Licensee may additionally permit individuals who are students at such academic facility to access and use the program(s). Such students will be considered contract users of licensee. The program(s) may not be used for commercial competitive analysis (such as benchmarking) or for any commercial activity, including consulting.

================================================ FILE: core/wsddn_demo.m ================================================ function wsddn_demo(varargin) % @author: Hakan Bilen % wsddn_demo : this script shows a detection demo opts.dataDir = fullfile(vl_rootnn, 'data') ; opts.expDir = fullfile(vl_rootnn, 'exp') ; opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ; opts.proposalType = 'eb' ; opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ; % if you have limited gpu memory (<6gb), you can change the next 2 params opts.maxNumProposals = inf; % limit number % opts.imageScales = [480,576,688,864,1200]; % scales opts.imageScales = [480,576,688,864,1200]; % scales opts.gpu = [] ; opts.train.prefetch = true ; opts.numFetchThreads = 1 ; opts = vl_argparse(opts, varargin) ; display(opts); if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file') error('VOCdevkit is not installed'); end addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode')); opts.train.expDir = opts.expDir ; % ------------------------------------------------------------------------- % Network initialization % ------------------------------------------------------------------------- if ~exist(opts.modelPath, 'file') url = 'http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/wsddn.mat' ; fprintf('Downloading %s to %s\n', url, opts.modelPath) ; urlwrite(url, opts.modelPath) ; end net = load(opts.modelPath); net = dagnn.DagNN.loadobj(net) ; net.mode = 'test' ; if ~isempty(opts.gpu) gpuDevice(opts.gpu) ; net.move('gpu') ; end if isfield(net,'normalization') bopts = net.normalization; else bopts = net.meta.normalization; end bopts.rgbVariance = [] ; bopts.interpolation = net.meta.normalization.interpolation; bopts.jitterBrightness = 0 ; bopts.imageScales = opts.imageScales; bopts.numThreads = opts.numFetchThreads; bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1); bopts.addBiasSamples = 
~isempty(bs) ; bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; % ------------------------------------------------------------------------- % Database initialization % ------------------------------------------------------------------------- fprintf('loading imdb...'); if exist(opts.imdbPath,'file')==2 imdb = load(opts.imdbPath) ; else imdb = setup_voc07_eb('dataDir',opts.dataDir, ... 'proposalDir',opts.proposalDir,'loadTest',1); save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); end fprintf('done\n'); minSize = 20; imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals); % -------------------------------------------------------------------- % Detect % -------------------------------------------------------------------- % query images testIdx = [12,15]; VOCinit; cats = VOCopts.classes; ovTh = 0.4; % nms threshold scTh = 0.1; % det confidence threshold bopts.useGpu = numel(opts.gpu) > 0 ; detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1); net.vars(detLayer(1)).precious = 1; % run detection rcolors = randi(255,3,numel(cats)); for t=1:numel(testIdx) batch = testIdx(t); scoret = []; for s=1:numel(opts.imageScales) for f=1:2 % add flips inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 ); net.eval(inputs) ; if isempty(scoret) scoret = squeeze(gather(net.vars(detLayer).value)); else scoret = scoret + squeeze(gather(net.vars(detLayer).value)); end end end % divide by number of scales and flips scoret = scoret / (2 * numel(opts.imageScales)); im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)})); for cls = 1:numel(cats) scores = scoret; boxes = double(imdb.images.boxes{testIdx(t)}); boxesSc = [boxes,scores(cls,:)']; boxesSc = boxesSc(boxesSc(:,5)>scTh,:); if isempty(boxesSc), continue; end; pick = nms(boxesSc, ovTh); boxesSc = boxesSc(pick,:); im = bbox_draw(im,boxesSc(1,1:4),rcolors(:,cls),2); fprintf('%s %.2f\n',cats{cls},boxesSc(1,5)); end imshow(im); pause() ; if exist('zs_dispFig', 'file'), zs_dispFig 
; end end % -------------------------------------------------------------------- function inputs = getBatch(opts, imdb, batch, scale, flip) % -------------------------------------------------------------------- opts.scale = scale; opts.flip = flip; is_vgg16 = opts.vgg16 ; opts = rmfield(opts,'vgg16') ; images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; opts.prefetch = (nargout == 0); [im,rois] = wsddn_get_batch(images, imdb, batch, opts); rois = single(rois'); if opts.useGpu > 0 im = gpuArray(im) ; rois = gpuArray(rois) ; end rois = rois([1 3 2 5 4],:) ; ss = [16 16] ; if is_vgg16 o0 = 8.5 ; o1 = 9.5 ; else o0 = 18 ; o1 = 9.5 ; end rois = [ rois(1,:); floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1; floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1; ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1; ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1]; inputs = {'input', im, 'rois', rois} ; if opts.addBiasSamples && isfield(imdb.images,'boxScores') boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]); inputs{end+1} = 'boxScore'; inputs{end+1} = boxScore ; end % ------------------------------------------------------------------------- function imdb = fixBBoxes(imdb, minSize, maxNum) % ------------------------------------------------------------------------- for i=1:numel(imdb.images.name) bbox = imdb.images.boxes{i}; % remove small bbox isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize); bbox = bbox(isGood,:); % remove duplicate ones [dummy, uniqueIdx] = unique(bbox, 'rows', 'first'); uniqueIdx = sort(uniqueIdx); bbox = bbox(uniqueIdx,:); % limit number for training if imdb.images.set(i)~=3 nB = min(size(bbox,1),maxNum); else nB = size(bbox,1); end if isfield(imdb.images,'boxScores') imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx); imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB); end imdb.images.boxes{i} = bbox(1:nB,:); % [h,w,~] = size(imdb.images.data{i}); % 
imdb.images.boxes{i} = [1 1 h w]; end % ------------------------------------------------------------------------- function im = bbox_draw(im,roi,color,t) % DRAWRECT % IM : input image % ROI : rectangle % COLOR : % T : thickness [h,w,d] = size(im); assert(d == numel(color)); if any(roi(:,1)>h) || any(roi(:,3)>h) || any(roi(:,2)>w) || any(roi(:,4)>w) error('Wrong bounding box coord!\n'); end for c=1:d im(max(roi(1)-t,1):min(roi(1)+t,h),max(roi(2)-t,1):min(roi(4)+t,w),c) = color(c); im(max(roi(3)-t,1):min(roi(3)+t,h),max(roi(2)-t,1):min(roi(4)+t,w),c) = color(c); im(max(roi(1)-t,1):min(roi(3)+t,h),max(roi(2)-t,1):min(roi(2)+t,w),c) = color(c); im(max(roi(1)-t,1):min(roi(3)+t,h),max(roi(4)-t,1):min(roi(4)+t,w),c) = color(c); end ================================================ FILE: core/wsddn_get_batch.m ================================================ function [imo,rois] = wsddn_get_batch(images, imdb, batch, opts) % cnn_wsddn_get_batch Load, preprocess, and pack images for CNN evaluation if isempty(images) imo = [] ; rois = [] ; return ; end % fetch is true if images is a list of filenames (instead of % a cell array of images) fetch = ischar(images{1}) ; % prefetch is used to load images in a separate thread prefetch = fetch & opts.prefetch ; % pick size imSize = imdb.images.size(batch(1),:); factor = min(opts.scale(1)/imSize(1),opts.scale(1)/imSize(2)); height = floor(factor*imSize(1)); if prefetch vl_imreadjpeg(images, 'numThreads',opts.numThreads,'Resize',height,'prefetch') ; imo = [] ; rois = [] ; return ; end if fetch ims = vl_imreadjpeg(images,'numThreads',opts.numThreads,'Resize',height) ; else ims = images ; end for i=1:numel(images) % acquire image if isempty(ims{i}) imt = imread(images{i}) ; if size(imt,3) == 1 imt = cat(3, imt, imt, imt) ; end ims{i} = imresize(imt,factor,'Method',opts.interpolation); ims{i} = single(ims{i}) ; % faster than im2single (and multiplies by 255) end end bboxes = cell(1,numel(batch)); nBoxes = 0; for b=1:numel(batch) bboxes{b} 
= double(imdb.images.boxes{batch(b)}); nBoxes = nBoxes + size(bboxes{b},1); end rois = zeros(nBoxes,5); countr = 0; maxW = 0; maxH = 0; for b=1:numel(batch) hw = imdb.images.size(batch(b),:); h = hw(1); w = hw(2); imsz = size(ims{b}); if opts.flip(b) im = ims{b}; ims{b} = im(:,end:-1:1,:); bbox = bboxes{b}; bbox(:,[2,4]) = w + 1 - bbox(:,[4,2]); bboxes{b} = bbox; end maxH = max(imsz(1),maxH); maxW = max(imsz(2),maxW); % adapt bounding boxes into new coord bbox = bboxes{b}; if any(bbox(:)<=0) error('bbox error'); end nB = size(bbox,1); tbbox = scale_box(bbox,[h,w],imsz); if any(tbbox(:)<=0) error('tbbox error'); end rois(countr+1:countr+nB,:) = [b*ones(nB,1),tbbox]; countr = countr + nB; end % rois = single(rois); depth = size(ims{1},3); imo = zeros(maxH,maxW,depth,numel(batch),'single'); if isempty(opts.averageImage) avgIm = []; elseif numel(opts.averageImage)==depth avgIm = opts.averageImage; end for b=1:numel(batch) sz = size(ims{b}); imo(1:sz(1),1:sz(2),:,b) = single(ims{b}); if ~isempty(avgIm) imo(1:sz(1),1:sz(2),:,b) = single(bsxfun(@minus,imo(1:sz(1),1:sz(2),:,b),opts.averageImage)); end if ~isempty(opts.rgbVariance) imo(1:sz(1),1:sz(2),:,b) = bsxfun(@plus, imo(1:sz(1),1:sz(2),:,b), ... reshape(opts.rgbVariance * randn(3,1), 1,1,3)) ; end end function boxOut = scale_box(boxIn,szIn,szOut) h = szIn(1); w = szIn(2); bxr = 0.5 * (boxIn(:,2)+boxIn(:,4)) / w; byr = 0.5 * (boxIn(:,1)+boxIn(:,3)) / h; bwr = (boxIn(:,4)-boxIn(:,2)+1) / w; bhr = (boxIn(:,3)-boxIn(:,1)+1) / h; % boxIn center in new coord byhat = (szOut(1) * byr); bxhat = (szOut(2) * bxr); % relative width, height bhhat = szOut(1) * bhr; bwhat = szOut(2) * bwr; % transformed boxIn boxOut = [max(1,round(byhat - 0.5 * bhhat)),... max(1,round(bxhat - 0.5 * bwhat)), ... min(szOut(1),round(byhat + 0.5 * bhhat)),... 
min(szOut(2),round(bxhat + 0.5 * bwhat))]; ================================================ FILE: core/wsddn_init.m ================================================ % -------------------------------------------------------------------- function net = wsddn_init(net,varargin) % -------------------------------------------------------------------- % @author: Hakan Bilen % wsddn_init : this script initalise WSDDN model opts.addBiasSamples = 1 ; opts.softmaxTempCls = 1 ; opts.softmaxTempDet = 2 ; opts.addLossSmooth = 1 ; opts.averageImage = [] ; opts.rgbVariance = [] ; opts.numClasses = 1 ; opts.classNames = {''} ; opts = vl_argparse(opts, varargin) ; % add drop-out layers relu6p = find(cellfun(@(a) strcmp(a.name, 'relu6'), net.layers)==1); relu7p = find(cellfun(@(a) strcmp(a.name, 'relu7'), net.layers)==1); drop6 = struct('type', 'dropout', 'rate', 0.5, 'name','drop6'); drop7 = struct('type', 'dropout', 'rate', 0.5, 'name','drop7'); net.layers = [net.layers(1:relu6p) drop6 net.layers(relu6p+1:relu7p) drop7 net.layers(relu7p+1:end)]; % change loss fc layer fc8p = (cellfun(@(a) strcmp(a.name, 'fc8'), net.layers)==1); net.layers{fc8p}.weights{1} = 0.01 * ... 
randn(1,1,size(net.layers{fc8p}.weights{1},3),opts.numClasses,'single'); net.layers{fc8p}.weights{2} = zeros(1, opts.numClasses, 'single'); net.layers{fc8p}.name = 'fc8C'; net.layers(end) = [] ; % add loss (this will be changed to binary log at the end) % net.layers{end} = struct('name','loss', 'type','softmaxloss') ; % add detection layer clsLayerPos = (cellfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); detLayer = net.layers{clsLayerPos}; detLayer.weights{1} = 0.01 * randn(1,1,size(detLayer.weights{1},3),opts.numClasses,'single'); % detLayer.weights{1} = zeros(1,1,size(detLayer.weights{1},3),opts.numClasses,'single'); detLayer.weights{2} = zeros(1, opts.numClasses, 'single'); detLayer.name = 'fc8R'; % remove pool5 pPool5 = find(cellfun(@(a) strcmp(a.name, 'pool5'), net.layers)==1); net.layers = [net.layers([1:pPool5-1,pPool5+1:end]) detLayer]; % convert to dagnn net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ; % fix fc8R pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1); pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); net.layers(pFc8R).inputs = net.layers(pFc8C).inputs; net.layers(pFc8R).inputIndexes = net.layers(pFc8C).inputIndexes; % add spp pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5'), net.layers)==1); vggdeep = 0; if all(pRelu5==0) pRelu5 = (arrayfun(@(a) strcmp(a.name, 'relu5_3'), net.layers)==1); assert(any(pRelu5==1)); vggdeep = 1; end pFc6 = (arrayfun(@(a) strcmp(a.name, 'fc6'), net.layers)==1); % add spp (offset1 = rf offset, offset2 = shrinking factor) % offset1=18 offset2=9.5 levels=6 for vgg-f and vgg-m-1024 % offset1=8.5 offset2=9.5 levels=7 for vgg-very-deep-16 if vggdeep net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[7 7],... 'transform',1), ... {net.layers(pRelu5).outputs{1},'rois'}, ... 'xSPP'); else net.addLayer('SPP', dagnn.ROIPooling('subdivisions',[6 6],... 'transform',1), ... {net.layers(pRelu5).outputs{1},'rois'}, ... 
'xSPP'); end if opts.addBiasSamples % add boost net.addLayer('boostBox', ... dagnn.BiasSamples('scale',10), ... {'xSPP','boxScore'},'xBoostBox'); net.layers(pFc6).inputs{1} = 'xBoostBox'; else net.layers(pFc6).inputs{1} = 'xSPP'; end % add softmax layer for det pFc8R = (arrayfun(@(a) strcmp(a.name, 'fc8R'), net.layers)==1); net.addLayer('softmaxDet', ... dagnn.SoftMax2('dim',4, 'temp',opts.softmaxTempDet), ... net.layers(pFc8R).outputs{1},'xSoftmaxDet'); % add softmax layers for cls pFc8C = (arrayfun(@(a) strcmp(a.name, 'fc8C'), net.layers)==1); net.layers(pFc8C).outputs{1} = 'xfc8C'; net.addLayer('softmaxCls', ... dagnn.SoftMax2('dim',3, 'temp',opts.softmaxTempCls), ... net.layers(pFc8C).outputs{1},'xSoftmaxCls'); % add times layer net.addLayer('timesCR', ... dagnn.Times(), ... {'xSoftmaxCls','xSoftmaxDet'},'xTimes'); % add sum layer net.addLayer('sum', ... dagnn.SumOverDim('dim',4), ... 'xTimes','prediction'); % add classification AP net.addLayer('mAP', dagnn.LayerAP('cls_index',1:opts.numClasses), ... {'prediction','label', 'ids'}, 'mAP') ; net.addLayer('loss', dagnn.Loss('loss','binarylog'), ... {'prediction','label'}, 'objective') ; % no decay for bias for i=2:2:numel(net.params) net.params(i).weightDecay = 0; end if opts.addLossSmooth net.addLayer('LossTopBoxSmooth',dagnn.LossTopBoxSmoothProb('minOverlap',0.6),... {net.layers(pFc8R).inputs{1},'boxes','xTimes','label'},... 'lossTopB'); end meta = net.meta ; net.meta = [] ; net.meta.normalization.interpolation = meta.normalization.interpolation ; net.meta.normalization.averageImage = opts.averageImage ; net.meta.normalization.rgbVariance = opts.rgbVariance ; net.meta.classes.name = {'aeroplane', 'bicycle', 'bird', ... 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', ... 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', ... 
'sofa', 'train', 'tvmonitor', 'background' }; ================================================ FILE: core/wsddn_test.m ================================================ function aps = wsddn_test(varargin) % @author: Hakan Bilen % wsddn_test : this script evaluates detection performance in PASCAL VOC % dataset for given a WSDDN model opts.dataDir = fullfile(vl_rootnn, 'data') ; opts.expDir = fullfile(vl_rootnn, 'exp') ; opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); opts.modelPath = fullfile(vl_rootnn, 'exp', 'net.mat') ; opts.proposalType = 'eb' ; opts.proposalDir = fullfile(vl_rootnn, 'data','EdgeBoxes') ; % if you have limited gpu memory (<6gb), you can change the next 2 params opts.maxNumProposals = inf; % limit number opts.imageScales = [480,576,688,864,1200]; % scales opts.gpu = [] ; opts.train.prefetch = true ; opts.vis = 0 ; opts.numFetchThreads = 1 ; opts = vl_argparse(opts, varargin) ; display(opts); if ~exist(fullfile(opts.dataDir,'VOCdevkit','VOCcode','VOCinit.m'),'file') error('VOCdevkit is not installed'); end addpath(fullfile(opts.dataDir,'VOCdevkit','VOCcode')); opts.train.expDir = opts.expDir ; % ------------------------------------------------------------------------- % Network initialization % ------------------------------------------------------------------------- net = load(opts.modelPath); % figure(2) ; if isfield(net,'net') net = net.net; end net = dagnn.DagNN.loadobj(net) ; net.mode = 'test' ; if ~isempty(opts.gpu) gpuDevice(opts.gpu) ; net.move('gpu') ; end if isfield(net,'normalization') bopts = net.normalization; else bopts = net.meta.normalization; end bopts.rgbVariance = [] ; bopts.interpolation = net.meta.normalization.interpolation; bopts.jitterBrightness = 0 ; bopts.imageScales = opts.imageScales; bopts.numThreads = opts.numFetchThreads; bs = find(arrayfun(@(a) isa(a.block, 'dagnn.BiasSamples'), net.layers)==1); bopts.addBiasSamples = ~isempty(bs) ; bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), 
net.layers)==1) ; % ------------------------------------------------------------------------- % Database initialization % ------------------------------------------------------------------------- fprintf('loading imdb...'); if exist(opts.imdbPath,'file')==2 imdb = load(opts.imdbPath) ; else imdb = cnn_voc07_eb_setup_data('dataDir',opts.dataDir, ... 'proposalDir',opts.proposalDir,'loadTest',1); save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); end fprintf('done\n'); minSize = 20; imdb = fixBBoxes(imdb, minSize, opts.maxNumProposals); VOCinit; VOCopts.testset = 'test'; VOCopts.annopath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','Annotations','%s.xml'); VOCopts.imgsetpath = fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','%s.txt'); VOCopts.localdir = fullfile(opts.dataDir,'VOCdevkit','local','VOC2007'); cats = VOCopts.classes; ovTh = 0.4; scTh = 1e-3; % -------------------------------------------------------------------- % Detect % -------------------------------------------------------------------- if strcmp(VOCopts.testset,'test') testIdx = find(imdb.images.set == 3); elseif strcmp(VOCopts.testset,'trainval') testIdx = find(imdb.images.set < 3); end bopts.useGpu = numel(opts.gpu) > 0 ; scores = cell(1,numel(testIdx)); boxes = imdb.images.boxes(testIdx); names = imdb.images.name(testIdx); detLayer = find(arrayfun(@(a) strcmp(a.name, 'xTimes'), net.vars)==1); net.vars(detLayer(1)).precious = 1; % run detection start = tic ; for t=1:numel(testIdx) batch = testIdx(t); scoret = []; for s=1:numel(opts.imageScales) for f=1:2 % add flips inputs = getBatch(bopts, imdb, batch, opts.imageScales(s), f-1 ); net.eval(inputs) ; if isempty(scoret) scoret = squeeze(gather(net.vars(detLayer).value)); else scoret = scoret + squeeze(gather(net.vars(detLayer).value)); end end end scores{t} = scoret; % show speed time = toc(start) ; n = t * 2 * numel(opts.imageScales) ; % number of images processed overall speed = n/time ; if mod(t,10)==0 fprintf('test %d / %d speed %.1f 
Hz\n',t,numel(testIdx),speed); end if opts.vis for cls = 1:numel(cats) idx = (scores{t}(cls,:)>0.05); if sum(idx)==0, continue;end % divide by number of scales and flips im = imread(fullfile(imdb.imageDir,imdb.images.name{testIdx(t)})); boxest = double(imdb.images.boxes{testIdx(t)}(idx,:)); scorest = scores{t}(cls,idx)' / (2 * numel(opts.imageScales)); boxesSc = [boxest,scorest]; pick = nms(boxesSc, ovTh); boxesSc = boxesSc(pick,:); figure(1) ; im = bbox_draw(im,boxesSc(1,[2 1 4 3 5])); fprintf('%s %.2f',cats{cls},boxesSc(1,5)); fprintf('\n') ; title(cats{cls}); pause; end end end dets.names = names; dets.scores = scores; dets.boxes = boxes; % -------------------------------------------------------------------- % PASCAL VOC evaluation % -------------------------------------------------------------------- aps = zeros(numel(cats),1); for cls = 1:numel(cats) vocDets.confidence = []; vocDets.bbox = []; vocDets.ids = []; for i=1:numel(dets.names) scores = double(dets.scores{i}); boxes = double(dets.boxes{i}); boxesSc = [boxes,scores(cls,:)']; boxesSc = boxesSc(boxesSc(:,5)>scTh,:); pick = nms(boxesSc, ovTh); boxesSc = boxesSc(pick,:); vocDets.confidence = [vocDets.confidence;boxesSc(:,5)]; vocDets.bbox = [vocDets.bbox;boxesSc(:,[2 1 4 3])]; vocDets.ids = [vocDets.ids; repmat({dets.names{i}(1:6)},size(boxesSc,1),1)]; end [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cats{cls},vocDets,0); fprintf('%s %.1f\n',cats{cls},100*ap); aps(cls) = ap; end % -------------------------------------------------------------------- function inputs = getBatch(opts, imdb, batch, scale, flip) % -------------------------------------------------------------------- opts.scale = scale; opts.flip = flip; is_vgg16 = opts.vgg16 ; opts = rmfield(opts,'vgg16') ; images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ; opts.prefetch = (nargout == 0); [im,rois] = wsddn_get_batch(images, imdb, batch, opts); rois = single(rois'); if opts.useGpu > 0 im = gpuArray(im) ; rois = gpuArray(rois) ; end 
rois = rois([1 3 2 5 4],:) ; ss = [16 16] ; if is_vgg16 o0 = 8.5 ; o1 = 9.5 ; else o0 = 18 ; o1 = 9.5 ; end rois = [ rois(1,:); floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1; floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1; ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1; ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1]; inputs = {'input', im, 'rois', rois} ; if opts.addBiasSamples && isfield(imdb.images,'boxScores') boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]); inputs{end+1} = 'boxScore'; inputs{end+1} = boxScore ; end % ------------------------------------------------------------------------- function imdb = fixBBoxes(imdb, minSize, maxNum) for i=1:numel(imdb.images.name) bbox = imdb.images.boxes{i}; % remove small bbox isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize); bbox = bbox(isGood,:); % remove duplicate ones [dummy, uniqueIdx] = unique(bbox, 'rows', 'first'); uniqueIdx = sort(uniqueIdx); bbox = bbox(uniqueIdx,:); % limit number for training if imdb.images.set(i)~=3 nB = min(size(bbox,1),maxNum); else nB = size(bbox,1); end if isfield(imdb.images,'boxScores') imdb.images.boxScores{i} = imdb.images.boxScores{i}(isGood); imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx); imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB); end imdb.images.boxes{i} = bbox(1:nB,:); % [h,w,~] = size(imdb.images.data{i}); % imdb.images.boxes{i} = [1 1 h w]; end %-------------------------------------------------------------------------% function im = bbox_draw(im,boxes,c,t) % copied from Ross Girshick % Fast R-CNN % Copyright (c) 2015 Microsoft % Licensed under The MIT License [see LICENSE for details] % Written by Ross Girshick % -------------------------------------------------------- % source: https://github.com/rbgirshick/fast-rcnn/blob/master/matlab/showboxes.m % % % Fast R-CNN % % Copyright (c) Microsoft Corporation % % All rights reserved. 
% % MIT License % % Permission is hereby granted, free of charge, to any person obtaining a % copy of this software and associated documentation files (the "Software"), % to deal in the Software without restriction, including without limitation % the rights to use, copy, modify, merge, publish, distribute, sublicense, % and/or sell copies of the Software, and to permit persons to whom the % Software is furnished to do so, subject to the following conditions: % % The above copyright notice and this permission notice shall be included % in all copies or substantial portions of the Software. % % THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR % IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, % FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL % THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR % OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, % ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR % OTHER DEALINGS IN THE SOFTWARE. image(im); axis image; axis off; set(gcf, 'Color', 'white'); if nargin<3 c = 'r'; t = 2; end s = '-'; if ~isempty(boxes) x1 = boxes(:, 1); y1 = boxes(:, 2); x2 = boxes(:, 3); y2 = boxes(:, 4); line([x1 x1 x2 x2 x1]', [y1 y2 y2 y1 y1]', ... 'color', c, 'linewidth', t, 'linestyle', s); for i = 1:size(boxes, 1) text(double(x1(i)), double(y1(i)) - 2, ... sprintf('%.4f', boxes(i, end)), ... 
'backgroundcolor', 'b', 'color', 'w', 'FontSize', 8); end end ================================================ FILE: core/wsddn_train.m ================================================ function [net, info] = wsddn_train(varargin) % @author: Hakan Bilen % wsddn_train: training script for WSDDN opts.dataDir = fullfile(vl_rootnn, 'data') ; opts.expDir = fullfile(vl_rootnn, 'exp') ; opts.imdbPath = fullfile(vl_rootnn, 'data', 'imdbs', 'imdb-eb.mat'); opts.modelPath = fullfile(vl_rootnn, 'models', 'imagenet-vgg-f.mat') ; opts.proposalType = 'eb' ; opts.proposalDir = fullfile(vl_rootnn, 'data', 'EdgeBoxes') ; opts.addBiasSamples = 1; % add Box Scores opts.addLossSmooth = 1; % add Spatial Regulariser opts.softmaxTempCls = 1; % softmax temp for cls opts.softmaxTempDet = 2; % softmax temp for det opts.maxScale = 2000 ; % if you have limited gpu memory (<6gb), you can change the next 2 params opts.maxNumProposals = inf; % limit number (eg 1500) opts.imageScales = [480,576,688,864,1200]; % scales opts.minBoxSize = 20; % minimum bounding box size opts.train.gpus = [] ; opts.train.continue = true ; opts.train.prefetch = true ; opts.train.learningRate = 1e-5 * [ones(1,10) 0.1*ones(1,10)] ; opts.train.weightDecay = 0.0005; opts.train.numEpochs = 20; opts.train.derOutputs = {'objective', 1} ; opts.numFetchThreads = 1 ; opts = vl_argparse(opts, varargin) ; display(opts); opts.train.batchSize = 1 ; opts.train.expDir = opts.expDir ; opts.train.numEpochs = numel(opts.train.learningRate) ; %% ------------------------------------------------------------------------- % Database initialization % ------------------------------------------------------------------------- fprintf('loading imdb...'); if exist(opts.imdbPath,'file')==2 imdb = load(opts.imdbPath) ; else if strcmp(opts.proposalType,'ssw') imdb = setup_voc07_ssw('dataDir',opts.dataDir, ... 'proposalDir',opts.proposalDir,'loadTest',1); elseif strcmp(opts.proposalType,'eb') imdb = setup_voc07_eb('dataDir',opts.dataDir, ... 
'proposalDir',opts.proposalDir,'loadTest',1); else error('undefined proposal type %s\n',opts.proposalType) end imdbFolder = fileparts(opts.imdbPath); if ~exist(imdbFolder,'dir') mkdir(imdbFolder); end save(opts.imdbPath,'-struct', 'imdb', '-v7.3'); end fprintf('done\n'); imdb = fixBBoxes(imdb, opts.minBoxSize, opts.maxNumProposals); % use train + val for training imdb.images.set(imdb.images.set == 2) = 1; trainIdx = find(imdb.images.set == 1); %% Compute image statistics (mean, RGB covariances, etc.) imageStatsPath = fullfile(opts.dataDir, 'imageStats.mat') ; if exist(imageStatsPath,'file') load(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ; else images = imdb.images.name(imdb.images.set == 1) ; images = strcat([imdb.imageDir filesep],images) ; [averageImage, rgbMean, rgbCovariance] = getImageStats(images, ... 'imageSize', [256 256], ... 'numThreads', opts.numFetchThreads, ... 'gpus', opts.train.gpus) ; save(imageStatsPath, 'averageImage', 'rgbMean', 'rgbCovariance') ; end [v,d] = eig(rgbCovariance) ; rgbDeviation = v*sqrt(d) ; clear v d ; %% ------------------------------------------------------------------------ % Network initialization % ------------------------------------------------------------------------- nopts.addBiasSamples = opts.addBiasSamples; % add Box Scores (only with Edge Boxes) nopts.addLossSmooth = opts.addLossSmooth; % add Spatial Regulariser nopts.softmaxTempCls = opts.softmaxTempCls; % softmax temp for cls nopts.softmaxTempDet = opts.softmaxTempDet; % softmax temp for det nopts.averageImage = reshape(rgbMean,[1 1 3]) ; % nopts.rgbVariance = 0.1 * rgbDeviation ; nopts.rgbVariance = [] ; nopts.numClasses = numel(imdb.classes.name) ; nopts.classNames = imdb.classes.name ; if ~exist(opts.modelPath,'file') [pname,fname,ext] = fileparts(opts.modelPath) ; if ~exist(pname,'dir') mkdir(pname) ; end fprintf('Downloading %s to %s\n', [fname ext], pname) ; urlwrite(sprintf('http://www.vlfeat.org/matconvnet/models/%s',[fname ext]),... 
opts.modelPath) ; end net = load(opts.modelPath); net = wsddn_init(net,nopts); if nopts.addLossSmooth opts.train.derOutputs = {'objective', 1, 'lossTopB', 1e-4} ; end if ~exist(opts.expDir,'dir') mkdir(opts.expDir) ; end %% ------------------------------------------------------------------------- % Database stats % ------------------------------------------------------------------------- bopts = net.meta.normalization; net.meta.augmentation.jitterBrightness = 0 ; % bopts.interpolation = 'bilinear'; bopts.jitterBrightness = net.meta.augmentation.jitterBrightness ; bopts.imageScales = opts.imageScales; bopts.numThreads = opts.numFetchThreads; bopts.addLossSmooth = opts.addLossSmooth; bopts.addBiasSamples = opts.addBiasSamples; bopts.maxScale = opts.maxScale ; bopts.vgg16 = any(arrayfun(@(a) strcmp(a.name, 'relu5_1'), net.layers)==1) ; %% ------------------------------------------------------------------- % Train % -------------------------------------------------------------------- % avoid test data valIdx = find(imdb.images.set == 3); valIdx = valIdx(1:5:end) ; % valIdx = []; %% bopts.useGpu = numel(opts.train.gpus) > 0 ; bopts.prefetch = opts.train.prefetch; info = cnn_train_dag(net, imdb, @(i,b) ... getBatch(bopts,i,b), ... opts.train, 'train', trainIdx, ... 
'val', valIdx) ;

%% -------------------------------------------------------------------
% Deploy network
% --------------------------------------------------------------------
if ~exist(fullfile(opts.expDir,'net.mat'),'file')
  removeLoss = {'dagnn.Loss','dagnn.DropOut'};
  for i=1:numel(removeLoss)
    dagRemoveLayersOfType(net,removeLoss{i}) ;
  end
  net.mode = 'test' ;
  net_ = net ;
  net = net_.saveobj() ;
  save(fullfile(opts.expDir,'net.mat'), '-struct','net');
end

% --------------------------------------------------------------------
function inputs = getBatch(opts, imdb, batch)
% --------------------------------------------------------------------
% GETBATCH  Build the name/value input list for one batch.
%   INPUTS = GETBATCH(OPTS, IMDB, BATCH) loads the images indexed by BATCH
%   through wsddn_get_batch and returns a cell array of name/value pairs:
%   'input', 'label', 'rois', 'ids', plus 'boxes' when OPTS.addLossSmooth
%   is set and 'boxScore' when OPTS.addBiasSamples is 1.
%
%   Called without an output argument, it only asks wsddn_get_batch to
%   prefetch (OPTS.prefetch is set from nargout) and returns nothing.
%   An empty BATCH yields the same names with empty values.

if isempty(batch)
  inputs = {'input', [], 'label', [], 'rois', [], 'ids', []};
  return;
end

% one random scale per batch, one random flip flag per image
opts.scale = opts.imageScales(randi(numel(opts.imageScales)));
opts.flip = randi(2,numel(batch),1)-1; % random flip

% 'vgg16' is consumed here and removed before opts is handed on
is_vgg16 = opts.vgg16 ;
opts = rmfield(opts,'vgg16') ;

images = strcat([imdb.imageDir filesep], imdb.images.name(batch)) ;
opts.prefetch = (nargout == 0);

[im,rois] = wsddn_get_batch(images, imdb, batch, opts);

if nargout>0
  rois = single(rois') ;
  labels = imdb.images.label(:,batch) ;
  labels = reshape(labels,[1 1 size(labels,1) numel(batch)]);

  if opts.useGpu > 0
    im = gpuArray(im) ;
    rois = gpuArray(rois) ;
  end

  % swap rows 2<->3 and 4<->5 of the 5-row roi matrix
  % (presumably [y x] -> [x y] ordering -- confirm against wsddn_get_batch)
  if ~isempty(rois)
    rois = rois([1 3 2 5 4],:) ;
  end

  % rescale rows 2..5 with stride ss and offsets o0/o1
  % NOTE(review): looks like an image -> feature-map coordinate mapping;
  % the constants depend on the network variant -- confirm.
  ss = [16 16] ;
  if is_vgg16
    o0 = 8.5 ;
    o1 = 9.5 ;
  else
    o0 = 18 ;
    o1 = 9.5 ;
  end

  rois = [ rois(1,:); ...
    floor((rois(2,:) - o0 + o1) / ss(1) + 0.5) + 1;
    floor((rois(3,:) - o0 + o1) / ss(2) + 0.5) + 1;
    ceil((rois(4,:) - o0 - o1) / ss(1) - 0.5) + 1;
    ceil((rois(5,:) - o0 - o1) / ss(2) - 0.5) + 1];

  inputs = {'input', im, 'label', labels, 'rois', rois, 'ids', batch} ;

  if opts.addLossSmooth
    inputs{end+1} = 'boxes' ;
    inputs{end+1} = imdb.images.boxes{batch} ;
  end

  if opts.addBiasSamples==1
    boxScore = reshape(imdb.images.boxScores{batch},[1 1 1 numel(imdb.images.boxScores{batch})]);
    inputs{end+1} = 'boxScore';
    inputs{end+1} = boxScore ;
  end
end

% -------------------------------------------------------------------------
function imdb = fixBBoxes(imdb, minSize, maxNum)
% -------------------------------------------------------------------------
% FIXBBOXES  Filter the proposal boxes of every image in IMDB.
%   For each image: keep boxes whose column-3/column-1 and
%   column-4/column-2 extents are at least MINSIZE, drop duplicate rows
%   (first occurrence wins, original order kept), and keep at most MAXNUM
%   boxes unless the image belongs to set 3. When IMDB.images.boxScores
%   exists it is filtered with exactly the same indices.

for i=1:numel(imdb.images.name)
  bbox = imdb.images.boxes{i};

  % remove small bbox
  isGood = (bbox(:,3)>=bbox(:,1)+minSize) & (bbox(:,4)>=bbox(:,2)+minSize);
  bbox = bbox(isGood,:);

  % remove duplicate ones
  [~, uniqueIdx] = unique(bbox, 'rows', 'first');
  uniqueIdx = sort(uniqueIdx);   % restore the original box order
  bbox = bbox(uniqueIdx,:);

  % limit number for training
  if imdb.images.set(i)~=3
    nB = min(size(bbox,1),maxNum);
  else
    nB = size(bbox,1);
  end

  if isfield(imdb.images,'boxScores')
    imdb.images.boxScores{i} = imdb.images.boxScores{i}(isGood);
    imdb.images.boxScores{i} = imdb.images.boxScores{i}(uniqueIdx);
    imdb.images.boxScores{i} = imdb.images.boxScores{i}(1:nB);
  end
  imdb.images.boxes{i} = bbox(1:nB,:);
  % [h,w,~] = size(imdb.images.data{i});
  % imdb.images.boxes{i} = [1 1 h w];
end

% -------------------------------------------------------------------------
function layers = dagFindLayersOfType(net, type)
% -------------------------------------------------------------------------
% DAGFINDLAYERSOFTYPE  Names of the layers whose block is of class TYPE.
%   Returns a 1xN cell array of layer names (an empty cell array when no
%   layer matches), in the order the layers appear in NET.layers.

layers = {} ;   % always a cell, also when nothing matches
for l = 1:numel(net.layers)
  if isa(net.layers(l).block, type)
    layers{1,end+1} = net.layers(l).name ;
  end
end

% -------------------------------------------------------------------------
function dagRemoveLayersOfType(net, type) %
-------------------------------------------------------------------------
% Removes every layer of class TYPE from NET and reconnects the graph by
% renaming the removed layer's first output variable to its first input.
names = dagFindLayersOfType(net, type) ;
for i = 1:numel(names)
  layer = net.layers(net.getLayerIndex(names{i})) ;
  net.removeLayer(names{i}) ;
  net.renameVar(layer.outputs{1}, layer.inputs{1}, 'quiet', true) ;
end

================================================
FILE: matlab/+dagnn/BiasSamples.m
================================================
classdef BiasSamples < dagnn.ElementWise
  % @author: Hakan Bilen
  % Scales inputs{1} by (1 + scale * inputs{2}), broadcasting with bsxfun.
  properties
    scale = single(1)
  end

  properties (Transient)
    boxCoefs = []   % last computed multiplier, 1 + scale*inputs{2}
  end

  methods
    function outputs = forward(obj, inputs, params)
      % outputs{1} = inputs{1} .* (1 + scale*inputs{2}), with broadcasting
      if numel(inputs) ~= 2
        error('Number of inputs is not 2');
      end
      obj.boxCoefs = single(1)+obj.scale*inputs{2};
      outputs{1} = bsxfun(@times,inputs{1},obj.boxCoefs);
    end

    function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
      % Only the derivative w.r.t. inputs{1} is produced; derInputs{2}
      % is left empty.
      derInputs = cell(1,2) ;
      obj.boxCoefs = single(1)+obj.scale*inputs{2};
      derInputs{1} = bsxfun(@times,derOutputs{1},obj.boxCoefs) ;
      derParams = {} ;
    end

    function obj = BiasSamples(varargin)
      obj.load(varargin) ;
    end

    function reset(obj)
      obj.boxCoefs = [] ;
    end

    function rfs = getReceptiveFields(obj)
      rfs.size = [1 1] ;
      rfs.stride = [1 1] ;
      rfs.offset = [1 1] ;
    end

    function outputSizes = getOutputSizes(obj, inputSizes)
      % output has the size of the first input
      outputSizes = inputSizes(1) ;
    end
  end
end

================================================
FILE: matlab/+dagnn/LayerAP.m
================================================
classdef LayerAP < dagnn.Loss
  % @author: Hakan Bilen
  % 11 step average precision
  %
  % Accumulates scores and labels over successive forward calls and
  % reports 100 * mean(average precision) over the classes in cls_index.
  properties
    cls_index = 1        % indices read along the 3rd dimension of the inputs
    resetLayer = false   % set by reset(); clears the state on next forward
    gtLabels = []
    scores = []
    ids = []
    aps = []
    voc07 = true % 11 step
    classNames = {}
  end

  methods
    function outputs = forward(obj, inputs, params)
      % inputs{1}: scores, inputs{2}: labels, optional inputs{3}: ids.
      % With ids, scores of an id that was already seen are summed into
      % the stored entry instead of being appended.
      if obj.resetLayer
        % deferred reset requested by reset()
        obj.gtLabels = [] ;
        obj.scores = [] ;
        obj.ids = [] ;
        obj.aps = [] ;
        obj.resetLayer = false ;
      end

      if numel(inputs)==2
        % no ids: append the new columns
        obj.scores = [obj.scores gather(squeeze(inputs{1}(:,:,obj.cls_index,:)))];
        obj.gtLabels = [obj.gtLabels gather(squeeze(inputs{2}(:,:,obj.cls_index,:)))];
      elseif numel(inputs)>2
        scoresCur = gather(squeeze(inputs{1}(:,:,obj.cls_index,:)));
        gtLabelsCur = gather(squeeze(inputs{2}(:,:,obj.cls_index,:)));
        idsCur = gather(squeeze(inputs{3}));

        [lia,locb] = ismember(idsCur,obj.ids);

        if any(lia)
          % NOTE(review): this branch indexes scoresCur/obj.scores by row
          % per id, while the else branch and compute_average_precision
          % treat rows as classes -- confirm the intended layout.
          obj.scores = [obj.scores scoresCur(~lia,:)];
          obj.gtLabels = [obj.gtLabels gtLabelsCur(~lia,:)];
          obj.ids = [obj.ids(:) ; idsCur(~lia,:)];

          nz = find(lia);
          for i=1:numel(nz)
            obj.scores(locb(nz(i)),:) = obj.scores(locb(nz(i)),:) + ...
              scoresCur(nz(i),:);
          end
        else
          obj.scores = [obj.scores scoresCur];
          obj.gtLabels = [obj.gtLabels gtLabelsCur];
          obj.ids = [obj.ids(:) ; idsCur]';
        end
      else
        error('wrong number of inputs');
      end

      % AP is recomputed over everything accumulated so far
      obj.aps = obj.compute_average_precision();
      obj.average = 100 * mean(obj.aps);
      outputs{1} = 100 * mean(obj.aps);
    end

    function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
      % passes derOutputs{1} through as the derivative of the first input
      derInputs = cell(1,numel(inputs));
      derInputs{1} = derOutputs{1} ;
      derParams = {} ;
    end

    function reset(obj)
      % the accumulated state is cleared lazily, on the next forward call
      obj.resetLayer = true ;
      % obj.average = 0 ;
      % obj.aps = 0 ;
      % obj.gtLabels = [];
      % obj.scores = [];
      % obj.ids = [];
    end

    function printAP(obj)
      % print one line per class: name (or index) and AP in percent
      if isempty(obj.classNames)
        for i=1:numel(obj.aps)
          fprintf('class-%d %.1f\n',i,100*obj.aps(i)) ;
        end
      else
        for i=1:numel(obj.aps)
          fprintf('%-50s %.1f\n',obj.classNames{i},100*obj.aps(i)) ;
        end
      end
    end

    function aps = compute_average_precision(obj)
      % One AP per entry of cls_index; row c of scores/gtLabels is class c.
      % Classes without any positive label (gt>0) keep an AP of 0.
      assert(all(size(obj.scores)==size(obj.gtLabels)));

      % nImg = size(obj.scores,1);
      nCls = numel(obj.cls_index);

      aps = zeros(1,nCls);

      for c=1:nCls
        gt = obj.gtLabels(c,:);
        conf = obj.scores(c,:) ;

        if sum(gt>0)==0, continue ; end

        % compute average precision
        if obj.voc07
          [rec,prec,ap]=obj.VOC07ap(conf,gt) ;
        else
          [rec,prec,ap]=obj.THUMOSeventclspr(conf,gt) ;
        end
        aps(c) = ap;
      end
    end

    function [rec,prec,ap]=VOC07ap(obj,conf,gt)
      % 11-point interpolated AP: positives are gt>0, negatives gt<0;
      % entries with gt==0 count as neither.
      [~,si]=sort(-conf);
      tp=gt(si)>0;
      fp=gt(si)<0;

      fp=cumsum(fp);
      tp=cumsum(tp);
      rec=tp/sum(gt>0);
      prec=tp./(fp+tp);

      ap=0;
      for t=0:0.1:1
        % best precision at recall >= t (0 when that recall is not reached)
        p=max(prec(rec>=t));
        if isempty(p)
          p=0;
        end
        ap=ap+p/11;
      end
    end

    function
[rec,prec,ap]=THUMOSeventclspr(obj,conf,gt)
      [so,sortind]=sort(-conf);
      tp=gt(sortind)==1;
      fp=gt(sortind)~=1;
      npos=length(find(gt==1));

      % compute precision/recall
      fp=cumsum(fp);
      tp=cumsum(tp);
      rec=tp/npos;
      prec=tp./(fp+tp);

      % compute average precision
      ap=0;
      tmp=gt(sortind)==1;
      for i=1:length(conf)
        if tmp(i)==1
          ap=ap+prec(i);
        end
      end
      ap=ap/npos;
    end

    function obj = LayerAP(varargin)
      obj.load(varargin) ;
      obj.loss = 'average_precision' ;
    end
  end
end

================================================
FILE: matlab/+dagnn/LossTopBoxSmoothProb.m
================================================
classdef LossTopBoxSmoothProb < dagnn.Loss
  % given top scoring box, it finds other boxes with at least overlap of
  % minOverlap and calculates the euclidean dist between top and other
  % boxes
  %
  % Inputs, as read by forward/backward:
  %   inputs{1}  values compared between boxes; boxes are indexed along
  %              the 4th dimension
  %   inputs{2}  boxes; transposed on entry so each column is one box
  %   inputs{3}  scores; after squeeze, row c holds the scores of class c
  %   inputs{4}  labels; class c is processed only when labels(c) > 0
  properties (Transient)
    gtIdx = []        % per class: index of the top scoring box
    boxIdx = []       % per class: indices of the overlapping boxes
    probs = []        % per class: scores of the boxes in boxIdx
    minOverlap = 0.5  % intersection-over-union threshold
    nBoxes = 10       % only the nBoxes best scoring boxes are considered
  end

  methods
    function outputs = forward(obj, inputs, params)
      % Sums, over the positive classes, 0.5 * the squared score-weighted
      % differences between the top box and its overlapping neighbours,
      % and folds the result into the running average.
      if numel(inputs) ~= 4
        error('Number of inputs is not 4');
      end
      obj.gtIdx = [];
      obj.boxIdx = [];
      obj.probs = [];
      boxes = double(gather(inputs{2})');
      scores = gather(squeeze(inputs{3}));
      labels = gather(squeeze(inputs{4}));

      % Assign the output before the early return below; otherwise the
      % call fails with an unassigned output when fewer than two boxes
      % are given.
      outputs{1} = zeros(1,'like',inputs{1});
      if numel(boxes)<5
        return;
      end

      for c=1:numel(labels)
        if labels(c)<=0
          continue;
        end
        [so, si] = sort(scores(c,:),'descend');
        obj.gtIdx{c} = si(1);
        gtBox = boxes(:,obj.gtIdx{c});
        gtArea = (gtBox(3)-gtBox(1)+1) .* (gtBox(4)-gtBox(2)+1);

        % candidate neighbours: ranks 2..nBoxes
        bbs = boxes(:,si(2:min(obj.nBoxes,end)))';

        y1 = bbs(:,1);
        x1 = bbs(:,2);
        y2 = bbs(:,3);
        x2 = bbs(:,4);

        area = (x2-x1+1) .* (y2-y1+1);

        yy1 = max(gtBox(1), y1);
        xx1 = max(gtBox(2), x1);
        yy2 = min(gtBox(3), y2);
        xx2 = min(gtBox(4), x2);

        w = max(0.0, xx2-xx1+1);
        h = max(0.0, yy2-yy1+1);

        inter = w.*h;
        o = find((inter ./ (gtArea + area - inter))>obj.minOverlap);
        if isempty(o)
          continue;
        end
        % o indexes bbs, which starts at rank 2, hence the +1
        obj.boxIdx{c} = si(o+1);
        obj.probs{c} = so(o+1);

        d = bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c}));
        % Align the per-box weights with the box (4th) dimension, exactly
        % as backward does; a plain row vector would broadcast along
        % dimension 2 instead.
        boxWeights = reshape(obj.probs{c},[1 1 1 numel(obj.probs{c})]);
        d = bsxfun(@times,d,boxWeights);
        outputs{1} = outputs{1} + 0.5 * sum(d(:).^2);
      end

      n = obj.numAveraged ;
      m = n + 1 ;
      obj.average = (n * obj.average + gather(outputs{1})) / m ;
      obj.numAveraged = m ;
    end

    function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
      % Uses the indices cached by forward. Only derInputs{1} is filled.
      % NOTE(review): per-class results are assigned, not accumulated, so
      % when two classes select the same boxes the later class wins;
      % the weight is probs (not probs.^2) -- confirm both are intended.
      derInputs = cell(1,4) ;
      derInputs{1} = zeros(size(inputs{1}),'like',inputs{1});
      for c=1:numel(obj.boxIdx)
        if isempty(obj.boxIdx{c}), continue; end
        derInputs{1}(:,:,:,obj.boxIdx{c}) = ...
          bsxfun(@minus,inputs{1}(:,:,:,obj.boxIdx{c}),inputs{1}(:,:,:,obj.gtIdx{c}));
        derInputs{1}(:,:,:,obj.boxIdx{c}) = bsxfun(@times,...
          reshape(obj.probs{c},[1 1 1 numel(obj.probs{c})]),derInputs{1}(:,:,:,obj.boxIdx{c}));
        % the top box receives the negated sum of its neighbours' terms
        derInputs{1}(:,:,:,obj.gtIdx{c}) = -sum(derInputs{1}(:,:,:,obj.boxIdx{c}),4);
      end
      derInputs{1} = derInputs{1} * derOutputs{1};
      % fprintf('LossTopBox l2 %f ',sqrt(sum(derInputs{1}(:).^2)));
      derParams = {} ;
    end

    function obj = LossTopBoxSmoothProb(varargin)
      obj.load(varargin) ;
      obj.loss = 'LossTopBoxSmoothProb';
    end

    function reset(obj)
      obj.gtIdx = [];
      obj.boxIdx = [];
      obj.probs = [];
      obj.average = 0 ;
      obj.numAveraged = 0 ;
    end
  end
end

================================================
FILE: matlab/+dagnn/SoftMax2.m
================================================
classdef SoftMax2 < dagnn.ElementWise
  % @author: Hakan Bilen
  % Softmax2 : it is a more generic softmax layer with a dimension and temperature parameter
  properties
    dim = 3;
    temp = 1;
    scale = 1;
  end

  methods
    function outputs = forward(self, inputs, params)
      inputs{1} = inputs{1} / self.temp;
      order = 1:numel(size(inputs{1}));
      if self.dim~=3
        order([3 self.dim]) = [self.dim 3];
        inputs{1} = permute(inputs{1},order);
      end
      outputs{1} = vl_nnsoftmax(inputs{1}) ;
      if self.dim~=3
        outputs{1} = permute(outputs{1},order) ;
      end
    end

    function [derInputs, derParams] = backward(self, inputs, params, derOutputs)
      inputs{1} = inputs{1} / self.temp;
      order = 1:numel(size(inputs{1}));
      if self.dim~=3
        order(3) = self.dim;
        order(self.dim) = 3;
        inputs{1} = permute(inputs{1},order);
        derOutputs{1} = permute(derOutputs{1},order);
      end
derInputs{1} = vl_nnsoftmax(inputs{1}, derOutputs{1}) ;
      % NOTE(review): forward divides the input by temp, but no matching
      % 1/temp factor is applied to derInputs here -- confirm intended.
      if self.dim~=3
        derInputs{1} = permute(derInputs{1},order) ;
      end
      derParams = {} ;
    end

    function obj = SoftMax2(varargin)
      obj.load(varargin) ;
      obj.dim = single(obj.dim);
      obj.temp = single(obj.temp);
      obj.scale = single(obj.scale);
    end
  end
end

================================================
FILE: matlab/+dagnn/SumOverDim.m
================================================
classdef SumOverDim < dagnn.ElementWise
  % @author: Hakan Bilen
  % SumOverDim is the sum of the elements of inputs{1} over dimension dim
  properties
    dim = 3;
  end

  methods
    function outputs = forward(obj, inputs, params)
      % collapse dimension obj.dim by summation
      outputs = {sum(inputs{1}, obj.dim)} ;
    end

    function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
      % replicate the incoming derivative along the summed dimension
      reps = ones(1, numel(size(inputs{1}))) ;
      reps(obj.dim) = size(inputs{1}, obj.dim) ;
      derInputs = {repmat(derOutputs{1}, reps)} ;
      derParams = {} ;
    end

    function outputSizes = getOutputSizes(obj, inputSizes)
      % same size as the input, with dimension obj.dim reduced to 1
      sz = inputSizes{1} ;
      sz(obj.dim) = 1 ;
      outputSizes = {sz} ;
    end

    function obj = SumOverDim(varargin)
      obj.load(varargin) ;
    end
  end
end

================================================
FILE: matlab/+dagnn/Times.m
================================================
classdef Times < dagnn.ElementWise
  % @author: Hakan Bilen
  % Times (multiply) DagNN layer
  % The Times layer takes the multiplication of two inputs and store the result
  % as its only output.
  methods
    function outputs = forward(obj, inputs, params)
      if numel(inputs) ~= 2
        error('Number of inputs is not 2');
      end
      outputs = {inputs{1} .* inputs{2}} ;
    end

    function [derInputs, derParams] = backward(obj, inputs, params, derOutputs)
      % product rule: each input receives the derivative times the other
      dzdy = derOutputs{1} ;
      derInputs = {dzdy .* inputs{2}, dzdy .* inputs{1}} ;
      derParams = {} ;
    end

    function obj = Times(varargin)
      obj.load(varargin) ;
    end

    function rfs = getReceptiveFields(obj)
      rfs = struct('size', [1 1], 'stride', [1 1], 'offset', [1 1]) ;
    end

    function outputSizes = getOutputSizes(obj, inputSizes)
      outputSizes = inputSizes(1) ;
    end
  end
end

================================================
FILE: pascal/nms.m
================================================
function pick = nms(boxes, overlap)
% top = nms(boxes, overlap)
% Non-maximum suppression. (FAST VERSION)
% Greedily select high-scoring detections and skip detections
% that are significantly covered by a previously selected
% detection.
%
% NOTE: This is adapted from Pedro Felzenszwalb's version (nms.m),
% but an inner loop has been eliminated to significantly speed it
% up in the case of a large number of boxes

% Copyright (C) 2011-12 by Tomasz Malisiewicz
% All rights reserved.
%
% This file is part of the Exemplar-SVM library and is made
% available under the terms of the MIT license (see COPYING file).
% Project homepage: https://github.com/quantombone/exemplarsvm

if isempty(boxes)
  pick = [];
  return;
end

% box corners: columns 1..4 are x1, y1, x2, y2
left   = boxes(:,1);
top    = boxes(:,2);
right  = boxes(:,3);
bottom = boxes(:,4);

% the last column carries the score unless only the 4 corners are given
if size(boxes,2) ~= 4
  s = boxes(:,end);
else
  s = ones(1,size(boxes,1));
end

boxArea = (right-left+1) .* (bottom-top+1);

% ascending order: the best remaining box is always the last entry
[~, order] = sort(s);

pick = s*0;
nPicked = 0;
while ~isempty(order)
  nLeft = length(order);
  best = order(nLeft);
  nPicked = nPicked + 1;
  pick(nPicked) = best;

  % overlap of the picked box with every other remaining box
  rest = order(1:nLeft-1);
  iw = max(0.0, min(right(best), right(rest)) - max(left(best), left(rest)) + 1);
  ih = max(0.0, min(bottom(best), bottom(rest)) - max(top(best), top(rest)) + 1);
  inter = iw .* ih;
  iou = inter ./ (boxArea(best) + boxArea(rest) - inter);

  % keep only the boxes that are not suppressed (drops the picked one too)
  order = order(iou <= overlap);
end

pick = pick(1:nPicked);

================================================
FILE: pascal/setup_voc07_eb.m
================================================
function imdb = setup_voc07_eb(varargin)
% cnn_voc07_eb_setup_data Initialize PASCAL VOC2007 data with edge
% boxes
% Warning! boxes are in the format of ([y1 x1 y2 x2])

opts.dataDir = fullfile('data') ;
opts.proposalDir = fullfile(opts.dataDir,'EB');
opts.loadTest = 1;
opts = vl_argparse(opts, varargin) ;

% -------------------------------------------------------------------------
% Load selective search win
% -------------------------------------------------------------------------

%% Get selective search windows
files = {'EdgeBoxesVOC2007trainval.mat', ...
'EdgeBoxesVOC2007test.mat'} ; if ~exist(opts.proposalDir, 'dir') mkdir(opts.proposalDir) ; end for i=1:numel(files) outPath = fullfile(opts.proposalDir, files{i}) ; if ~exist(outPath, 'file') url = sprintf('http://groups.inf.ed.ac.uk/hbilen-data/data/WSDDN/%s',files{i}) ; fprintf('Downloading %s to %s\n', url, outPath) ; urlwrite(url,outPath) ; end end if ~isempty(opts.proposalDir) t1 = load([opts.proposalDir,filesep,files{1}]); if opts.loadTest t2 = load([opts.proposalDir,filesep,files{2}]); ssw.id = [str2double(t1.images) str2double(t2.images)]; ssw.boxes = cat(2,t1.boxes,t2.boxes); ssw.boxScores = cat(2,t1.boxScores,t2.boxScores); else ssw.id = str2double(t1.images); ssw.boxes = t1.boxes; ssw.boxScores = t1.boxScores; end [~,si] = sort(ssw.id); ssw.id = ssw.id(si); ssw.boxes = ssw.boxes(si); ssw.boxScores = ssw.boxScores(si); end % ------------------------------------------------------------------------- % Load categories metadata % ------------------------------------------------------------------------- cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',... 'cat','chair','cow','diningtable','dog','horse','motorbike','person',... 'pottedplant','sheep','sofa','train','tvmonitor'}; if ~exist(opts.dataDir,'dir') error('wrong data folder!'); end % Download VOC Devkit and data if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir') files = {'VOCtest_06-Nov-2007.tar',... 'VOCtrainval_06-Nov-2007.tar',... 
'VOCdevkit_08-Jun-2007.tar'} ; for i=1:numel(files) if ~exist(fullfile(opts.dataDir, files{i}), 'file') outPath = fullfile(opts.dataDir,files{i}) ; url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ; fprintf('Downloading %s to %s\n', url, outPath) ; urlwrite(url,outPath) ; untar(outPath,opts.dataDir); end end end addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode')); traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt')); valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt')); testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt')); assert(numel(traindata)==2501); assert(numel(valdata)==2510); assert(numel(testdata)==4952); imdb.classes.name = cats ; imdb.classes.description = cats ; imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ; % ------------------------------------------------------------------------- % Training images % -------------------------------------------------------------------------% names = cell(1,numel(traindata)); labels = zeros(numel(traindata),numel(cats)); % load image names for t=1:numel(traindata) names{t} = sprintf('%06d.jpg',traindata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt'])); labels(:,c) = t(:,2); end imdb.images.id = traindata'; imdb.images.name = names ; imdb.images.set = ones(1, numel(names)) ; imdb.images.label = labels' ; % imdb.images.data = data; % ------------------------------------------------------------------------- % Validation images % ------------------------------------------------------------------------- names = cell(1,numel(valdata)); labels = zeros(numel(valdata),numel(cats)); % data = cell(1,numel(valdata)); % load image names for t=1:numel(valdata) names{t} 
= sprintf('%06d.jpg',valdata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt'])); labels(:,c) = t(:,2); end imdb.images.id = horzcat(imdb.images.id, valdata') ; imdb.images.name = horzcat(imdb.images.name, names) ; imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ; imdb.images.label = horzcat(imdb.images.label, labels') ; % imdb.images.data = horzcat(imdb.images.data, data) ; % % ------------------------------------------------------------------------- % % Test images % % ------------------------------------------------------------------------- % % if opts.loadTest names = cell(1,numel(testdata)); labels = zeros(numel(testdata),numel(cats)); % data = cell(1,numel(testdata)); % load image names for t=1:numel(testdata) names{t} = sprintf('%06d.jpg',testdata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt'])); labels(:,c) = t(:,2); end imdb.images.id = horzcat(imdb.images.id, testdata') ; imdb.images.name = horzcat(imdb.images.name, names) ; imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ; imdb.images.label = horzcat(imdb.images.label, labels') ; % imdb.images.data = horzcat(imdb.images.data, data) ; end % ------------------------------------------------------------------------- % Postprocessing % ------------------------------------------------------------------------- [~,sorti] = sort(imdb.images.id); imdb.images.id = imdb.images.id(sorti); imdb.images.name = imdb.images.name(sorti) ; imdb.images.set = imdb.images.set(sorti) ; imdb.images.label = single(imdb.images.label(:,sorti)) ; imdb.images.size = zeros(numel(imdb.images.name),2); if ~isempty(opts.proposalDir) imdb.images.boxes = ssw.boxes; 
imdb.images.boxScores = ssw.boxScores; assert(all(ssw.id==imdb.images.id)); end % this is zero as scores of selective search windows are not much % informative if ~isempty(opts.proposalDir) % imdb.images.boxScores = cell(size(imdb.images.boxes)); for i=1:numel(imdb.images.boxes) imdb.images.boxes{i} = int16(imdb.images.boxes{i}); imdb.images.boxScores{i} = single(imdb.images.boxScores{i}); imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i})); imdb.images.size(i,:) = [imf.Height,imf.Width]; maxBoxes = max(imdb.images.boxes{i}); if imdb.images.size(i,1)< max(maxBoxes([1,3])) error('Wrong box coordinates'); end if imdb.images.size(i,2)< max(maxBoxes([2,4])) error('Wrong box coordinates'); end end end end ================================================ FILE: pascal/setup_voc07_ssw.m ================================================ function imdb = setup_voc07_ssw(varargin) % setup_voc07_ssw Initialize PASCAL VOC2007 data with selective % search windows % Warning! boxes are in the format of ([y1 x1 y2 x2]) opts.dataDir = fullfile('data') ; opts.proposalDir = fullfile(opts.dataDir,'SSW'); opts.loadTest = 1; opts = vl_argparse(opts, varargin) ; % ------------------------------------------------------------------------- % Load selective search win % ------------------------------------------------------------------------- %% get selective search windows files = {'SelectiveSearchVOC2007trainval.mat', ... 
'SelectiveSearchVOC2007test.mat'} ; if ~exist(opts.proposalDir, 'dir') mkdir(opts.proposalDir) ; end for i=1:numel(files) if ~exist(fullfile(opts.proposalDir, files{i}), 'file') url = sprintf('http://koen.me/research/downloads/%s',files{i}) ; fprintf('downloading %s\n', url) ; urlwrite(url,[opts.proposalDir filesep files{i}]); end end if ~isempty(opts.proposalDir) t1 = load([opts.proposalDir,filesep,files{1}]); if opts.loadTest t2 = load([opts.proposalDir,filesep,files{2}]); ssw.id = [str2double(t1.images);str2double(t2.images)]'; ssw.boxes = cat(2,t1.boxes,t2.boxes); else ssw.id = str2double(t1.images)'; ssw.boxes = t1.boxes; end [~,si] = sort(ssw.id); ssw.id = ssw.id(si); ssw.boxes = ssw.boxes(si); end % ------------------------------------------------------------------------- % Load categories metadata % ------------------------------------------------------------------------- cats = {'aeroplane','bicycle','bird','boat','bottle','bus','car',... 'cat','chair','cow','diningtable','dog','horse','motorbike','person',... 'pottedplant','sheep','sofa','train','tvmonitor'}; if ~exist(opts.dataDir,'dir') error('wrong data folder!'); end if ~exist(opts.dataDir,'dir') error('wrong data folder!'); end % Download VOC Devkit and data if ~exist(fullfile(opts.dataDir,'VOCdevkit'),'dir') files = {'VOCtest_06-Nov-2007.tar',... 'VOCtrainval_06-Nov-2007.tar',... 
'VOCdevkit_08-Jun-2007.tar'} ; for i=1:numel(files) if ~exist(fullfile(opts.dataDir, files{i}), 'file') outPath = fullfile(opts.dataDir,files{i}) ; url = sprintf('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/%s',files{i}) ; fprintf('Downloading %s to %s\n', url, outPath) ; urlwrite(url,outPath) ; untar(outPath,opts.dataDir); end end end addpath(fullfile(opts.dataDir, 'VOCdevkit', 'VOCcode')); traindata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','train.txt')); valdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','val.txt')); testdata = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main','test.txt')); assert(numel(traindata)==2501); assert(numel(valdata)==2510); assert(numel(testdata)==4952); imdb.classes.name = cats ; imdb.classes.description = cats ; imdb.imageDir = fullfile(opts.dataDir, fullfile('VOCdevkit','VOC2007','JPEGImages')) ; % ------------------------------------------------------------------------- % Training images % -------------------------------------------------------------------------% names = cell(1,numel(traindata)); labels = zeros(numel(traindata),numel(cats)); % load image names for t=1:numel(traindata) names{t} = sprintf('%06d.jpg',traindata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_train.txt'])); labels(:,c) = t(:,2); end imdb.images.id = traindata'; imdb.images.name = names ; imdb.images.set = ones(1, numel(names)) ; imdb.images.label = labels' ; % imdb.images.data = data; % ------------------------------------------------------------------------- % Validation images % ------------------------------------------------------------------------- names = cell(1,numel(valdata)); labels = zeros(numel(valdata),numel(cats)); % data = cell(1,numel(valdata)); % load image names for t=1:numel(valdata) names{t} 
= sprintf('%06d.jpg',valdata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_val.txt'])); labels(:,c) = t(:,2); end imdb.images.id = horzcat(imdb.images.id, valdata') ; imdb.images.name = horzcat(imdb.images.name, names) ; imdb.images.set = horzcat(imdb.images.set, 2*ones(1,numel(names))) ; imdb.images.label = horzcat(imdb.images.label, labels') ; % imdb.images.data = horzcat(imdb.images.data, data) ; % % ------------------------------------------------------------------------- % % Test images % % ------------------------------------------------------------------------- % % if opts.loadTest names = cell(1,numel(testdata)); labels = zeros(numel(testdata),numel(cats)); % data = cell(1,numel(testdata)); % load image names for t=1:numel(testdata) names{t} = sprintf('%06d.jpg',testdata(t)); % data{t} = imread(sprintf('%s/%s',imdb.imageDir,names{t})); end % load binary labels for c=1:numel(cats) t = importdata(fullfile(opts.dataDir,'VOCdevkit','VOC2007','ImageSets','Main',[cats{c},'_test.txt'])); labels(:,c) = t(:,2); end imdb.images.id = horzcat(imdb.images.id, testdata') ; imdb.images.name = horzcat(imdb.images.name, names) ; imdb.images.set = horzcat(imdb.images.set, 3 * ones(1,numel(names))) ; imdb.images.label = horzcat(imdb.images.label, labels') ; % imdb.images.data = horzcat(imdb.images.data, data) ; end % ------------------------------------------------------------------------- % Postprocessing % ------------------------------------------------------------------------- [~,sorti] = sort(imdb.images.id); imdb.images.id = imdb.images.id(sorti); imdb.images.name = imdb.images.name(sorti) ; imdb.images.set = imdb.images.set(sorti) ; imdb.images.label = single(imdb.images.label(:,sorti)) ; imdb.images.size = zeros(numel(imdb.images.name),2); if ~isempty(opts.proposalDir) imdb.images.boxes = ssw.boxes; 
assert(all(ssw.id==imdb.images.id));
end

% this is zero as scores of selective search windows are not much
% informative
if ~isempty(opts.proposalDir)
  imdb.images.boxScores = cell(size(imdb.images.boxes));
  for i=1:numel(imdb.images.boxes)
    imdb.images.boxes{i} = int16(imdb.images.boxes{i});
    imdb.images.boxScores{i} = zeros(size(imdb.images.boxes{i},1),1,'single');
    % image size is read from the file header as [height width]
    imf = imfinfo(fullfile(imdb.imageDir,imdb.images.name{i}));
    imdb.images.size(i,:) = [imf.Height,imf.Width];
  end
end
end

================================================
FILE: pascal/wsddnVOCap.m
================================================
function ap = wsddnVOCap(rec,prec)
% From the PASCAL VOC 2011 devkit
%
% Area under the precision/recall curve after making precision
% monotonically non-increasing. REC and PREC are concatenated with ';',
% so they must be column vectors.

% pad the curve with sentinel end points
mrec=[0 ; rec ; 1];
mpre=[0 ; prec ; 0];
% precision envelope: each entry becomes the max of itself and all later ones
for i=numel(mpre)-1:-1:1
  mpre(i)=max(mpre(i),mpre(i+1));
end
% sum rectangle areas at the points where recall changes
i=find(mrec(2:end)~=mrec(1:end-1))+1;
ap=sum((mrec(i)-mrec(i-1)).*mpre(i));

================================================
FILE: pascal/wsddnVOCevaldet.m
================================================
function [rec,prec,ap] = wsddnVOCevaldet(VOCopts,cls,res,draw)
% Evaluate the detections in RES for class CLS against the ground truth
% of VOCopts.testset and return recall, precision and average precision.
%
%   VOCopts  fields read here: localdir, testset, imgsetpath, annopath,
%            minoverlap
%   res      fields read here: ids (cell of image ids), confidence,
%            bbox (transposed on load so each column is one box)
%   draw     when true, the precision/recall curve is plotted
%
% Ground-truth records are cached in
% [VOCopts.localdir '<testset>_anno_cache.mat'].

% load test set
tic;
VOCopts.annocachepath=[VOCopts.localdir '%s_anno_cache.mat'];
cp=sprintf(VOCopts.annocachepath,VOCopts.testset);
if exist(cp,'file')
  fprintf('%s: pr: loading ground truth\n',cls);
  load(cp,'gtids','recs');
else
  [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d');
  for i=1:length(gtids)
    % display progress
    if toc>1
      fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids));
      drawnow;
      tic;
    end

    % read annotation
    recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i}));
  end
  save(cp,'gtids','recs');
end

fprintf('%s: pr: evaluating detections\n',cls);

% hash image ids
hash=wsddnVOChash_init(gtids);

% extract ground truth objects
npos=0;
gt(length(gtids))=struct('BB',[],'diff',[],'det',[]);
for i=1:length(gtids)
  % extract objects of class
  clsinds=strmatch(cls,{recs(i).objects(:).class},'exact');
  gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)';
  gt(i).diff=[recs(i).objects(clsinds).difficult];
  gt(i).det=false(length(clsinds),1);
  % objects flagged difficult do not count as positives
  npos=npos+sum(~gt(i).diff);
end

% load results
ids = res.ids;
confidence = res.confidence;
BB = res.bbox';

% sort detections by decreasing confidence
[sc,si]=sort(-confidence);
ids=ids(si);
BB=BB(:,si);

% assign detections to ground truth objects
nd=length(confidence);
tp=zeros(nd,1);
fp=zeros(nd,1);
tic;
for d=1:nd
  % display progress
  if toc>1
    fprintf('%s: pr: compute: %d/%d\n',cls,d,nd);
    drawnow;
    tic;
  end

  % find ground truth image
  i=wsddnVOChash_lookup(hash,ids{d});
  if isempty(i)
    error('unrecognized image "%s"',ids{d});
  elseif length(i)>1
    error('multiple image "%s"',ids{d});
  end

  % assign detection to ground truth object if any
  bb=BB(:,d);
  ovmax=-inf;
  for j=1:size(gt(i).BB,2)
    bbgt=gt(i).BB(:,j);
    bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))];
    iw=bi(3)-bi(1)+1;
    ih=bi(4)-bi(2)+1;
    if iw>0 & ih>0
      % compute overlap as area of intersection / area of union
      ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+...
        (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-...
        iw*ih;
      ov=iw*ih/ua;
      if ov>ovmax
        ovmax=ov;
        jmax=j;
      end
    end
  end
  % assign detection as true positive/don't care/false positive
  % (jmax is only read when some overlap was found, i.e. ovmax > -inf)
  if ovmax>=VOCopts.minoverlap
    if ~gt(i).diff(jmax)
      if ~gt(i).det(jmax)
        tp(d)=1; % true positive
        gt(i).det(jmax)=true;
      else
        fp(d)=1; % false positive (multiple detection)
      end
    end
  else
    fp(d)=1; % false positive
  end
end

% compute precision/recall
fp=cumsum(fp);
tp=cumsum(tp);
rec=tp/npos;
prec=tp./(fp+tp);

ap=wsddnVOCap(rec,prec);

if draw
  % plot precision/recall
  plot(rec,prec,'-');
  grid;
  xlabel 'recall'
  ylabel 'precision'
  title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap));
end

================================================
FILE: pascal/wsddnVOChash_init.m
================================================
function hash = wsddnVOChash_init(strs)
% From the PASCAL VOC 2011 devkit
%
% Build a bucketed lookup table from the id strings in STRS to their
% positions in STRS. The bucket is derived from characters 4 and 6:end
% of each string, read as a number, modulo the table size.
hsize=4999;
hash.key=cell(hsize,1);
hash.val=cell(hsize,1);
for i=1:numel(strs)
  s=strs{i};
  h=mod(str2double(s([4 6:end])),hsize)+1;
  % append to the bucket: key{h} holds the strings, val{h} their indices
  j=numel(hash.key{h})+1;
  hash.key{h}{j}=strs{i};
  hash.val{h}(j)=i;
end

================================================
FILE: pascal/wsddnVOChash_lookup.m
================================================
function ind = wsddnVOChash_lookup(hash,s)
% From the PASCAL VOC 2011 devkit
%
% Return the stored index (or indices) of the string S in HASH, using the
% same bucket computation as wsddnVOChash_init; empty when S is absent.
hsize=numel(hash.key);
h=mod(str2double(s([4 6:end])),hsize)+1;
ind=hash.val{h}(strmatch(s,hash.key{h},'exact'));

================================================
FILE: setup_WSDDN.m
================================================
function setup_WSDDN()
%SETUP_WSDDN Sets up WSDDN, by adding its folders to the Matlab path
%   Also adds the 'examples' and 'examples/imagenet' folders found under
%   vl_rootnn.

root = fileparts(mfilename('fullpath')) ;
addpath(root, [root '/matlab'], [root '/pascal'], [root '/core']) ;
addpath([vl_rootnn '/examples/']) ;
addpath([vl_rootnn '/examples/imagenet/']) ;