Repository: daizuozhuo/batch-feature-erasing-network Branch: master Commit: 21c99abb8d85 Files: 26 Total size: 77.7 KB Directory structure: gitextract_91jrirfa/ ├── .gitignore ├── LICENSE ├── README.md ├── config.py ├── datasets/ │ ├── __init__.py │ ├── data_loader.py │ ├── data_manager.py │ └── samplers.py ├── main_reid.py ├── models/ │ ├── __init__.py │ ├── networks.py │ └── resnet.py ├── requirements.txt ├── trainers/ │ ├── __init__.py │ ├── evaluator.py │ ├── re_ranking.py │ └── trainer.py └── utils/ ├── DistWeightDevianceLoss.py ├── LiftedStructure.py ├── __init__.py ├── loss.py ├── meters.py ├── random_erasing.py ├── serialization.py ├── transforms.py └── validation_metrics.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .idea **__pycache__ .DS_Store data/ pytorch-ckpt/ .vscode/ ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Dai Zuozhuo Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Batch DropBlock Network for Person Re-identification and Beyond Official source code of paper https://arxiv.org/abs/1811.07130 ## Update on 2019.3.15 Update CUHK03 results. ## Update on 2019.1.29 Traning scripts are released. The best Markt1501 result is 95.3%! Please look at the training section of README.md. ## Update on 2019.1.23 In-Shop Clothes Retrieval dataset and pretrained model are released!. The rank-1 result is 89.5 which is a litter bit higher than paper reported. ## This paper is accepted by ICCV 2019. Please cite if you use this code in your research. ``` @article{dai2018batch, title={Batch DropBlock Network for Person Re-identification and Beyond}, author={Dai, Zuozhuo and Chen, Mingqiang and Gu, Xiaodong and Zhu, Siyu and Tan, Ping}, journal={arXiv preprint arXiv:1811.07130}, year={2018} } ``` ## Setup running environment This project requires python3, cython, torch, torchvision, scikit-learn, tensorboardX, fire. The baseline source code is borrowed from https://github.com/L1aoXingyu/reid_baseline. ## Prepare dataset Create a directory to store reid datasets under this repo via ```bash cd reid mkdir data ``` For market1501 dataset, 1. Download Market1501 dataset to `data/` from http://www.liangzheng.org/Project/project_reid.html 2. Extract dataset and rename to `market1501`. The data structure would like: ``` market1501/ bounding_box_test/ bounding_box_train/ query/ ``` For CUHK03 dataset, 1. Download CUHK03-NP dataset from https://github.com/zhunzhong07/person-re-ranking/tree/master/CUHK03-NP 2. Extract dataset and rename folers inside it to cuhk-detect and cuhk-label. For DukeMTMC-reID dataset, Dowload from https://github.com/layumi/DukeMTMC-reID_evaluation For In-Shop Clothes dataset, 1. Downlaod clothes dataset from http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/clothes.tar 2. Extract dataset and put it to `data/` folder. ## Results Dataset | CUHK03-Label | CUHK03-Detect | DukeMTMC re-ID | Market1501 | In-Shop Clothes| --------|--------------|---------------|-----------------|------------|----------------| Rank-1 | 79.4 | 76.4 | 88.9 | 95.3 |89.5 | mAP | 76.7 | 73.5 | 75.9 | 86.2 |72.3 | model | [aliyun](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/cuhk-label-794.pth.tar)| [aliyun](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/cuhk-detect-764.pth.tar)] | [aliyun](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/duke_887.pth.tar) | [aliyun](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/market_953.pth.tar)|[aliyun](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/clothes_895.pth.tar) You can download the pre-trained models from the above table and evaluate on person re-ID datasets. For example, to evaluate CUHK03-Label dataset, you can download the model to './pytorch-ckpt/cuhk_label_bfe' directory and run the following commands. ### Evaluate Market1501 ```bash python3 main_reid.py train --save_dir='./pytorch-ckpt/market_bfe' --model_name=bfe --train_batch=32 --test_batch=32 --dataset=market1501 --pretrained_model='./pytorch-ckpt/market_bfe/944.pth.tar' --evaluate ``` ### Evaluate CUHK03-Label ```bash python3 main_reid.py train --save_dir='./pytorch-ckpt/cuhk_label_bfe' --model_name=bfe --train_batch=32 --test_batch=32 --dataset=cuhk-label --pretrained_model='./pytorch-ckpt/cuhk_label_bfe/750.pth.tar' --evaluate ``` ### Evaluate In-Shop clothes ```bash python main_reid.py train --save_dir='./pytorch-ckpt/clothes_bfe' --model_name=bfe --pretrained_model='./pytorch-ckpt/clothes_bfe/clothes_895.pth.tar' --test_batch=32 --dataset=clothes --evaluate ``` ## Training ### Traning Market1501 ```bash python main_reid.py train --save_dir='./pytorch-ckpt/market-bfe' --max_epoch=400 --eval_step=30 --dataset=market1501 --test_batch=128 --train_batch=128 --optim=adam --adjust_lr ``` This traning command is tested on 4 GTX1080 gpus. Here is [training log](http://virutalbuy-public.oss-cn-hangzhou.aliyuncs.com/share/bfe_models/market_953.txt). You shoud get a result around 95%. ================================================ FILE: config.py ================================================ # encoding: utf-8 import warnings import numpy as np class DefaultConfig(object): seed = 0 # dataset options dataset = 'market1501' datatype = 'person' mode = 'retrieval' # optimization options loss = 'triplet' optim = 'adam' max_epoch = 60 train_batch = 32 test_batch = 32 adjust_lr = False lr = 0.0001 adjust_lr = False gamma = 0.1 weight_decay = 5e-4 momentum = 0.9 random_crop = False margin = None num_instances = 4 num_gpu = 1 evaluate = False savefig = None re_ranking = False # model options model_name = 'bfe' # triplet, softmax_triplet, bfe, ide last_stride = 1 pretrained_model = None # miscs print_freq = 30 eval_step = 50 save_dir = './pytorch-ckpt/market' workers = 10 start_epoch = 0 best_rank = -np.inf def _parse(self, kwargs): for k, v in kwargs.items(): if not hasattr(self, k): warnings.warn("Warning: opt has not attribut %s" % k) setattr(self, k, v) if 'cls' in self.dataset: self.mode='class' if 'market' in self.dataset or 'cuhk' in self.dataset or 'duke' in self.dataset: self.datatype = 'person' elif 'cub' in self.dataset: self.datatype = 'cub' elif 'car' in self.dataset: self.datatype = 'car' elif 'clothes' in self.dataset: self.datatype = 'clothes' elif 'product' in self.dataset: self.datatype = 'product' def _state_dict(self): return {k: getattr(self, k) for k, _ in DefaultConfig.__dict__.items() if not k.startswith('_')} opt = DefaultConfig() ================================================ FILE: datasets/__init__.py ================================================ ================================================ FILE: datasets/data_loader.py ================================================ from __future__ import print_function, absolute_import from PIL import Image from torch.utils.data import Dataset def read_image(img_path): """Keep reading image until succeed. This can avoid IOError incurred by heavy IO process.""" got_img = False while not got_img: try: img = Image.open(img_path).convert('RGB') got_img = True except IOError: print("IOError incurred when reading '{}'. Will redo. Don't worry. Just chill.".format(img_path)) pass return img class ImageData(Dataset): def __init__(self, dataset, transform): self.dataset = dataset self.transform = transform def __getitem__(self, item): img, pid, camid = self.dataset[item] img = read_image(img) if self.transform is not None: img = self.transform(img) return img, pid, camid def __len__(self): return len(self.dataset) ================================================ FILE: datasets/data_manager.py ================================================ from __future__ import print_function, absolute_import import glob import re from os import path as osp import os """Dataset classes""" class Market1501(object): """ Market1501 Reference: Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015. URL: http://www.liangzheng.org/Project/project_reid.html Dataset statistics: # identities: 1501 (+1 for background) # images: 12936 (train) + 3368 (query) + 15913 (gallery) """ def __init__(self, dataset_dir, mode, root='data'): self.dataset_dir = dataset_dir self.dataset_dir = osp.join(root, self.dataset_dir) self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train') self.query_dir = osp.join(self.dataset_dir, 'query') self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test') self._check_before_run() train_relabel = (mode == 'retrieval') train, num_train_pids, num_train_imgs = self._process_dir(self.train_dir, relabel=train_relabel) query, num_query_pids, num_query_imgs = self._process_dir(self.query_dir, relabel=False) gallery, num_gallery_pids, num_gallery_imgs = self._process_dir(self.gallery_dir, relabel=False) num_total_pids = num_train_pids + num_query_pids num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs print("=> Market1501 loaded") print("Dataset statistics:") print(" ------------------------------") print(" subset | # ids | # images") print(" ------------------------------") print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs)) print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs)) print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs)) print(" ------------------------------") print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs)) print(" ------------------------------") self.train = train self.query = query self.gallery = gallery self.num_train_pids = num_train_pids self.num_query_pids = num_query_pids self.num_gallery_pids = num_gallery_pids def _check_before_run(self): """Check if all files are available before going deeper""" if not osp.exists(self.dataset_dir): raise RuntimeError("'{}' is not available".format(self.dataset_dir)) if not osp.exists(self.train_dir): raise RuntimeError("'{}' is not available".format(self.train_dir)) if not osp.exists(self.query_dir): raise RuntimeError("'{}' is not available".format(self.query_dir)) if not osp.exists(self.gallery_dir): raise RuntimeError("'{}' is not available".format(self.gallery_dir)) def _process_dir(self, dir_path, relabel=False): img_names = os.listdir(dir_path) img_paths = [os.path.join(dir_path, img_name) for img_name in img_names \ if img_name.endswith('jpg') or img_name.endswith('png')] pattern = re.compile(r'([-\d]+)_c([-\d]+)') pid_container = set() for img_path in img_paths: pid, _ = map(int, pattern.search(img_path).groups()) if pid == -1: continue # junk images are just ignored pid_container.add(pid) pid2label = {pid: label for label, pid in enumerate(pid_container)} dataset = [] for img_path in img_paths: pid, camid = map(int, pattern.search(img_path).groups()) if pid == -1: continue # junk images are just ignored #assert 0 <= pid <= 1501 # pid == 0 means background #assert 1 <= camid <= 6 camid -= 1 # index starts from 0 if relabel: pid = pid2label[pid] dataset.append((img_path, pid, camid)) num_pids = len(pid_container) num_imgs = len(dataset) return dataset, num_pids, num_imgs def init_dataset(name, mode): return Market1501(name, mode) ================================================ FILE: datasets/samplers.py ================================================ from __future__ import absolute_import from collections import defaultdict import numpy as np import torch import random from torch.utils.data.sampler import Sampler class RandomIdentitySampler(Sampler): def __init__(self, data_source, num_instances=4): self.data_source = data_source self.num_instances = num_instances self.index_dic = defaultdict(list) for index, (_, pid, _) in enumerate(data_source): self.index_dic[pid].append(index) self.pids = list(self.index_dic.keys()) self.num_identities = len(self.pids) def __iter__(self): indices = torch.randperm(self.num_identities) ret = [] for i in indices: pid = self.pids[i] t = self.index_dic[pid] replace = False if len(t) >= self.num_instances else True t = np.random.choice(t, size=self.num_instances, replace=replace) ret.extend(t) return iter(ret) def __len__(self): return self.num_identities * self.num_instances ================================================ FILE: main_reid.py ================================================ # encoding: utf-8 import os import sys from os import path as osp from pprint import pprint import numpy as np import torch from tensorboardX import SummaryWriter from torch import nn from torch.backends import cudnn from torch.utils.data import DataLoader from config import opt from datasets import data_manager from datasets.data_loader import ImageData from datasets.samplers import RandomIdentitySampler from models.networks import ResNetBuilder, IDE, Resnet, BFE from trainers.evaluator import ResNetEvaluator from trainers.trainer import cls_tripletTrainer from utils.loss import CrossEntropyLabelSmooth, TripletLoss, Margin from utils.LiftedStructure import LiftedStructureLoss from utils.DistWeightDevianceLoss import DistWeightBinDevianceLoss from utils.serialization import Logger, save_checkpoint from utils.transforms import TestTransform, TrainTransform def train(**kwargs): opt._parse(kwargs) # set random seed and cudnn benchmark torch.manual_seed(opt.seed) os.makedirs(opt.save_dir, exist_ok=True) use_gpu = torch.cuda.is_available() sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt')) print('=========user config==========') pprint(opt._state_dict()) print('============end===============') if use_gpu: print('currently using GPU') cudnn.benchmark = True torch.cuda.manual_seed_all(opt.seed) else: print('currently using cpu') print('initializing dataset {}'.format(opt.dataset)) dataset = data_manager.init_dataset(name=opt.dataset, mode=opt.mode) pin_memory = True if use_gpu else False summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log')) trainloader = DataLoader( ImageData(dataset.train, TrainTransform(opt.datatype)), sampler=RandomIdentitySampler(dataset.train, opt.num_instances), batch_size=opt.train_batch, num_workers=opt.workers, pin_memory=pin_memory, drop_last=True ) queryloader = DataLoader( ImageData(dataset.query, TestTransform(opt.datatype)), batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory ) galleryloader = DataLoader( ImageData(dataset.gallery, TestTransform(opt.datatype)), batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory ) queryFliploader = DataLoader( ImageData(dataset.query, TestTransform(opt.datatype, True)), batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory ) galleryFliploader = DataLoader( ImageData(dataset.gallery, TestTransform(opt.datatype, True)), batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory ) print('initializing model ...') if opt.model_name == 'softmax' or opt.model_name == 'softmax_triplet': model = ResNetBuilder(dataset.num_train_pids, 1, True) elif opt.model_name == 'triplet': model = ResNetBuilder(None, 1, True) elif opt.model_name == 'bfe': if opt.datatype == "person": model = BFE(dataset.num_train_pids, 1.0, 0.33) else: model = BFE(dataset.num_train_pids, 0.5, 0.5) elif opt.model_name == 'ide': model = IDE(dataset.num_train_pids) elif opt.model_name == 'resnet': model = Resnet(dataset.num_train_pids) optim_policy = model.get_optim_policy() if opt.pretrained_model: state_dict = torch.load(opt.pretrained_model)['state_dict'] #state_dict = {k: v for k, v in state_dict.items() \ # if not ('reduction' in k or 'softmax' in k)} model.load_state_dict(state_dict, False) print('load pretrained model ' + opt.pretrained_model) print('model size: {:.5f}M'.format(sum(p.numel() for p in model.parameters()) / 1e6)) if use_gpu: model = nn.DataParallel(model).cuda() reid_evaluator = ResNetEvaluator(model) if opt.evaluate: reid_evaluator.evaluate(queryloader, galleryloader, queryFliploader, galleryFliploader, re_ranking=opt.re_ranking, savefig=opt.savefig) return #xent_criterion = nn.CrossEntropyLoss() xent_criterion = CrossEntropyLabelSmooth(dataset.num_train_pids) if opt.loss == 'triplet': embedding_criterion = TripletLoss(opt.margin) elif opt.loss == 'lifted': embedding_criterion = LiftedStructureLoss(hard_mining=True) elif opt.loss == 'weight': embedding_criterion = Margin() def criterion(triplet_y, softmax_y, labels): losses = [embedding_criterion(output, labels)[0] for output in triplet_y] + \ [xent_criterion(output, labels) for output in softmax_y] loss = sum(losses) return loss # get optimizer if opt.optim == "sgd": optimizer = torch.optim.SGD(optim_policy, lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay) else: optimizer = torch.optim.Adam(optim_policy, lr=opt.lr, weight_decay=opt.weight_decay) start_epoch = opt.start_epoch # get trainer and evaluator reid_trainer = cls_tripletTrainer(opt, model, optimizer, criterion, summary_writer) def adjust_lr(optimizer, ep): if ep < 50: lr = 1e-4*(ep//5+1) elif ep < 200: lr = 1e-3 elif ep < 300: lr = 1e-4 else: lr = 1e-5 for p in optimizer.param_groups: p['lr'] = lr # start training best_rank1 = opt.best_rank best_epoch = 0 for epoch in range(start_epoch, opt.max_epoch): if opt.adjust_lr: adjust_lr(optimizer, epoch + 1) reid_trainer.train(epoch, trainloader) # skip if not save model if opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 or (epoch + 1) == opt.max_epoch: if opt.mode == 'class': rank1 = test(model, queryloader) else: rank1 = reid_evaluator.evaluate(queryloader, galleryloader, queryFliploader, galleryFliploader) is_best = rank1 > best_rank1 if is_best: best_rank1 = rank1 best_epoch = epoch + 1 if use_gpu: state_dict = model.module.state_dict() else: state_dict = model.state_dict() save_checkpoint({'state_dict': state_dict, 'epoch': epoch + 1}, is_best=is_best, save_dir=opt.save_dir, filename='checkpoint_ep' + str(epoch + 1) + '.pth.tar') print('Best rank-1 {:.1%}, achived at epoch {}'.format(best_rank1, best_epoch)) def test(model, queryloader): model.eval() correct = 0 with torch.no_grad(): for data, target, _ in queryloader: output = model(data).cpu() # get the index of the max log-probability pred = output.max(1, keepdim=True)[1] correct += pred.eq(target.view_as(pred)).sum().item() rank1 = 100. * correct / len(queryloader.dataset) print('\nTest set: Accuracy: {}/{} ({:.2f}%)\n'.format(correct, len(queryloader.dataset), rank1)) return rank1 if __name__ == '__main__': import fire fire.Fire() ================================================ FILE: models/__init__.py ================================================ ================================================ FILE: models/networks.py ================================================ # encoding: utf-8 import copy import itertools import numpy as np import torch import torch.nn.functional as F import torch.utils.model_zoo as model_zoo import random from scipy.spatial.distance import cdist from sklearn.preprocessing import normalize from torch import nn, optim from torch.utils.data import dataloader from torchvision import transforms from torchvision.models.resnet import Bottleneck, resnet50 from torchvision.transforms import functional from .resnet import ResNet def weights_init_kaiming(m): classname = m.__class__.__name__ if classname.find('Linear') != -1: nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') nn.init.constant_(m.bias, 0.0) elif classname.find('Conv') != -1: nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') if m.bias is not None: nn.init.constant_(m.bias, 0.0) elif classname.find('BatchNorm') != -1: if m.affine: nn.init.normal_(m.weight, 1.0, 0.02) nn.init.constant_(m.bias, 0.0) def weights_init_classifier(m): classname = m.__class__.__name__ if classname.find('Linear') != -1: nn.init.normal_(m.weight, std=0.001) if m.bias: nn.init.constant_(m.bias, 0.0) class SELayer(nn.Module): def __init__(self, channel, reduction=16): super(SELayer, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Sequential( nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), nn.Linear(channel // reduction, channel), nn.Sigmoid() ) def forward(self, x): b, c, _, _ = x.size() y = self.avg_pool(x).view(b, c) y = self.fc(y).view(b, c, 1, 1) return x * y class BatchDrop(nn.Module): def __init__(self, h_ratio, w_ratio): super(BatchDrop, self).__init__() self.h_ratio = h_ratio self.w_ratio = w_ratio def forward(self, x): if self.training: h, w = x.size()[-2:] rh = round(self.h_ratio * h) rw = round(self.w_ratio * w) sx = random.randint(0, h-rh) sy = random.randint(0, w-rw) mask = x.new_ones(x.size()) mask[:, :, sx:sx+rh, sy:sy+rw] = 0 x = x * mask return x class BatchCrop(nn.Module): def __init__(self, ratio): super(BatchCrop, self).__init__() self.ratio = ratio def forward(self, x): if self.training: h, w = x.size()[-2:] rw = int(self.ratio * w) start = random.randint(0, h-1) if start + rw > h: select = list(range(0, start+rw-h)) + list(range(start, h)) else: select = list(range(start, start+rw)) mask = x.new_zeros(x.size()) mask[:, :, select, :] = 1 x = x * mask return x class ResNetBuilder(nn.Module): in_planes = 2048 def __init__(self, num_classes=None, last_stride=1, pretrained=False): super().__init__() self.base = ResNet(last_stride) if pretrained: model_url = 'https://download.pytorch.org/models/resnet50-19c8e357.pth' self.base.load_param(model_zoo.load_url(model_url)) self.num_classes = num_classes if num_classes is not None: self.bottleneck = nn.Sequential( nn.Linear(self.in_planes, 512), nn.BatchNorm1d(512), nn.LeakyReLU(0.1), nn.Dropout(p=0.5) ) self.bottleneck.apply(weights_init_kaiming) self.classifier = nn.Linear(512, self.num_classes) self.classifier.apply(weights_init_classifier) def forward(self, x): global_feat = self.base(x) global_feat = F.avg_pool2d(global_feat, global_feat.shape[2:]) # (b, 2048, 1, 1) global_feat = global_feat.view(global_feat.shape[0], -1) if self.training and self.num_classes is not None: feat = self.bottleneck(global_feat) cls_score = self.classifier(feat) return [global_feat], [cls_score] else: return global_feat def get_optim_policy(self): base_param_group = self.base.parameters() if self.num_classes is not None: add_param_group = itertools.chain(self.bottleneck.parameters(), self.classifier.parameters()) return [ {'params': base_param_group}, {'params': add_param_group} ] else: return [ {'params': base_param_group} ] class BFE(nn.Module): def __init__(self, num_classes, width_ratio=0.5, height_ratio=0.5): super(BFE, self).__init__() resnet = resnet50(pretrained=True) self.backbone = nn.Sequential( resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1, # res_conv2 resnet.layer2, # res_conv3 resnet.layer3, # res_conv4 ) self.res_part = nn.Sequential( Bottleneck(1024, 512, stride=1, downsample=nn.Sequential( nn.Conv2d(1024, 2048, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(2048), )), Bottleneck(2048, 512), Bottleneck(2048, 512), ) self.res_part.load_state_dict(resnet.layer4.state_dict()) reduction = nn.Sequential( nn.Conv2d(2048, 512, 1), nn.BatchNorm2d(512), nn.ReLU() ) # global branch self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.global_softmax = nn.Linear(512, num_classes) self.global_softmax.apply(weights_init_kaiming) self.global_reduction = copy.deepcopy(reduction) self.global_reduction.apply(weights_init_kaiming) # part branch self.res_part2 = Bottleneck(2048, 512) self.part_maxpool = nn.AdaptiveMaxPool2d((1,1)) self.batch_crop = BatchDrop(height_ratio, width_ratio) self.reduction = nn.Sequential( nn.Linear(2048, 1024, 1), nn.BatchNorm1d(1024), nn.ReLU() ) self.reduction.apply(weights_init_kaiming) self.softmax = nn.Linear(1024, num_classes) self.softmax.apply(weights_init_kaiming) def forward(self, x): """ :param x: input image tensor of (N, C, H, W) :return: (prediction, triplet_losses, softmax_losses) """ x = self.backbone(x) x = self.res_part(x) predict = [] triplet_features = [] softmax_features = [] #global branch glob = self.global_avgpool(x) global_triplet_feature = self.global_reduction(glob).squeeze() global_softmax_class = self.global_softmax(global_triplet_feature) softmax_features.append(global_softmax_class) triplet_features.append(global_triplet_feature) predict.append(global_triplet_feature) #part branch x = self.res_part2(x) x = self.batch_crop(x) triplet_feature = self.part_maxpool(x).squeeze() feature = self.reduction(triplet_feature) softmax_feature = self.softmax(feature) triplet_features.append(feature) softmax_features.append(softmax_feature) predict.append(feature) if self.training: return triplet_features, softmax_features else: return torch.cat(predict, 1) def get_optim_policy(self): params = [ {'params': self.backbone.parameters()}, {'params': self.res_part.parameters()}, {'params': self.global_reduction.parameters()}, {'params': self.global_softmax.parameters()}, {'params': self.res_part2.parameters()}, {'params': self.reduction.parameters()}, {'params': self.softmax.parameters()}, ] return params class Resnet(nn.Module): def __init__(self, num_classes, resnet=None): super(Resnet, self).__init__() if not resnet: resnet = resnet50(pretrained=True) self.backbone = nn.Sequential( resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1, # res_conv2 resnet.layer2, # res_conv3 resnet.layer3, # res_conv4 resnet.layer4 ) self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.softmax = nn.Linear(2048, num_classes) def forward(self, x): """ :param x: input image tensor of (N, C, H, W) :return: (prediction, triplet_losses, softmax_losses) """ x = self.backbone(x) x = self.global_avgpool(x).squeeze() feature = self.softmax(x) if self.training: return [], [feature] else: return feature def get_optim_policy(self): return self.parameters() class IDE(nn.Module): def __init__(self, num_classes, resnet=None): super(IDE, self).__init__() if not resnet: resnet = resnet50(pretrained=True) self.backbone = nn.Sequential( resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1, # res_conv2 resnet.layer2, # res_conv3 resnet.layer3, # res_conv4 resnet.layer4 ) self.global_avgpool = nn.AvgPool2d(kernel_size=(12, 4)) def forward(self, x): """ :param x: input image tensor of (N, C, H, W) :return: (prediction, triplet_losses, softmax_losses) """ x = self.backbone(x) feature = self.global_avgpool(x).squeeze() if self.training: return [feature], [] else: return feature def get_optim_policy(self): return self.parameters() ================================================ FILE: models/resnet.py ================================================ # encoding: utf-8 import math import torch as th import torch from torch import nn class Bottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class CBAM_Module(nn.Module): def __init__(self, channels, reduction): super(CBAM_Module, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.max_pool = nn.AdaptiveMaxPool2d(1) self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0) self.relu = nn.ReLU(inplace=True) self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0) self.sigmoid_channel = nn.Sigmoid() self.conv_after_concat = nn.Conv2d(2, 1, kernel_size = 3, stride=1, padding = 1) self.sigmoid_spatial = nn.Sigmoid() def forward(self, x): #channel attention module_input = x avg = self.avg_pool(x) mx = self.max_pool(x) avg = self.fc1(avg) mx = self.fc1(mx) avg = self.relu(avg) mx = self.relu(mx) avg = self.fc2(avg) mx = self.fc2(mx) x = avg + mx x = self.sigmoid_channel(x) x = module_input * x #spatial attention module_input = x avg = torch.mean(x, 1, True) mx, _ = torch.max(x, 1, True) x = torch.cat((avg, mx), 1) x = self.conv_after_concat(x) x = self.sigmoid_spatial(x) x = module_input * x return x class CBAMBottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None): super(CBAMBottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.cbam = CBAM_Module(planes * 4, reduction=16) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) out = self.cbam(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out def cbam_resnet50(): return ResNet(last_stride=1, block=CBAMBottleneck) class ResNet(nn.Module): def __init__(self, last_stride=2, block=Bottleneck, layers=[3, 4, 6, 3]): self.inplanes = 64 super().__init__() self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer( block, 512, layers[3], stride=last_stride) def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) return x def load_param(self, param_dict): for i in param_dict: if 'fc' in i: continue self.state_dict()[i].copy_(param_dict[i]) def random_init(self): for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() if __name__ == "__main__": net = ResNet(last_stride=2) import torch x = net(torch.zeros(1, 3, 256, 128)) print(x.shape) ================================================ FILE: requirements.txt ================================================ tensorboardX fire ================================================ FILE: trainers/__init__.py ================================================ ================================================ FILE: trainers/evaluator.py ================================================ # encoding: utf-8 import numpy as np import os import torch from PIL import Image import matplotlib.pyplot as plt from trainers.re_ranking import re_ranking as re_ranking_func class ResNetEvaluator: def __init__(self, model): self.model = model def save_incorrect_pairs(self, distmat, queryloader, galleryloader, g_pids, q_pids, g_camids, q_camids, savefig): os.makedirs(savefig, exist_ok=True) self.model.eval() m = distmat.shape[0] indices = np.argsort(distmat, axis=1) for i in range(m): for j in range(10): index = indices[i][j] if g_camids[index] == q_camids[i] and g_pids[index] == q_pids[i]: continue else: break if g_pids[index] == q_pids[i]: continue fig, axes =plt.subplots(1, 11, figsize=(12, 8)) img = queryloader.dataset.dataset[i][0] img = Image.open(img).convert('RGB') axes[0].set_title(q_pids[i]) axes[0].imshow(img) axes[0].set_axis_off() for j in range(10): gallery_index = indices[i][j] img = galleryloader.dataset.dataset[gallery_index][0] img = Image.open(img).convert('RGB') axes[j+1].set_title(g_pids[gallery_index]) axes[j+1].set_axis_off() axes[j+1].imshow(img) fig.savefig(os.path.join(savefig, '%d.png' %q_pids[i])) plt.close(fig) def evaluate(self, queryloader, galleryloader, queryFliploader, galleryFliploader, ranks=[1, 2, 4, 5,8, 10, 16, 20], eval_flip=False, re_ranking=False, savefig=False): self.model.eval() qf, q_pids, q_camids = [], [], [] for inputs0, inputs1 in zip(queryloader, queryFliploader): inputs, pids, camids = self._parse_data(inputs0) feature0 = self._forward(inputs) if eval_flip: inputs, pids, camids = self._parse_data(inputs1) feature1 = self._forward(inputs) qf.append((feature0 + feature1) / 2.0) else: qf.append(feature0) q_pids.extend(pids) q_camids.extend(camids) qf = torch.cat(qf, 0) q_pids = torch.Tensor(q_pids) q_camids = torch.Tensor(q_camids) print("Extracted features for query set: {} x {}".format(qf.size(0), qf.size(1))) gf, g_pids, g_camids = [], [], [] for inputs0, inputs1 in zip(galleryloader, galleryFliploader): inputs, pids, camids = self._parse_data(inputs0) feature0 = self._forward(inputs) if eval_flip: inputs, pids, camids = self._parse_data(inputs1) feature1 = self._forward(inputs) gf.append((feature0 + feature1) / 2.0) else: gf.append(feature0) g_pids.extend(pids) g_camids.extend(camids) gf = torch.cat(gf, 0) g_pids = torch.Tensor(g_pids) g_camids = torch.Tensor(g_camids) print("Extracted features for gallery set: {} x {}".format(gf.size(0), gf.size(1))) print("Computing distance matrix") m, n = qf.size(0), gf.size(0) q_g_dist = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \ torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t() q_g_dist.addmm_(1, -2, qf, gf.t()) if re_ranking: q_q_dist = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, m) + \ torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, m).t() q_q_dist.addmm_(1, -2, qf, qf.t()) g_g_dist = torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, n) + \ torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, n).t() g_g_dist.addmm_(1, -2, gf, gf.t()) q_g_dist = q_g_dist.numpy() q_g_dist[q_g_dist < 0] = 0 q_g_dist = np.sqrt(q_g_dist) q_q_dist = q_q_dist.numpy() q_q_dist[q_q_dist < 0] = 0 q_q_dist = np.sqrt(q_q_dist) g_g_dist = g_g_dist.numpy() g_g_dist[g_g_dist < 0] = 0 g_g_dist = np.sqrt(g_g_dist) distmat = torch.Tensor(re_ranking_func(q_g_dist, q_q_dist, g_g_dist)) else: distmat = q_g_dist if savefig: print("Saving fingure") self.save_incorrect_pairs(distmat.numpy(), queryloader, galleryloader, g_pids.numpy(), q_pids.numpy(), g_camids.numpy(), q_camids.numpy(), savefig) print("Computing CMC and mAP") cmc, mAP = self.eval_func_gpu(distmat, q_pids, g_pids, q_camids, g_camids) print("Results ----------") print("mAP: {:.1%}".format(mAP)) print("CMC curve") for r in ranks: print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1])) print("------------------") return cmc[0] def _parse_data(self, inputs): imgs, pids, camids = inputs return imgs.cuda(), pids, camids def _forward(self, inputs): with torch.no_grad(): feature = self.model(inputs) return feature.cpu() def eval_func_gpu(self, distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50): num_q, num_g = distmat.size() if num_g < max_rank: max_rank = num_g print("Note: number of gallery samples is quite small, got {}".format(num_g)) _, indices = torch.sort(distmat, dim=1) matches = g_pids[indices] == q_pids.view([num_q, -1]) keep = ~((g_pids[indices] == q_pids.view([num_q, -1])) & (g_camids[indices] == q_camids.view([num_q, -1]))) #keep = g_camids[indices] != q_camids.view([num_q, -1]) results = [] num_rel = [] for i in range(num_q): m = matches[i][keep[i]] if m.any(): num_rel.append(m.sum()) results.append(m[:max_rank].unsqueeze(0)) matches = torch.cat(results, dim=0).float() num_rel = torch.Tensor(num_rel) cmc = matches.cumsum(dim=1) cmc[cmc > 1] = 1 all_cmc = cmc.sum(dim=0) / cmc.size(0) pos = torch.Tensor(range(1, max_rank+1)) temp_cmc = matches.cumsum(dim=1) / pos * matches AP = temp_cmc.sum(dim=1) / num_rel mAP = AP.sum() / AP.size(0) return all_cmc.numpy(), mAP.item() def eval_func(self, distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50): """Evaluation with market1501 metric Key: for each query identity, its gallery images from the same camera view are discarded. """ num_q, num_g = distmat.shape if num_g < max_rank: max_rank = num_g print("Note: number of gallery samples is quite small, got {}".format(num_g)) indices = np.argsort(distmat, axis=1) matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32) # compute cmc curve for each query all_cmc = [] all_AP = [] num_valid_q = 0. # number of valid query for q_idx in range(num_q): # get query pid and camid q_pid = q_pids[q_idx] q_camid = q_camids[q_idx] # remove gallery samples that have the same pid and camid with query order = indices[q_idx] remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid) keep = np.invert(remove) # compute cmc curve # binary vector, positions with value 1 are correct matches orig_cmc = matches[q_idx][keep] if not np.any(orig_cmc): # this condition is true when query identity does not appear in gallery continue cmc = orig_cmc.cumsum() cmc[cmc > 1] = 1 all_cmc.append(cmc[:max_rank]) num_valid_q += 1. # compute average precision # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision num_rel = orig_cmc.sum() tmp_cmc = orig_cmc.cumsum() tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)] tmp_cmc = np.asarray(tmp_cmc) * orig_cmc AP = tmp_cmc.sum() / num_rel all_AP.append(AP) assert num_valid_q > 0, "Error: all query identities do not appear in gallery" all_cmc = np.asarray(all_cmc).astype(np.float32) all_cmc = all_cmc.sum(0) / num_valid_q mAP = np.mean(all_AP) return all_cmc, mAP ================================================ FILE: trainers/re_ranking.py ================================================ #!/usr/bin/env python2/python3 # -*- coding: utf-8 -*- """ Created on Mon Jun 26 14:46:56 2017 @author: luohao Modified by Houjing Huang, 2017-12-22. - This version accepts distance matrix instead of raw features. - The difference of `/` division between python 2 and 3 is handled. - numpy.float16 is replaced by numpy.float32 for numerical precision. Modified by Zhedong Zheng, 2018-1-12. - replace sort with topK, which save about 30s. """ """ CVPR2017 paper:Zhong Z, Zheng L, Cao D, et al. Re-ranking Person Re-identification with k-reciprocal Encoding[J]. 2017. url:http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf Matlab version: https://github.com/zhunzhong07/person-re-ranking """ """ API q_g_dist: query-gallery distance matrix, numpy array, shape [num_query, num_gallery] q_q_dist: query-query distance matrix, numpy array, shape [num_query, num_query] g_g_dist: gallery-gallery distance matrix, numpy array, shape [num_gallery, num_gallery] k1, k2, lambda_value: parameters, the original paper is (k1=20, k2=6, lambda_value=0.3) Returns: final_dist: re-ranked distance, numpy array, shape [num_query, num_gallery] """ import numpy as np def re_ranking(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3): # The following naming, e.g. gallery_num, is different from outer scope. # Don't care about it. original_dist = np.concatenate( [np.concatenate([q_q_dist, q_g_dist], axis=1), np.concatenate([q_g_dist.T, g_g_dist], axis=1)], axis=0) original_dist = np.power(original_dist, 2).astype(np.float32) original_dist = np.transpose(1. * original_dist/np.max(original_dist,axis = 0)) V = np.zeros_like(original_dist).astype(np.float32) initial_rank = np.argsort(original_dist).astype(np.int32) query_num = q_g_dist.shape[0] gallery_num = q_g_dist.shape[0] + q_g_dist.shape[1] all_num = gallery_num for i in range(all_num): # k-reciprocal neighbors forward_k_neigh_index = initial_rank[i,:k1+1] backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] fi = np.where(backward_k_neigh_index==i)[0] k_reciprocal_index = forward_k_neigh_index[fi] k_reciprocal_expansion_index = k_reciprocal_index for j in range(len(k_reciprocal_index)): candidate = k_reciprocal_index[j] candidate_forward_k_neigh_index = initial_rank[candidate,:int(np.around(k1/2.))+1] candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,:int(np.around(k1/2.))+1] fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0] candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate] if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2./3*len(candidate_k_reciprocal_index): k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) V[i,k_reciprocal_expansion_index] = 1.*weight/np.sum(weight) original_dist = original_dist[:query_num,] if k2 != 1: V_qe = np.zeros_like(V,dtype=np.float32) for i in range(all_num): V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) V = V_qe del V_qe del initial_rank invIndex = [] for i in range(gallery_num): invIndex.append(np.where(V[:,i] != 0)[0]) jaccard_dist = np.zeros_like(original_dist,dtype = np.float32) for i in range(query_num): temp_min = np.zeros(shape=[1,gallery_num],dtype=np.float32) indNonZero = np.where(V[i,:] != 0)[0] indImages = [] indImages = [invIndex[ind] for ind in indNonZero] for j in range(len(indNonZero)): temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) jaccard_dist[i] = 1-temp_min/(2.-temp_min) final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value del original_dist del V del jaccard_dist final_dist = final_dist[:query_num,query_num:] return final_dist def k_reciprocal_neigh( initial_rank, i, k1): forward_k_neigh_index = initial_rank[i,:k1+1] backward_k_neigh_index = initial_rank[forward_k_neigh_index,:k1+1] fi = np.where(backward_k_neigh_index==i)[0] return forward_k_neigh_index[fi] def re_ranking_new(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3): # The following naming, e.g. gallery_num, is different from outer scope. # Don't care about it. original_dist = np.concatenate( [np.concatenate([q_q_dist, q_g_dist], axis=1), np.concatenate([q_g_dist.T, g_g_dist], axis=1)], axis=0) original_dist = 2. - 2 * original_dist #np.power(original_dist, 2).astype(np.float32) original_dist = np.transpose(1. * original_dist/np.max(original_dist,axis = 0)) V = np.zeros_like(original_dist).astype(np.float32) #initial_rank = np.argsort(original_dist).astype(np.int32) # top K1+1 initial_rank = np.argpartition( original_dist, range(1,k1+1) ) query_num = q_g_dist.shape[0] all_num = original_dist.shape[0] for i in range(all_num): # k-reciprocal neighbors k_reciprocal_index = k_reciprocal_neigh( initial_rank, i, k1) k_reciprocal_expansion_index = k_reciprocal_index for j in range(len(k_reciprocal_index)): candidate = k_reciprocal_index[j] candidate_k_reciprocal_index = k_reciprocal_neigh( initial_rank, candidate, int(np.around(k1/2))) if len(np.intersect1d(candidate_k_reciprocal_index,k_reciprocal_index))> 2./3*len(candidate_k_reciprocal_index): k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index,candidate_k_reciprocal_index) k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) weight = np.exp(-original_dist[i,k_reciprocal_expansion_index]) V[i,k_reciprocal_expansion_index] = 1.*weight/np.sum(weight) original_dist = original_dist[:query_num,] if k2 != 1: V_qe = np.zeros_like(V,dtype=np.float32) for i in range(all_num): V_qe[i,:] = np.mean(V[initial_rank[i,:k2],:],axis=0) V = V_qe del V_qe del initial_rank invIndex = [] for i in range(all_num): invIndex.append(np.where(V[:,i] != 0)[0]) jaccard_dist = np.zeros_like(original_dist,dtype = np.float32) for i in range(query_num): temp_min = np.zeros(shape=[1,all_num],dtype=np.float32) indNonZero = np.where(V[i,:] != 0)[0] indImages = [] indImages = [invIndex[ind] for ind in indNonZero] for j in range(len(indNonZero)): temp_min[0,indImages[j]] = temp_min[0,indImages[j]]+ np.minimum(V[i,indNonZero[j]],V[indImages[j],indNonZero[j]]) jaccard_dist[i] = 1-temp_min/(2.-temp_min) final_dist = jaccard_dist*(1-lambda_value) + original_dist*lambda_value del original_dist del V del jaccard_dist final_dist = final_dist[:query_num,query_num:] return final_dist ================================================ FILE: trainers/trainer.py ================================================ # encoding: utf-8 import math import time import numpy as np import random import torch from torch import nn from torch.utils.data import DataLoader from utils.loss import euclidean_dist, hard_example_mining from utils.meters import AverageMeter class cls_tripletTrainer: def __init__(self, opt, model, optimzier, criterion, summary_writer): self.opt = opt self.model = model self.optimizer= optimzier self.criterion = criterion self.summary_writer = summary_writer def train(self, epoch, data_loader): self.model.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() start = time.time() for i, inputs in enumerate(data_loader): data_time.update(time.time() - start) # model optimizer self._parse_data(inputs) self._forward() self.optimizer.zero_grad() self._backward() self.optimizer.step() batch_time.update(time.time() - start) losses.update(self.loss.item()) # tensorboard global_step = epoch * len(data_loader) + i self.summary_writer.add_scalar('loss', self.loss.item(), global_step) self.summary_writer.add_scalar('lr', self.optimizer.param_groups[0]['lr'], global_step) start = time.time() if (i + 1) % self.opt.print_freq == 0: print('Epoch: [{}][{}/{}]\t' 'Batch Time {:.3f} ({:.3f})\t' 'Data Time {:.3f} ({:.3f})\t' 'Loss {:.3f} ({:.3f})\t' .format(epoch, i + 1, len(data_loader), batch_time.val, batch_time.mean, data_time.val, data_time.mean, losses.val, losses.mean)) param_group = self.optimizer.param_groups print('Epoch: [{}]\tEpoch Time {:.3f} s\tLoss {:.3f}\t' 'Lr {:.2e}' .format(epoch, batch_time.sum, losses.mean, param_group[0]['lr'])) print() def _parse_data(self, inputs): imgs, pids, _ = inputs if self.opt.random_crop and random.random() > 0.3: h, w = imgs.size()[-2:] start = int((h-2*w)*random.random()) mask = imgs.new_zeros(imgs.size()) mask[:, :, start:start+2*w, :] = 1 imgs = imgs * mask ''' if random.random() > 0.5: h, w = imgs.size()[-2:] for attempt in range(100): area = h * w target_area = random.uniform(0.02, 0.4) * area aspect_ratio = random.uniform(0.3, 3.33) ch = int(round(math.sqrt(target_area * aspect_ratio))) cw = int(round(math.sqrt(target_area / aspect_ratio))) if cw < w and ch < h: x1 = random.randint(0, h - ch) y1 = random.randint(0, w - cw) imgs[:, :, x1:x1+h, y1:y1+w] = 0 break ''' self.data = imgs.cuda() self.target = pids.cuda() def _forward(self): score, feat = self.model(self.data) self.loss = self.criterion(score, feat, self.target) def _backward(self): self.loss.backward() ================================================ FILE: utils/DistWeightDevianceLoss.py ================================================ from __future__ import absolute_import import torch from torch import nn from torch.autograd import Variable import numpy as np def similarity(inputs_): # Compute similarity mat of deep feature # n = inputs_.size(0) sim = torch.matmul(inputs_, inputs_.t()) return sim def GaussDistribution(data): """ :param data: :return: """ mean_value = torch.mean(data) diff = data - mean_value std = torch.sqrt(torch.mean(torch.pow(diff, 2))) return mean_value, std class DistWeightBinDevianceLoss(nn.Module): def __init__(self, margin=0.5): super(DistWeightBinDevianceLoss, self).__init__() self.margin = margin def forward(self, inputs, targets): n = inputs.size(0) # Compute similarity matrix sim_mat = similarity(inputs) # print(sim_mat) targets = targets.cuda() # split the positive and negative pairs eyes_ = Variable(torch.eye(n, n)).cuda() # eyes_ = Variable(torch.eye(n, n)) pos_mask = targets.expand(n, n).eq(targets.expand(n, n).t()) neg_mask = eyes_.eq(eyes_) - pos_mask pos_mask = pos_mask - eyes_.eq(1) pos_sim = torch.masked_select(sim_mat, pos_mask) neg_sim = torch.masked_select(sim_mat, neg_mask) num_instances = len(pos_sim)//n + 1 num_neg_instances = n - num_instances pos_sim = pos_sim.resize(len(pos_sim)//(num_instances-1), num_instances-1) neg_sim = neg_sim.resize( len(neg_sim) // num_neg_instances, num_neg_instances) # clear way to compute the loss first loss = list() c = 0 for i, pos_pair in enumerate(pos_sim): # print(i) pos_pair = torch.sort(pos_pair)[0] neg_pair = torch.sort(neg_sim[i])[0] neg_mean, neg_std = GaussDistribution(neg_pair) prob = torch.exp(torch.pow(neg_pair - neg_mean, 2) / (2*torch.pow(neg_std, 2))) neg_index = torch.multinomial(prob, num_instances - 1, replacement=False) neg_pair = neg_pair[neg_index] if len(neg_pair) < 1: c += 1 continue if pos_pair[-1].item() > neg_pair[-1].item() + 0.05: c += 1 neg_pair = torch.sort(neg_pair)[0] if i == 1 and np.random.randint(256) == 1: print('neg_pair is ---------', neg_pair) print('pos_pair is ---------', pos_pair.data) pos_loss = torch.mean(torch.log(1 + torch.exp(-2*(pos_pair - self.margin)))) neg_loss = 0.04*torch.mean(torch.log(1 + torch.exp(50*(neg_pair - self.margin)))) loss.append(pos_loss + neg_loss) loss = [torch.unsqueeze(l,0) for l in loss] loss = torch.sum(torch.cat(loss))/n prec = float(c)/n neg_d = torch.mean(neg_sim).item() pos_d = torch.mean(pos_sim).item() return loss, prec, pos_d, neg_d def main(): data_size = 32 input_dim = 3 output_dim = 2 num_class = 4 # margin = 0.5 x = Variable(torch.rand(data_size, input_dim), requires_grad=False) # print(x) w = Variable(torch.rand(input_dim, output_dim), requires_grad=True) inputs = x.mm(w) y_ = 8*list(range(num_class)) targets = Variable(torch.IntTensor(y_)) print(DistWeightBinDevianceLoss()(inputs, targets)) if __name__ == '__main__': main() print('Congratulations to you!') ================================================ FILE: utils/LiftedStructure.py ================================================ from __future__ import absolute_import import torch from torch import nn from torch.autograd import Variable import torch.nn.functional as F import numpy as np def similarity(inputs_): # Compute similarity mat of deep feature # n = inputs_.size(0) sim = torch.matmul(inputs_, inputs_.t()) return sim def pdist(A, squared = False, eps = 1e-4): prod = torch.mm(A, A.t()) norm = prod.diag().unsqueeze(1).expand_as(prod) res = (norm + norm.t() - 2 * prod).clamp(min = 0) return res if squared else res.clamp(min = eps).sqrt() class LiftedStructureLoss(nn.Module): def __init__(self, alpha=10, beta=2, margin=0.5, hard_mining=None, **kwargs): super(LiftedStructureLoss, self).__init__() self.margin = margin self.alpha = alpha self.beta = beta self.hard_mining = hard_mining def forward(self, embeddings, labels): ''' score = embeddings target = labels loss = 0 counter = 0 bsz = score.size(0) mag = (score ** 2).sum(1).expand(bsz, bsz) sim = score.mm(score.transpose(0, 1)) dist = (mag + mag.transpose(0, 1) - 2 * sim) dist = torch.nn.functional.relu(dist).sqrt() for i in range(bsz): t_i = target[i].item() for j in range(i + 1, bsz): t_j = target[j].item() if t_i == t_j: # Negative component # !! Could do other things (like softmax that weights closer negatives) l_ni = (self.margin - dist[i][target != t_i]).exp().sum() l_nj = (self.margin - dist[j][target != t_j]).exp().sum() l_n = (l_ni + l_nj).log() # Positive component l_p = dist[i,j] loss += torch.nn.functional.relu(l_n + l_p) ** 2 counter += 1 return loss / (2 * counter), 0 ''' margin = 1.0 eps = 1e-4 d = pdist(embeddings, squared = False, eps = eps) pos = torch.eq(*[labels.unsqueeze(dim).expand_as(d) for dim in [0, 1]]).type_as(d) neg_i = torch.mul((margin - d).exp(), 1 - pos).sum(1).expand_as(d) return torch.sum(F.relu(pos.triu(1) * ((neg_i + neg_i.t()).log() + d)).pow(2)) / (pos.sum() - len(d)), 0 def main(): data_size = 32 input_dim = 3 output_dim = 2 num_class = 4 # margin = 0.5 x = Variable(torch.rand(data_size, input_dim), requires_grad=False) # print(x) w = Variable(torch.rand(input_dim, output_dim), requires_grad=True) inputs = x.mm(w) y_ = 8*list(range(num_class)) targets = Variable(torch.IntTensor(y_)) print(LiftedStructureLoss()(inputs, targets)) if __name__ == '__main__': main() print('Congratulations to you!') ================================================ FILE: utils/__init__.py ================================================ ================================================ FILE: utils/loss.py ================================================ # encoding: utf-8 import random import torch from torch import nn import torch.nn.functional as F def topk_mask(input, dim, K = 10, **kwargs): index = input.topk(max(1, min(K, input.size(dim))), dim = dim, **kwargs)[1] return torch.autograd.Variable(torch.zeros_like(input.data)).scatter(dim, index, 1.0) def pdist(A, squared = False, eps = 1e-4): prod = torch.mm(A, A.t()) norm = prod.diag().unsqueeze(1).expand_as(prod) res = (norm + norm.t() - 2 * prod).clamp(min = 0) return res if squared else res.clamp(min = eps).sqrt() def normalize(x, axis=-1): """Normalizing to unit length along the specified dimension. Args: x: pytorch Variable Returns: x: pytorch Variable, same shape as input """ x = 1. * x / (torch.norm(x, 2, axis, keepdim=True).expand_as(x) + 1e-12) return x def euclidean_dist(x, y): """ Args: x: pytorch Variable, with shape [m, d] y: pytorch Variable, with shape [n, d] Returns: dist: pytorch Variable, with shape [m, n] """ m, n = x.size(0), y.size(0) xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() dist = xx + yy dist.addmm_(1, -2, x, y.t()) dist = dist.clamp(min=1e-12).sqrt() # for numerical stability return dist def hard_example_mining(dist_mat, labels, margin, return_inds=False): """For each anchor, find the hardest positive and negative sample. Args: dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N] labels: pytorch LongTensor, with shape [N] return_inds: whether to return the indices. Save time if `False`(?) Returns: dist_ap: pytorch Variable, distance(anchor, positive); shape [N] dist_an: pytorch Variable, distance(anchor, negative); shape [N] p_inds: pytorch LongTensor, with shape [N]; indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1 n_inds: pytorch LongTensor, with shape [N]; indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1 NOTE: Only consider the case in which all labels have same num of samples, thus we can cope with all anchors in parallel. """ torch.set_printoptions(threshold=5000) assert len(dist_mat.size()) == 2 assert dist_mat.size(0) == dist_mat.size(1) N = dist_mat.size(0) # shape [N, N] is_pos = labels.expand(N, N).eq(labels.expand(N, N).t()) is_neg = labels.expand(N, N).ne(labels.expand(N, N).t()) # `dist_ap` means distance(anchor, positive) # both `dist_ap` and `relative_p_inds` with shape [N, 1] dist_ap, relative_p_inds = torch.max( dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True) # `dist_an` means distance(anchor, negative) # both `dist_an` and `relative_n_inds` with shape [N, 1] dist_an, relative_n_inds = torch.min( dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True) # shape [N] dist_ap = dist_ap.squeeze(1) dist_an = dist_an.squeeze(1) if return_inds: # shape [N, N] ind = (labels.new().resize_as_(labels) .copy_(torch.arange(0, N).long()) .unsqueeze(0).expand(N, N)) # shape [N, 1] p_inds = torch.gather( ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data) n_inds = torch.gather( ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data) # shape [N] p_inds = p_inds.squeeze(1) n_inds = n_inds.squeeze(1) return dist_ap, dist_an, p_inds, n_inds return dist_ap, dist_an class TripletLoss(object): """Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid). Related Triplet Loss theory can be found in paper 'In Defense of the Triplet Loss for Person Re-Identification'.""" def __init__(self, margin=None): self.margin = margin if margin is not None: self.ranking_loss = nn.MarginRankingLoss(margin=margin) else: self.ranking_loss = nn.SoftMarginLoss() def __call__(self, global_feat, labels, normalize_feature=False): if normalize_feature: global_feat = normalize(global_feat, axis=-1) dist_mat = euclidean_dist(global_feat, global_feat) dist_ap, dist_an = hard_example_mining(dist_mat, labels, self.margin) y = dist_an.new().resize_as_(dist_an).fill_(1) if self.margin is not None: loss = self.ranking_loss(dist_an, dist_ap, y) else: loss = self.ranking_loss(dist_an - dist_ap, y) return loss, dist_ap, dist_an class CrossEntropyLabelSmooth(nn.Module): """Cross entropy loss with label smoothing regularizer. Reference: Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016. Equation: y = (1 - epsilon) * y + epsilon / K. Args: num_classes (int): number of classes. epsilon (float): weight. """ def __init__(self, num_classes, epsilon=0.1, use_gpu=True): super(CrossEntropyLabelSmooth, self).__init__() self.num_classes = num_classes self.epsilon = epsilon self.use_gpu = use_gpu self.logsoftmax = nn.LogSoftmax(dim=1) def forward(self, inputs, targets): """ Args: inputs: prediction matrix (before softmax) with shape (batch_size, num_classes) targets: ground truth labels with shape (num_classes) """ log_probs = self.logsoftmax(inputs) targets = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).cpu(), 1) if self.use_gpu: targets = targets.cuda() targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes loss = (- targets * log_probs).mean(0).sum() return loss class Margin: def __call__(self, embeddings, labels): embeddings = F.normalize(embeddings) alpha = 0.2 beta = 1.2 distance_threshold = 0.5 inf = 1e6 eps = 1e-6 distance_weighted_sampling = True d = pdist(embeddings) pos = torch.eq(*[labels.unsqueeze(dim).expand_as(d) for dim in [0, 1]]).type_as(d) - torch.autograd.Variable(torch.eye(len(d))).type_as(d) num_neg = int(pos.data.sum() / len(pos)) if distance_weighted_sampling: ''' dim = embeddings.size(-1) distance = d.data.clamp(min = distance_threshold) distribution = distance.pow(dim - 2) * ((1 - distance.pow(2) / 4).pow(0.5 * (dim - 3))) weights = distribution.reciprocal().masked_fill_(pos.data + torch.eye(len(d)).type_as(d.data) > 0, eps) samples = torch.multinomial(weights, replacement = False, num_samples = num_neg) neg = torch.autograd.Variable(torch.zeros_like(pos.data).scatter_(1, samples, 1)) ''' neg = torch.autograd.Variable(torch.zeros_like(pos.data).scatter_(1, torch.multinomial((d.data.clamp(min = distance_threshold).pow(embeddings.size(-1) - 2) * (1 - d.data.clamp(min = distance_threshold).pow(2) / 4).pow(0.5 * (embeddings.size(-1) - 3))).reciprocal().masked_fill_(pos.data + torch.eye(len(d)).type_as(d.data) > 0, eps), replacement = False, num_samples = num_neg), 1)) else: neg = topk_mask(d + inf * ((pos > 0) + (d < distance_threshold)).type_as(d), dim = 1, largest = False, K = num_neg) L = F.relu(alpha + (pos * 2 - 1) * (d - beta)) M = ((pos + neg > 0) * (L > 0)).float() return (M * L).sum() / M.sum(), 0 ================================================ FILE: utils/meters.py ================================================ # encoding: utf-8 import math import numpy as np class AverageMeter(object): def __init__(self): self.n = 0 self.sum = 0.0 self.var = 0.0 self.val = 0.0 self.mean = np.nan self.std = np.nan def update(self, value, n=1): self.val = value self.sum += value self.var += value * value self.n += n if self.n == 0: self.mean, self.std = np.nan, np.nan elif self.n == 1: self.mean, self.std = self.sum, np.inf else: self.mean = self.sum / self.n self.std = math.sqrt( (self.var - self.n * self.mean * self.mean) / (self.n - 1.0)) def value(self): return self.mean, self.std def reset(self): self.n = 0 self.sum = 0.0 self.var = 0.0 self.val = 0.0 self.mean = np.nan self.std = np.nan ================================================ FILE: utils/random_erasing.py ================================================ from __future__ import absolute_import from torchvision.transforms import * from PIL import Image import random import math import numpy as np import torch class Cutout(object): def __init__(self, probability = 0.5, size = 64, mean=[0.4914, 0.4822, 0.4465]): self.probability = probability self.mean = mean self.size = size def __call__(self, img): if random.uniform(0, 1) > self.probability: return img h = self.size w = self.size for attempt in range(100): area = img.size()[1] * img.size()[2] if w < img.size()[2] and h < img.size()[1]: x1 = random.randint(0, img.size()[1] - h) y1 = random.randint(0, img.size()[2] - w) if img.size()[0] == 3: img[0, x1:x1+h, y1:y1+w] = self.mean[0] img[1, x1:x1+h, y1:y1+w] = self.mean[1] img[2, x1:x1+h, y1:y1+w] = self.mean[2] else: img[0, x1:x1+h, y1:y1+w] = self.mean[0] return img return img class RandomErasing(object): """ Randomly selects a rectangle region in an image and erases its pixels. 'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/pdf/1708.04896.pdf Args: probability: The probability that the Random Erasing operation will be performed. sl: Minimum proportion of erased area against input image. sh: Maximum proportion of erased area against input image. r1: Minimum aspect ratio of erased area. mean: Erasing value. """ def __init__(self, probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0.4914, 0.4822, 0.4465]): self.probability = probability self.mean = mean self.sl = sl self.sh = sh self.r1 = r1 def __call__(self, img): if random.uniform(0, 1) > self.probability: return img for attempt in range(100): area = img.size()[1] * img.size()[2] target_area = random.uniform(self.sl, self.sh) * area aspect_ratio = random.uniform(self.r1, 1/self.r1) h = int(round(math.sqrt(target_area * aspect_ratio))) w = int(round(math.sqrt(target_area / aspect_ratio))) if w < img.size()[2] and h < img.size()[1]: x1 = random.randint(0, img.size()[1] - h) y1 = random.randint(0, img.size()[2] - w) if img.size()[0] == 3: img[0, x1:x1+h, y1:y1+w] = self.mean[0] img[1, x1:x1+h, y1:y1+w] = self.mean[1] img[2, x1:x1+h, y1:y1+w] = self.mean[2] else: img[0, x1:x1+h, y1:y1+w] = self.mean[0] return img return img ================================================ FILE: utils/serialization.py ================================================ # encoding: utf-8 import errno import os import shutil import sys import os.path as osp import torch class Logger(object): """ Write console output to external text file. Code imported from https://github.com/Cysu/open-reid/blob/master/reid/utils/logging.py. """ def __init__(self, fpath=None): self.console = sys.stdout self.file = None if fpath is not None: mkdir_if_missing(os.path.dirname(fpath)) self.file = open(fpath, 'w') def __del__(self): self.close() def __enter__(self): pass def __exit__(self, *args): self.close() def write(self, msg): self.console.write(msg) if self.file is not None: self.file.write(msg) def flush(self): self.console.flush() if self.file is not None: self.file.flush() os.fsync(self.file.fileno()) def close(self): self.console.close() if self.file is not None: self.file.close() def mkdir_if_missing(dir_path): try: os.makedirs(dir_path) except OSError as e: if e.errno != errno.EEXIST: raise def save_checkpoint(state, is_best, save_dir, filename): fpath = osp.join(save_dir, filename) mkdir_if_missing(save_dir) torch.save(state, fpath) if is_best: shutil.copy(fpath, osp.join(save_dir, 'model_best.pth.tar')) ================================================ FILE: utils/transforms.py ================================================ # encoding: utf-8 from PIL import Image from torchvision import transforms as T from utils.random_erasing import RandomErasing, Cutout import random class Random2DTranslation(object): """ With a probability, first increase image size to (1 + 1/8), and then perform random crop. Args: height (int): target height. width (int): target width. p (float): probability of performing this transformation. Default: 0.5. """ def __init__(self, height, width, p=0.5, interpolation=Image.BILINEAR): self.height = height self.width = width self.p = p self.interpolation = interpolation def __call__(self, img): """ Args: img (PIL Image): Image to be cropped. Returns: PIL Image: Cropped image. """ if random.random() < self.p: return img.resize((self.width, self.height), self.interpolation) new_width, new_height = int( round(self.width * 1.125)), int(round(self.height * 1.125)) resized_img = img.resize((new_width, new_height), self.interpolation) x_maxrange = new_width - self.width y_maxrange = new_height - self.height x1 = int(round(random.uniform(0, x_maxrange))) y1 = int(round(random.uniform(0, y_maxrange))) croped_img = resized_img.crop( (x1, y1, x1 + self.width, y1 + self.height)) return croped_img def pad_shorter(x): h,w = x.size[-2:] s = max(h, w) new_im = Image.new("RGB", (s, s)) new_im.paste(x, ((s-h)//2, (s-w)//2)) return new_im class TrainTransform(object): def __init__(self, data): self.data = data def __call__(self, x): if self.data == 'person': x = T.Resize((384, 128))(x) elif self.data == 'car': x = pad_shorter(x) x = T.Resize((256, 256))(x) x = T.RandomCrop((224, 224))(x) elif self.data == 'cub': x = pad_shorter(x) x = T.Resize((256, 256))(x) x = T.RandomCrop((224, 224))(x) elif self.data == 'clothes': x = pad_shorter(x) x = T.Resize((256, 256))(x) x = T.RandomCrop((224, 224))(x) elif self.data == 'product': x = pad_shorter(x) x = T.Resize((256, 256))(x) x = T.RandomCrop((224, 224))(x) elif self.data == 'cifar': x = T.Resize((40, 40))(x) x = T.RandomCrop((32, 32))(x) x = T.RandomHorizontalFlip()(x) x = T.ToTensor()(x) x = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(x) if self.data == 'person': x = Cutout(probability = 0.5, size=64, mean=[0.0, 0.0, 0.0])(x) else: x = RandomErasing(probability = 0.5, mean=[0.0, 0.0, 0.0])(x) return x class TestTransform(object): def __init__(self, data, flip=False): self.data = data self.flip = flip def __call__(self, x=None): if self.data == 'cub': x = pad_shorter(x) x = T.Resize((256, 256))(x) elif self.data == 'car': #x = pad_shorter(x) x = T.Resize((256, 256))(x) elif self.data == 'clothes': x = pad_shorter(x) x = T.Resize((256, 256))(x) elif self.data == 'product': x = pad_shorter(x) x = T.Resize((224, 224))(x) elif self.data == 'person': x = T.Resize((384, 128))(x) if self.flip: x = T.functional.hflip(x) x = T.ToTensor()(x) x = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(x) return x ================================================ FILE: utils/validation_metrics.py ================================================ # encoding: utf-8 def accuracy(score, target, topk=(1,)): maxk = max(topk) batch_size = target.size(0) _, pred = score.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) ret = [] for k in topk: correct_k = correct[:k].view(-1).float().sum(dim=0, keepdim=True) ret.append(correct_k.mul_(1. / batch_size)) return ret