Repository: hygenie1228/ClothWild_RELEASE Branch: main Commit: f166dbdadc8a Files: 37 Total size: 20.7 MB Directory structure: gitextract_nd4dnj_b/ ├── .gitignore ├── README.md ├── assets/ │ └── directory.md ├── common/ │ ├── base.py │ ├── logger.py │ ├── nets/ │ │ ├── layer.py │ │ ├── loss.py │ │ ├── module.py │ │ └── resnet.py │ ├── timer.py │ └── utils/ │ ├── SMPLicit/ │ │ └── SMPLicit/ │ │ ├── SMPL.py │ │ ├── SMPLicit.py │ │ ├── SMPLicit_options.py │ │ ├── __init__.py │ │ ├── network.py │ │ ├── smplicit_core_test.py │ │ ├── util_smpl.py │ │ └── utils/ │ │ ├── __init__.py │ │ └── sdf.py │ ├── dir.py │ ├── human_models.py │ ├── postprocessing.py │ ├── preprocessing.py │ ├── transforms.py │ └── vis.py ├── data/ │ ├── DeepFashion2/ │ │ └── DeepFashion2.py │ ├── MSCOCO/ │ │ └── MSCOCO.py │ ├── PW3D/ │ │ └── PW3D.py │ └── dataset.py ├── demo/ │ ├── demo.py │ ├── output.obj │ └── pose2pose_result.json ├── main/ │ ├── config.py │ ├── model.py │ ├── test.py │ └── train.py └── requirements.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Created by .ignore support plugin (hsz.mobi) ### Python template # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # custom .profile ./*.png ./*.jpg *.out data/base_data data/preprocessed_data data/*/parses data/*/data data/*/*.json data/*/*/*.json data/*/images data*/data data/*/annotation data/*/annotations demo/*.pth.tar output # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ### macOS template # General .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### JetBrains template # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff .idea/**/workspace.xml .idea/**/tasks.xml .idea/**/dictionaries .idea/**/shelf # Sensitive or high-churn files .idea/**/dataSources/ .idea/**/dataSources.ids .idea/**/dataSources.local.xml .idea/**/sqlDataSources.xml .idea/**/dynamic.xml .idea/**/uiDesigner.xml .idea/**/dbnavigator.xml # Gradle .idea/**/gradle.xml .idea/**/libraries # CMake cmake-build-debug/ cmake-build-release/ # Mongo Explorer plugin .idea/**/mongoSettings.xml # File-based project format *.iws # IntelliJ out/ # mpeltonen/sbt-idea plugin .idea_modules/ # JIRA plugin atlassian-ide-plugin.xml # Cursive Clojure plugin .idea/replstate.xml # Crashlytics plugin (for Android Studio and IntelliJ) com_crashlytics_export_strings.xml crashlytics.properties crashlytics-build.properties fabric.properties # Editor-based Rest Client .idea/httpRequests experiment/ data/PW3D/data/ ================================================ FILE: README.md ================================================ # **3D Clothed Human Reconstruction in the Wild (ClothWild codes)** > [**3D Clothed Human Reconstruction in the Wild**](https://arxiv.org/abs/2207.10053), > Gyeongsik Moon*, > Hyeongjin Nam*, > Takaaki Shiratori, > Kyoung Mu Lee (* equal contribution) > *European Conference on Computer Vision (ECCV), 2022*

## Installation
* We recommend using an [Anaconda](https://www.anaconda.com/) virtual environment with Python >= 3.7.0 and PyTorch >= 1.8.0.
* Install PyTorch3D following [the official instructions](https://github.com/facebookresearch/pytorch3d/blob/main/INSTALL.md) for your environment.
* Then run `sh requirements.sh`. You also need to slightly modify the `torchgeometry` kernel code as described [here](https://github.com/mks0601/I2L-MeshNet_RELEASE/issues/6#issuecomment-675152527).

## Quick demo
* Download the pre-trained weights from [here](https://drive.google.com/file/d/1-gafc-6V1ma7L5NS1JphzbhTdO_pQFuL/view?usp=sharing) and place them in the `demo` folder.
* Prepare the `base_data` folder following the [`Directory`](./assets/directory.md/#required-data) section below.
* Prepare `input.png` and edit its `bbox` in `demo/demo.py`.
* Prepare the SMPL parameters as `pose2pose_result.json`. You can obtain them by running the off-the-shelf method [[code](https://github.com/mks0601/Hand4Whole_RELEASE/tree/Pose2Pose)].
* Run `python demo.py --gpu 0`.

## Directory
Refer to [here](./assets/directory.md).

## Running ClothWild
### Train
You can change which datasets to use in `main/config.py`.
```
cd ${ROOT}/main
python train.py --gpu 0
```
### Test
Place the trained model at `output/model_dump` and follow the commands below.
To evaluate CD (Chamfer Distance) on 3DPW, run
```
cd ${ROOT}/main
python test.py --gpu 0 --test_epoch 7 --type cd
```
To evaluate BCC (Body-Cloth Correspondence) on MSCOCO, run
```
cd ${ROOT}/main
python test.py --gpu 0 --test_epoch 7 --type bcc
```
You can download the checkpoint trained on MSCOCO+DeepFashion2 from [here](https://drive.google.com/file/d/1-gafc-6V1ma7L5NS1JphzbhTdO_pQFuL/view?usp=sharing).

## Result
Refer to the [paper](https://arxiv.org/abs/2207.10053)'s main manuscript and supplementary material for diverse qualitative results!

### Chamfer Distance (CD)
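For reference, CD measures how far the reconstructed clothed surface is from the ground-truth clothed mesh as an average nearest-neighbor distance in both directions. Below is a minimal sketch of a symmetric Chamfer Distance between two sampled point clouds; the repository's actual protocol (point sampling, scaling, and per-garment handling) is implemented in the 3DPW dataset's `evaluate()` and may differ.
```
# Minimal sketch only (not the repository's evaluation code): symmetric Chamfer
# Distance between two point clouds sampled from the predicted and GT surfaces.
import torch

def chamfer_distance(pred_pts, gt_pts):
    # pred_pts: (N, 3), gt_pts: (M, 3)
    dists = torch.cdist(pred_pts[None], gt_pts[None])[0]  # (N, M) pairwise Euclidean distances
    pred_to_gt = dists.min(dim=1).values.mean()           # average NN distance, prediction -> GT
    gt_to_pred = dists.min(dim=0).values.mean()           # average NN distance, GT -> prediction
    return pred_to_gt + gt_to_pred
```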

### Body-Cloth Correspondence (BCC)
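A hedged sketch of one plausible reading of the BCC metric: the fraction of DensePose-sampled body points whose predicted cloth coverage agrees with the ground-truth cloth parse. The exact definition used by `test.py --type bcc` is given in the paper and implemented in the MSCOCO dataset's `evaluate()`, so treat the snippet below as illustrative only.
```
# Illustrative only; the argument names and the coverage definition are assumptions,
# not the repository's exact BCC computation.
import numpy as np

def bcc_accuracy(pred_covered, gt_covered, valid):
    # pred_covered, gt_covered: (num_points,) boolean coverage flags per sampled body point
    # valid: (num_points,) boolean mask of points with a usable parse label
    agree = (pred_covered == gt_covered) & valid
    return float(agree.sum()) / max(int(valid.sum()), 1)
```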

## Reference ``` @InProceedings{Moon_2022_ECCV_ClothWild, author = {Moon, Gyeongsik and Nam, Hyeongjin and Shiratori, Takaaki and Lee, Kyoung Mu}, title = {3D Clothed Human Reconstruction in the Wild}, booktitle = {European Conference on Computer Vision (ECCV)}, year = {2022} } ``` ================================================ FILE: assets/directory.md ================================================ # Directory ## Root The `${ROOT}` is described as below. ``` ${ROOT} |-- common |-- data |-- demo |-- main |-- output ``` * `common` contains kernel codes for ClothWild. * `data` contains required data and soft links to images and annotations directories. * `demo` contains demo codes. * `main` contains high-level codes for training or testing the network. * `output` contains log, trained models, visualized outputs, and test result. ## Required data You need to follow directory structure of the `data` as below. ``` ${ROOT} |-- data | |-- base_data | | |-- human_models | | | |-- SMPL_FEMALE.pkl | | | |-- SMPL_MALE.pkl | | | |-- SMPL_NEUTRAL.pkl | | |-- smplicit | | | |-- checkpoints | | | | |-- hair.pth | | | | |-- pants.pth | | | | |-- shoes.pth | | | | |-- skirts.pth | | | | |-- upperclothes.pth | | | |-- clusters | | | | |-- clusters_hairs.npy | | | | |-- clusters_lowerbody.npy | | | | |-- clusters_shoes.npy | | | | |-- indexs_clusters_tshirt_smpl.npy | |-- preprocessed_data | | |-- densepose | | |-- gender | | |-- parse | | |-- smpl_param | |-- ... ``` * `base_data/human_model_files` contains `smpl` 3D model files. Download the files from [[smpl](https://smpl.is.tue.mpg.de/)]. * `base_data/smplicit` contains 3D cloth generative model (`SMPLicit`) files. Download the files from [[smplicit](https://github.com/enriccorona/SMPLicit)]. * `preprocessed_data` is required for training and testing stages. Download it from [[preprocessed_data](https://drive.google.com/file/d/1m5AfZt2qx90DkbXACuDK8D1tWl7LuPKe/view?usp=sharing)]. ## Dataset ``` ${ROOT} |-- data | |-- ... | |-- DeepFashion2 | | |-- data | | | |-- train | | | |-- DeepFashion2_train.json | |-- MSCOCO | | |-- images | | | |-- train2017 | | | |-- val2017 | | |-- parses | | |-- annotations | | | |-- coco_wholebody_train_v1.0.json | | | |-- coco_wholebody_val_v1.0.json | | | |-- coco_dp_train.json | | | |-- coco_dp_val.json | |-- PW3D | | |-- data | | | |-- imageFiles | | | |-- sequenceFiles | | | |-- 3DPW_test.json ``` * Download DeepFashion2 parsed data [[data](https://github.com/switchablenorms/DeepFashion2)] [[annot](https://drive.google.com/drive/folders/1P2AgxZZsq21fcGnP_RNuvaEOrye1SBkj?usp=sharing)] * Download MSCOCO data, parses (LIP dataset), and densepose [[data](https://github.com/jin-s13/COCO-WholeBody)] [[parses](https://drive.google.com/file/d/1i2-qbNPTtn2kmxko--riEJjBehZnvumw/view?usp=sharing)] [[densepose](https://drive.google.com/drive/folders/1P2AgxZZsq21fcGnP_RNuvaEOrye1SBkj?usp=sharing)] * Download 3DPW parsed data [[data](https://virtualhumans.mpi-inf.mpg.de/3DPW/)] [[annot]](https://drive.google.com/drive/folders/1P2AgxZZsq21fcGnP_RNuvaEOrye1SBkj?usp=sharing) * All annotation files follow [MSCOCO format](http://cocodataset.org/#format-data). If you want to add your own dataset, you have to convert it to [MSCOCO format](http://cocodataset.org/#format-data). ### Output ``` ${ROOT} |-- output | |-- log | |-- model_dump | |-- result | |-- vis ``` * Creating `output` folder as soft link form is recommended instead of folder form because it would take large storage capacity. * `log` folder contains training log file. 
* `model_dump` folder contains saved checkpoints for each epoch. * `result` folder contains final estimation files generated in the testing stage. * `vis` folder contains visualized results. ================================================ FILE: common/base.py ================================================ import os import os.path as osp import math import time import glob import abc import numpy as np from torch.utils.data import DataLoader import torch.optim import torchvision.transforms as transforms from collections import OrderedDict from timer import Timer from logger import colorlogger from torch.nn.parallel.data_parallel import DataParallel from config import cfg from utils.dir import make_folder from model import get_model from dataset import MultipleDatasets from utils.human_models import smpl # dynamic dataset import for i in range(len(cfg.trainset_2d)): exec('from ' + cfg.trainset_2d[i] + ' import ' + cfg.trainset_2d[i]) for i in range(len(cfg.testset)): exec('from ' + cfg.testset[i] + ' import ' + cfg.testset[i]) def worker_init_fn(worder_id): np.random.seed(np.random.get_state()[1][0] + worder_id) class Base(object): __metaclass__ = abc.ABCMeta def __init__(self, log_name='logs.txt'): self.cur_epoch = 0 # timer self.tot_timer = Timer() self.gpu_timer = Timer() self.read_timer = Timer() # logger self.logger = colorlogger(cfg.log_dir, log_name=log_name) @abc.abstractmethod def _make_batch_generator(self): return @abc.abstractmethod def _make_model(self): return class Trainer(Base): def __init__(self): super(Trainer, self).__init__(log_name = 'train_logs.txt') def get_optimizer(self, model): total_params = [] for module in model.module.trainable_modules: total_params += list(module.parameters()) optimizer = torch.optim.Adam(total_params, lr=cfg.lr) return optimizer def save_model(self, state, epoch): file_path = osp.join(cfg.model_dir,'snapshot_{}.pth.tar'.format(str(epoch))) # do not save smpl & smplicit layer weights dump_key = [] for k in state['network'].keys(): if 'smpl_layer' in k: dump_key.append(k) if 'smplicit_layer' in k: dump_key.append(k) for k in dump_key: state['network'].pop(k, None) torch.save(state, file_path) self.logger.info("Write snapshot into {}".format(file_path)) def load_model(self, model, optimizer): model_file_list = glob.glob(osp.join(cfg.model_dir,'*.pth.tar')) cur_epoch = max([int(file_name[file_name.find('snapshot_') + 9 : file_name.find('.pth.tar')]) for file_name in model_file_list]) ckpt_path = osp.join(cfg.model_dir, 'snapshot_' + str(cur_epoch) + '.pth.tar') ckpt = torch.load(ckpt_path) start_epoch = ckpt['epoch'] + 1 model.load_state_dict(ckpt['network'], strict=False) print("cur_epoch: ", cur_epoch) self.logger.info('Load checkpoint from {}'.format(ckpt_path)) return start_epoch, model, optimizer def set_lr(self, epoch): for e in cfg.lr_dec_epoch: if epoch < e: break if epoch < cfg.lr_dec_epoch[-1]: idx = cfg.lr_dec_epoch.index(e) for g in self.optimizer.param_groups: g['lr'] = cfg.lr / (cfg.lr_dec_factor ** idx) else: for g in self.optimizer.param_groups: g['lr'] = cfg.lr / (cfg.lr_dec_factor ** len(cfg.lr_dec_epoch)) def get_lr(self): for g in self.optimizer.param_groups: cur_lr = g['lr'] return cur_lr def _make_batch_generator(self): # data load and construct batch generator self.logger.info("Creating dataset...") trainset3d_loader = [] for i in range(len(cfg.trainset_3d)): trainset3d_loader.append(eval(cfg.trainset_3d[i])(transforms.ToTensor(), "train")) trainset2d_loader = [] for i in range(len(cfg.trainset_2d)): 
trainset2d_loader.append(eval(cfg.trainset_2d[i])(transforms.ToTensor(), "train")) valid_loader_num = 0 if len(trainset3d_loader) > 0: trainset3d_loader = [MultipleDatasets(trainset3d_loader, make_same_len=False)] valid_loader_num += 1 else: trainset3d_loader = [] if len(trainset2d_loader) > 0: trainset2d_loader = [MultipleDatasets(trainset2d_loader, make_same_len=False)] valid_loader_num += 1 else: trainset2d_loader = [] if valid_loader_num > 1: trainset_loader = MultipleDatasets(trainset3d_loader + trainset2d_loader, make_same_len=True) else: trainset_loader = MultipleDatasets(trainset3d_loader + trainset2d_loader, make_same_len=False) self.itr_per_epoch = math.ceil(len(trainset_loader) / cfg.num_gpus / cfg.train_batch_size) self.batch_generator = DataLoader(dataset=trainset_loader, batch_size=cfg.num_gpus*cfg.train_batch_size, shuffle=True, num_workers=cfg.num_thread, pin_memory=True, drop_last=True, worker_init_fn=worker_init_fn) def _make_model(self): # prepare network self.logger.info("Creating graph and optimizer...") model = get_model('train') model = DataParallel(model).cuda() optimizer = self.get_optimizer(model) if cfg.continue_train: start_epoch, model, optimizer = self.load_model(model, optimizer) else: start_epoch = 0 model.train() self.start_epoch = start_epoch self.model = model self.optimizer = optimizer class Tester(Base): def __init__(self, test_epoch): self.test_epoch = int(test_epoch) super(Tester, self).__init__(log_name = 'test_logs.txt') def _make_batch_generator(self): # data load and construct batch generator self.logger.info("Creating dataset...") testset_loader = eval(cfg.testset[0])(transforms.ToTensor(), "test") batch_generator = DataLoader(dataset=testset_loader, batch_size=cfg.num_gpus*cfg.test_batch_size, shuffle=False, num_workers=cfg.num_thread, pin_memory=True, worker_init_fn=worker_init_fn) self.testset = testset_loader self.batch_generator = batch_generator def _make_model(self): model_path = os.path.join(cfg.model_dir, 'snapshot_%d.pth.tar' % self.test_epoch) assert os.path.exists(model_path), 'Cannot find model at ' + model_path self.logger.info('Load checkpoint from {}'.format(model_path)) # prepare network model = get_model('test') model = DataParallel(model).cuda() ckpt = torch.load(model_path) model.load_state_dict(ckpt['network'], strict=False) model.eval() self.model = model def _evaluate(self, outs, cur_sample_idx): eval_result = self.testset.evaluate(outs, cur_sample_idx) return eval_result def _print_eval_result(self, eval_result): self.testset.print_eval_result(eval_result) def check_data_parallel(train_weight): new_state_dict = OrderedDict() for k, v in train_weight.items(): name = k[7:] if k.startswith('module') else k # remove `module.` new_state_dict[name] = v return new_state_dict ================================================ FILE: common/logger.py ================================================ import logging import os OK = '\033[92m' WARNING = '\033[93m' FAIL = '\033[91m' END = '\033[0m' PINK = '\033[95m' BLUE = '\033[94m' GREEN = OK RED = FAIL WHITE = END YELLOW = WARNING class colorlogger(): def __init__(self, log_dir, log_name='train_logs.txt'): # set log self._logger = logging.getLogger(log_name) self._logger.setLevel(logging.INFO) log_file = os.path.join(log_dir, log_name) if not os.path.exists(log_dir): os.makedirs(log_dir) file_log = logging.FileHandler(log_file, mode='a') file_log.setLevel(logging.INFO) console_log = logging.StreamHandler() console_log.setLevel(logging.INFO) formatter = logging.Formatter( "{}%(asctime)s{} 
%(message)s".format(GREEN, END), "%m-%d %H:%M:%S") file_log.setFormatter(formatter) console_log.setFormatter(formatter) self._logger.addHandler(file_log) self._logger.addHandler(console_log) def debug(self, msg): self._logger.debug(str(msg)) def info(self, msg): self._logger.info(str(msg)) def warning(self, msg): self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) def critical(self, msg): self._logger.critical(RED + 'CRI: ' + str(msg) + END) def error(self, msg): self._logger.error(RED + 'ERR: ' + str(msg) + END) ================================================ FILE: common/nets/layer.py ================================================ import torch import torch.nn as nn from torch.nn import functional as F import math from config import cfg def make_linear_layers(feat_dims, relu_final=True, use_bn=False): layers = [] for i in range(len(feat_dims)-1): layers.append(nn.Linear(feat_dims[i], feat_dims[i+1])) # Do not use ReLU for final estimation if i < len(feat_dims)-2 or (i == len(feat_dims)-2 and relu_final): if use_bn: layers.append(nn.BatchNorm1d(feat_dims[i+1])) layers.append(nn.ReLU(inplace=True)) return nn.Sequential(*layers) def make_conv_layers(feat_dims, kernel=3, stride=1, padding=1, bnrelu_final=True): layers = [] for i in range(len(feat_dims)-1): layers.append( nn.Conv2d( in_channels=feat_dims[i], out_channels=feat_dims[i+1], kernel_size=kernel, stride=stride, padding=padding )) # Do not use BN and ReLU for final estimation if i < len(feat_dims)-2 or (i == len(feat_dims)-2 and bnrelu_final): layers.append(nn.BatchNorm2d(feat_dims[i+1])) layers.append(nn.ReLU(inplace=True)) return nn.Sequential(*layers) def make_deconv_layers(feat_dims, bnrelu_final=True): layers = [] for i in range(len(feat_dims)-1): layers.append( nn.ConvTranspose2d( in_channels=feat_dims[i], out_channels=feat_dims[i+1], kernel_size=4, stride=2, padding=1, output_padding=0, bias=False)) # Do not use BN and ReLU for final estimation if i < len(feat_dims)-2 or (i == len(feat_dims)-2 and bnrelu_final): layers.append(nn.BatchNorm2d(feat_dims[i+1])) layers.append(nn.ReLU(inplace=True)) return nn.Sequential(*layers) ================================================ FILE: common/nets/loss.py ================================================ import torch import torch.nn as nn from torch.nn import functional as F from utils.human_models import smpl from utils.vis import save_obj from config import cfg class ClothClsLoss(nn.Module): def __init__(self): super(ClothClsLoss, self).__init__() self.dp_parts = { 'head': [22,23], 'upperbody': [0,1,14,15,16,17,18,19,20,21], 'lowerbody': [6,7,8,9,10,11,12,13], 'foot': [4,5] } self.part_clothes={ 'head': ['hair'], 'upperbody': ['uppercloth', 'coat'], 'lowerbody': ['pants', 'skirts'], 'foot': ['shoes'] } self.bce_loss = nn.BCELoss(reduction='none') def forward(self, out, patch_idx, cloth_idx): # valid only on visible valid = torch.zeros_like(out).cuda() index_gt = torch.zeros_like(out).cuda() for part in self.dp_parts.keys(): valid_one_part = torch.zeros((out.shape[0],)).cuda() for part_idx in self.dp_parts[part]: valid_one_part += (patch_idx == part_idx).any(1) for cloth in self.part_clothes[part]: if cloth in cfg.cloth_types: valid[valid_one_part>0, cfg.cloth_types.index(cloth)] = 1 for idx in range(len(cfg.cloth_types)): index_gt[:, idx] += (cloth_idx==idx+1).any(1) loss = self.bce_loss(out, index_gt) loss = loss[valid>0] return loss.mean() class GenderClsLoss(nn.Module): def __init__(self): super(GenderClsLoss, self).__init__() self.bce_loss = nn.BCELoss(reduction='none') def 
forward(self, out, gt): valid = (gt != 0) # if neutral gender, set valid = 0 gt = F.one_hot((gt.long()), num_classes=3)[:,1:].float() loss = self.bce_loss(out, gt) loss = loss[valid] return loss.mean() class SdfDPLoss(nn.Module): def __init__(self): super(SdfDPLoss, self).__init__() def forward(self, sdf, cloth_meshes_unposed, smpl_cloth_idx, smpl_cloth_valid, cloth_idx, sdf_thresh, dist_thresh, v_template): batch_size = sdf.shape[0] cloth_type = cfg.cloth_types[cloth_idx[0]-1] loss_list = [] for bid in range(batch_size): smpl_mask = smpl_cloth_valid[bid] > 0 smpl_verts = v_template[bid][smpl_mask[:,None].repeat(1,3)].view(-1,3) cloth_verts = cloth_meshes_unposed[bid] if smpl_verts.shape[0] > 0: dists = torch.sqrt(torch.sum((smpl_verts[None,:,:] - cloth_verts[:,None,:])**2,2)) else: loss_list.append(torch.zeros((1)).mean().float().cuda()) continue # remove too closest query points dists[dists 0) * (dists < dist_thresh) loss_neg = torch.abs(sdf[bid,:] - sdf_thresh) * (sum([target_cloth_idx == idx for idx in cloth_idx]) == 0) * (dists < dist_thresh) cloth_exist = (sum([target_cloth_idx == idx for idx in cloth_idx]) > 0).sum() > 0 loss = (loss_pos + loss_neg).mean() * cloth_exist loss_list.append(loss) loss = torch.stack(loss_list) return loss class RegLoss(nn.Module): def __init__(self): super(RegLoss, self).__init__() self.l2_loss = nn.MSELoss(reduction='none') def forward(self, param, valid): zeros = torch.zeros_like(param).cuda() loss = self.l2_loss(param, zeros) * valid[:,None] return loss.mean() class SdfParseLoss(nn.Module): def __init__(self): super(SdfParseLoss, self).__init__() def forward(self, sdf, cloth_meshes, parse_gt, sdf_thresh, cloth_meshes_unposed, parse_valid, dist_thresh, v_template): batch_size = sdf.shape[0] inf = 9999 # mask invalid xy coordinatets x, y = cloth_meshes[:,:,0].long(), cloth_meshes[:,:,1].long() idx = y * cfg.input_img_shape[1] + x is_valid = (x >= 0) * (x < cfg.input_img_shape[1]) * (y >= 0) * (y < cfg.input_img_shape[0]) idx[is_valid == 0] = 0 # minimum sdf min_sdf = sdf * is_valid.float() + inf * (1 - is_valid.float()) parse_out_min = torch.ones((batch_size, cfg.input_img_shape[0] * cfg.input_img_shape[1])).float().cuda() * inf # maximum sdf max_sdf = sdf * is_valid.float() - inf * (1 - is_valid.float()) parse_out_max = torch.ones((batch_size, cfg.input_img_shape[0] * cfg.input_img_shape[1])).float().cuda() * -inf try: parse_out_min, _ = scatter_min(min_sdf, idx, 1, parse_out_min) parse_out_max, _ = scatter_max(max_sdf, idx, 1, parse_out_max) except: # some GPUs have trouble in torch_scatter, compute in CPU idx = idx.cpu() min_sdf, max_sdf = min_sdf.cpu(), max_sdf.cpu() parse_out_min, parse_out_max = parse_out_min.cpu(), parse_out_max.cpu() parse_out_min, _ = scatter_min(min_sdf, idx, 1, parse_out_min) parse_out_max, _ = scatter_max(max_sdf, idx, 1, parse_out_max) parse_out_min, parse_out_max = parse_out_min.cuda(), parse_out_max.cuda() parse_out_min = parse_out_min.view(batch_size, cfg.input_img_shape[0], cfg.input_img_shape[1]) parse_out_min[parse_out_min == inf] = 0 parse_out_max = parse_out_max.view(batch_size, cfg.input_img_shape[0], cfg.input_img_shape[1]) parse_out_max[parse_out_max == -inf] = sdf_thresh loss_pos = torch.abs(parse_out_min) * (parse_gt == 1) * parse_valid loss_neg = torch.abs(parse_out_max - sdf_thresh) * (parse_gt == 0) * parse_valid loss = loss_pos.mean((1,2)) + loss_neg.mean((1,2)) cloth_exist = (parse_gt == 1).sum((1,2)) > 0 loss = loss * cloth_exist return loss ================================================ FILE: 
common/nets/module.py ================================================ import torch import torch.nn as nn from torch.nn import functional as F from nets.layer import make_linear_layers, make_conv_layers, make_deconv_layers from utils.human_models import smpl from config import cfg class ClothNet(nn.Module): def __init__(self): super(ClothNet, self).__init__() input_feat_dim = 2048 if 'uppercloth' in cfg.cloth_types: self.z_cut_uppercloth = make_linear_layers([input_feat_dim,6], relu_final=False) self.z_style_uppercloth = make_linear_layers([input_feat_dim,12], relu_final=False) if 'coat' in cfg.cloth_types: self.z_cut_coat = make_linear_layers([input_feat_dim,6], relu_final=False) self.z_style_coat = make_linear_layers([input_feat_dim,12], relu_final=False) if 'pants' in cfg.cloth_types: self.z_cut_pants = make_linear_layers([input_feat_dim,6], relu_final=False) self.z_style_pants = make_linear_layers([input_feat_dim,12], relu_final=False) if 'skirts' in cfg.cloth_types: self.z_cut_skirts = make_linear_layers([input_feat_dim,6], relu_final=False) self.z_style_skirts = make_linear_layers([input_feat_dim,12], relu_final=False) if 'hair' in cfg.cloth_types: self.z_cut_hair = make_linear_layers([input_feat_dim,6], relu_final=False) self.z_style_hair = make_linear_layers([input_feat_dim,12], relu_final=False) if 'shoes' in cfg.cloth_types: self.z_style_shoes = make_linear_layers([input_feat_dim,4], relu_final=False) self.cloth_cls_layer = make_linear_layers([input_feat_dim, len(cfg.cloth_types)], relu_final=False) self.gender_cls_layer = make_linear_layers([input_feat_dim, 2], relu_final=False) def forward(self, img_feat): batch_size = img_feat.shape[0] img_feat = img_feat.mean((2,3)) z_cuts, z_styles = [], [] for cloth_type in cfg.cloth_types: if cloth_type == 'uppercloth': z_cuts.append(self.z_cut_uppercloth(img_feat)) z_styles.append(self.z_style_uppercloth(img_feat)) elif cloth_type == 'coat': z_cuts.append(self.z_cut_coat(img_feat)) z_styles.append(self.z_style_coat(img_feat)) elif cloth_type == 'pants': z_cuts.append(self.z_cut_pants(img_feat)) z_styles.append(self.z_style_pants(img_feat)) elif cloth_type == 'skirts': z_cuts.append(self.z_cut_skirts(img_feat)) z_styles.append(self.z_style_skirts(img_feat)) elif cloth_type == 'hair': z_cuts.append(self.z_cut_hair(img_feat)) z_styles.append(self.z_style_hair(img_feat)) elif cloth_type == 'shoes': z_cuts.append(torch.zeros((batch_size,0)).float().cuda()) z_styles.append(self.z_style_shoes(img_feat)) scores = self.cloth_cls_layer(img_feat) scores = torch.sigmoid(scores) genders = self.gender_cls_layer(img_feat) genders = F.softmax(genders, dim=-1) return genders, scores, z_cuts, z_styles ================================================ FILE: common/nets/resnet.py ================================================ import torch import torch.nn as nn from torchvision.models.resnet import BasicBlock, Bottleneck from torchvision.models.resnet import model_urls class ResNetBackbone(nn.Module): def __init__(self, resnet_type): resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} block, layers, channels, name = resnet_spec[resnet_type] self.name = name self.inplanes = 64 super(ResNetBackbone, self).__init__() self.conv1 = nn.Conv2d(3, 
64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) for m in self.modules(): if isinstance(m, nn.Conv2d): # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') nn.init.normal_(m.weight, mean=0, std=0.001) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) return x def init_weights(self): org_resnet = torch.utils.model_zoo.load_url(model_urls[self.name]) # drop orginal resnet fc layer, add 'None' in case of no fc layer, that will raise error org_resnet.pop('fc.weight', None) org_resnet.pop('fc.bias', None) self.load_state_dict(org_resnet) print("Initialize resnet from model zoo") ================================================ FILE: common/timer.py ================================================ # -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- import time class Timer(object): """A simple timer.""" def __init__(self): self.total_time = 0. self.calls = 0 self.start_time = 0. self.diff = 0. self.average_time = 0. 
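# warm-up counter: toc() ignores the first 10 calls before it starts accumulating average timings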
self.warm_up = 0 def tic(self): # using time.time instead of time.clock because time time.clock # does not normalize for multithreading self.start_time = time.time() def toc(self, average=True): self.diff = time.time() - self.start_time if self.warm_up < 10: self.warm_up += 1 return self.diff else: self.total_time += self.diff self.calls += 1 self.average_time = self.total_time / self.calls if average: return self.average_time else: return self.diff ================================================ FILE: common/utils/SMPLicit/SMPLicit/SMPL.py ================================================ import torch import json import sys import numpy as np from .util_smpl import batch_global_rigid_transformation, batch_rodrigues, reflect_pose import torch.nn as nn import os import trimesh import pickle #from utils.human_models import smpl #from utils.vis import vis_keypoints, vis_mesh, save_obj, vis_parse, vis_dp class SMPL(nn.Module): def __init__(self, model_path, joint_type = 'cocoplus', obj_saveable = False): super(SMPL, self).__init__() if joint_type not in ['cocoplus', 'lsp']: msg = 'unknow joint type: {}, it must be either "cocoplus" or "lsp"'.format(joint_type) sys.exit(msg) self.model_path = model_path self.joint_type = joint_type with open(model_path, 'rb') as reader: model = pickle.load(reader, encoding='latin1') if obj_saveable: self.faces = model['f'] else: self.faces = None np_v_template = np.array(model['v_template'], dtype = np.float) self.register_buffer('v_template', torch.from_numpy(np_v_template).float()) self.size = [np_v_template.shape[0], 3] np_shapedirs = np.array(model['shapedirs'], dtype = np.float)[:,:,:10] self.num_betas = np_shapedirs.shape[-1] np_shapedirs = np.reshape(np_shapedirs, [-1, self.num_betas]).T self.register_buffer('shapedirs', torch.from_numpy(np_shapedirs).float()) np_J_regressor = np.array(model['J_regressor'].toarray().transpose(1,0), dtype = np.float) self.register_buffer('J_regressor', torch.from_numpy(np_J_regressor).float()) np_posedirs = np.array(model['posedirs'], dtype = np.float) num_pose_basis = np_posedirs.shape[-1] np_posedirs = np.reshape(np_posedirs, [-1, num_pose_basis]).T self.register_buffer('posedirs', torch.from_numpy(np_posedirs).float()) self.parents = np.array(model['kintree_table'])[0].astype(np.int32) np_weights = np.array(model['weights'], dtype = np.float) vertex_count = np_weights.shape[0] vertex_component = np_weights.shape[1] self.register_buffer('weight', torch.from_numpy(np_weights).float().reshape(-1, vertex_count, vertex_component)) self.register_buffer('e3', torch.eye(3).float()) self.cur_device = None def save_obj(self, verts, obj_mesh_name): if not self.faces: msg = 'obj not saveable!' 
sys.exit(msg) with open(obj_mesh_name, 'w') as fp: for v in verts: fp.write( 'v {:f} {:f} {:f}\n'.format( v[0], v[1], v[2]) ) for f in self.faces: # Faces are 1-based, not 0-based in obj files fp.write( 'f {:d} {:d} {:d}\n'.format(f[0] + 1, f[1] + 1, f[2] + 1) ) def forward(self, beta, theta, get_skin = False, theta_in_rodrigues=True): device, dtype = beta.device, beta.dtype self.cur_device = torch.device(device.type, device.index) num_batch = beta.shape[0] v_shaped = torch.matmul(beta, self.shapedirs).view(-1, self.size[0], self.size[1]) + self.v_template Jx = torch.matmul(v_shaped[:, :, 0], self.J_regressor) Jy = torch.matmul(v_shaped[:, :, 1], self.J_regressor) Jz = torch.matmul(v_shaped[:, :, 2], self.J_regressor) J = torch.stack([Jx, Jy, Jz], dim = 2) if theta_in_rodrigues: Rs = batch_rodrigues(theta.view(-1, 3)).view(-1, 24, 3, 3) else: #theta is already rotations Rs = theta.view(-1,24,3,3) pose_feature = (Rs[:, 1:, :, :] - torch.eye(3, dtype=dtype, device=device)).view(-1, 207) v_posed = torch.matmul(pose_feature, self.posedirs).view(-1, self.size[0], self.size[1]) + v_shaped J_transformed, A = batch_global_rigid_transformation(Rs, J, self.parents, rotate_base = False) W=self.weight.expand(num_batch,*self.weight.shape[1:]) T = torch.matmul(W, A.view(num_batch, 24, 16)).view(num_batch, -1, 4, 4) #v_posed_homo = torch.cat([v_posed, torch.ones(num_batch, v_posed.shape[1], 1, device = self.cur_device)], dim = 2) v_posed_homo = torch.cat([v_posed, torch.ones(num_batch, v_posed.shape[1], 1, device=device)], dim = 2) v_homo = torch.matmul(T, torch.unsqueeze(v_posed_homo, -1)) verts = v_homo[:, :, :3, 0] joint_x = torch.matmul(verts[:, :, 0], self.J_regressor) joint_y = torch.matmul(verts[:, :, 1], self.J_regressor) joint_z = torch.matmul(verts[:, :, 2], self.J_regressor) joints = torch.stack([joint_x, joint_y, joint_z], dim = 2) if get_skin: return verts, joints, Rs else: return joints def deform_clothed_smpl(self, theta, J, v_smpl, v_cloth): num_batch = theta.shape[0] device = theta.device self.cur_device = torch.device(device.type, device.index) Rs = batch_rodrigues(theta.view(-1, 3)).view(-1, 24, 3, 3) pose_feature = (Rs[:, 1:, :, :] - torch.eye(3, device=device).float()).view(-1, 207) pose_params = torch.matmul(pose_feature, self.posedirs).view(-1, self.size[0], self.size[1]) v_posed_smpl = pose_params + v_smpl # Calculate closest SMPL vertex for each vertex of the cloth mesh with torch.no_grad(): dists = ((v_smpl.unsqueeze(1) - v_cloth.unsqueeze(2))**2).sum(-1) dists, correspondance = torch.min(dists, 2) # num_batch, v_cloth.shape[1] v_posed_cloth = torch.gather(pose_params, 1, correspondance[:,:,None].repeat(1,1,3)) + v_cloth J_transformed, A = batch_global_rigid_transformation(Rs, J, self.parents, rotate_base = False) W = self.weight.expand(num_batch,*self.weight.shape[1:]) T = torch.matmul(W, A.view(num_batch, 24, 16)).view(num_batch, -1, 4, 4) v_posed_homo_smpl = torch.cat([v_posed_smpl, torch.ones(num_batch, v_posed_smpl.shape[1], 1, device=device)], dim = 2) v_posed_homo_cloth = torch.cat([v_posed_cloth, torch.ones(num_batch, v_posed_cloth.shape[1], 1, device=device)], dim = 2) v_homo_smpl = torch.matmul(T, torch.unsqueeze(v_posed_homo_smpl, -1)) v_homo_cloth = torch.matmul(torch.gather(T, 1, correspondance[:,:,None,None].repeat(1,1,4,4)), torch.unsqueeze(v_posed_homo_cloth, -1)) verts_smpl = v_homo_smpl[:, :, :3, 0] verts_cloth = v_homo_cloth[:, :, :3, 0] return verts_cloth def unpose_and_deform_cloth(self, v_cloth_posed, theta_from, theta_to, beta, Jsmpl, vsmpl, 
theta_in_rodrigues=True): ### UNPOSE: device = theta_from.device self.cur_device = torch.device(device.type, device.index) num_batch = beta.shape[0] v_shaped = torch.matmul(beta, self.shapedirs).view(-1, self.size[0], self.size[1]) + self.v_template Jx = torch.matmul(v_shaped[:, :, 0], self.J_regressor) Jy = torch.matmul(v_shaped[:, :, 1], self.J_regressor) Jz = torch.matmul(v_shaped[:, :, 2], self.J_regressor) J = torch.stack([Jx, Jy, Jz], dim = 2) if theta_in_rodrigues: Rs = batch_rodrigues(theta_from.view(-1, 3)).view(-1, 24, 3, 3) else: #theta is already rotations Rs = theta_from.view(-1,24,3,3) pose_feature = (Rs[:, 1:, :, :] - torch.eye(3, device=device).float()).view(-1, 207) pose_displ = torch.matmul(pose_feature, self.posedirs).view(-1, self.size[0], self.size[1]) v_posed = pose_displ + v_shaped J_transformed, A = batch_global_rigid_transformation(Rs, J, self.parents, rotate_base = False) W = self.weight.expand(num_batch,*self.weight.shape[1:]) T = torch.matmul(W, A.view(num_batch, 24, 16)).view(num_batch, -1, 4, 4) v_posed_homo = torch.cat([v_posed, torch.ones(num_batch, v_posed.shape[1], 1, device=device)], dim = 2) v_homo = torch.matmul(T, torch.unsqueeze(v_posed_homo, -1)) v_smpl = v_homo[:, :, :3, 0] with torch.no_grad(): dists = ((v_smpl.unsqueeze(1) - v_cloth_posed.unsqueeze(2))**2).sum(-1) dists, correspondance = torch.min(dists, 2) # num_batch, v_cloth_posed.shape[1] invT = torch.inverse(torch.gather(T, 1, correspondance[:,:,None,None].repeat(1,1,4,4)).view(num_batch,-1,4,4)) v = torch.cat([v_cloth_posed, torch.ones(num_batch, v_cloth_posed.shape[1], 1, device=device)], 2) v = torch.matmul(invT, v.unsqueeze(-1))[:,:, :3, 0] unposed_v = v - torch.gather(pose_displ, 1, correspondance[:,:,None].repeat(1,1,3)) ### REPOSE: Rs = batch_rodrigues(theta_to.view(-1, 3)).view(-1, 24, 3, 3) pose_feature = (Rs[:, 1:, :, :] - torch.eye(3, device=device).float()).view(-1, 207) pose_params = torch.matmul(pose_feature, self.posedirs).view(-1, self.size[0], self.size[1]) v_posed_cloth = torch.gather(pose_params,1,correspondance[:,:,None].repeat(1,1,3)) + unposed_v J_transformed, A = batch_global_rigid_transformation(Rs, Jsmpl, self.parents, rotate_base = False) W = self.weight.expand(num_batch,*self.weight.shape[1:]) T = torch.matmul(W, A.view(num_batch, 24, 16)).view(num_batch, -1, 4, 4) v_posed_homo_cloth = torch.cat([v_posed_cloth, torch.ones(num_batch, v_posed_cloth.shape[1], 1, device=device)], dim = 2) v_homo_cloth = torch.matmul(torch.gather(T,1,correspondance[:,:,None,None].repeat(1,1,4,4)), torch.unsqueeze(v_posed_homo_cloth, -1)) verts_cloth = v_homo_cloth[:, :, :3, 0] return verts_cloth def skeleton(self,beta,require_body=False): num_batch = beta.shape[0] v_shaped = torch.matmul(beta, self.shapedirs).view(-1, self.size[0], self.size[1]) + self.v_template Jx = torch.matmul(v_shaped[:, :, 0], self.J_regressor) Jy = torch.matmul(v_shaped[:, :, 1], self.J_regressor) Jz = torch.matmul(v_shaped[:, :, 2], self.J_regressor) J = torch.stack([Jx, Jy, Jz], dim = 2) if require_body: return J, v_shaped else: return J ================================================ FILE: common/utils/SMPLicit/SMPLicit/SMPLicit.py ================================================ import torch import numpy as np import torch.nn as nn import os import os.path as osp import trimesh import math import copy from .SMPL import SMPL from .SMPLicit_options import Options from .smplicit_core_test import Model class SMPLicit(nn.Module): def __init__(self, root_path, cloth_types): super(SMPLicit, self).__init__() 
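# Build one SDF decoder per garment type below (uppercloth and coat share the upperclothes.pth checkpoint but use different occupancy thresholds), plus neutral/male/female SMPL layers.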
self._opt = Options() uppercloth = Model(osp.join(root_path, self._opt.path_checkpoints, 'upperclothes.pth'), self._opt.upperbody_n_z_cut, self._opt.upperbody_n_z_style, self._opt.upperbody_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.upperbody_clusters), self._opt.upperbody_b_min, self._opt.upperbody_b_max, self._opt.upperbody_resolution, thresh=self._opt.upperbody_thresh_occupancy) coat = Model(osp.join(root_path, self._opt.path_checkpoints, 'upperclothes.pth'), self._opt.upperbody_n_z_cut, self._opt.upperbody_n_z_style, self._opt.upperbody_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.upperbody_clusters), self._opt.upperbody_b_min, self._opt.upperbody_b_max, self._opt.upperbody_resolution, thresh=self._opt.coat_thresh_occupancy) pants = Model(osp.join(root_path, self._opt.path_checkpoints, 'pants.pth'), self._opt.pants_n_z_cut, self._opt.pants_n_z_style, self._opt.pants_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.pants_clusters), self._opt.pants_b_min, self._opt.pants_b_max, self._opt.pants_resolution, thresh=self._opt.pants_thresh_occupancy) skirts = Model(osp.join(root_path, self._opt.path_checkpoints, 'skirts.pth'), self._opt.skirts_n_z_cut, self._opt.skirts_n_z_style, self._opt.skirts_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.skirts_clusters), self._opt.skirts_b_min, self._opt.skirts_b_max, self._opt.skirts_resolution, thresh=self._opt.skirts_thresh_occupancy) hair = Model(osp.join(root_path, self._opt.path_checkpoints, 'hair.pth'), self._opt.hair_n_z_cut, self._opt.hair_n_z_style, self._opt.hair_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.hair_clusters), self._opt.hair_b_min, self._opt.hair_b_max, self._opt.hair_resolution, thresh=self._opt.hair_thresh_occupancy) shoes = Model(osp.join(root_path, self._opt.path_checkpoints, 'shoes.pth'), self._opt.shoes_n_z_cut, self._opt.shoes_n_z_style, self._opt.shoes_num_clusters, osp.join(root_path, self._opt.path_cluster_files, self._opt.shoes_clusters), self._opt.shoes_b_min, self._opt.shoes_b_max, self._opt.shoes_resolution, thresh=self._opt.shoes_thresh_occupancy) self.models = [] for cloth_type in cloth_types: if cloth_type == 'uppercloth': self.models.append(uppercloth) elif cloth_type == 'coat': self.models.append(coat) elif cloth_type == 'pants': self.models.append(pants) elif cloth_type == 'skirts': self.models.append(skirts) elif cloth_type == 'hair': self.models.append(hair) elif cloth_type == 'shoes': self.models.append(shoes) else: assert 0, 'Not supported cloth type: ' + cloth_type self.cloth_types = cloth_types self.SMPL_Layers = [SMPL(osp.join(root_path, self._opt.path_SMPL, 'SMPL_NEUTRAL.pkl'), obj_saveable=True).cuda(),\ SMPL(osp.join(root_path, self._opt.path_SMPL, 'SMPL_MALE.pkl'), obj_saveable=True).cuda(),\ SMPL(osp.join(root_path, self._opt.path_SMPL, 'SMPL_FEMALE.pkl'), obj_saveable=True).cuda()] self.SMPL_Layer = None self.smpl_faces = self.SMPL_Layers[0].faces Astar_pose = torch.zeros(1, 72).cuda() Astar_pose[0, 5] = 0.04 Astar_pose[0, 8] = -0.04 self.register_buffer('Astar_pose', Astar_pose) # HYPERPARAMETER: Maximum number of points used when reposing. 
# This takes a lot of memory when finding the closest point in the SMPL so doing it by steps self.step = 1000 def get_right_shoe(self, sdf, unposed_cloth_mesh, do_marching_cube): # when not doing marching cube, mesh only contains vertices without faces if not do_marching_cube: sdf = torch.cat((sdf, sdf),1) # copy sdf rshoe = torch.stack((-unposed_cloth_mesh[:,:,0], unposed_cloth_mesh[:,:,1], unposed_cloth_mesh[:,:,2]),2) unposed_cloth_mesh = torch.cat((unposed_cloth_mesh, rshoe),1) return sdf, unposed_cloth_mesh # when doing marching cube, mesh contains both vertices and faces else: rshoe = np.stack((-unposed_cloth_mesh.vertices[:,0], unposed_cloth_mesh.vertices[:,1], unposed_cloth_mesh.vertices[:,2]),1) vertices = np.concatenate((unposed_cloth_mesh.vertices, rshoe)) faces = np.concatenate((unposed_cloth_mesh.faces, unposed_cloth_mesh.faces[:,::-1] + len(rshoe))) unposed_cloth_mesh = trimesh.Trimesh(vertices, faces) return None, unposed_cloth_mesh def pose_mesh(self, unposed_cloth_mesh, pose, unposed_smpl_joint, unposed_smpl_mesh, do_marching_cube, smooth=True): if not do_marching_cube: iters = math.ceil(unposed_cloth_mesh.shape[1] / self.step) posed_cloth_mesh = [] for i in range(iters): in_verts = unposed_cloth_mesh[:,i*self.step:(i+1)*self.step,:] out_verts = self.SMPL_Layer.deform_clothed_smpl(pose, unposed_smpl_joint, unposed_smpl_mesh, in_verts) posed_cloth_mesh.append(out_verts) posed_cloth_mesh = torch.cat(posed_cloth_mesh,1) return posed_cloth_mesh else: iters = math.ceil(len(unposed_cloth_mesh.vertices) / self.step) for i in range(iters): in_verts = torch.FloatTensor(unposed_cloth_mesh.vertices[None,i*self.step:(i+1)*self.step,:]).cuda() out_verts = self.SMPL_Layer.deform_clothed_smpl(pose, unposed_smpl_joint, unposed_smpl_mesh, in_verts) unposed_cloth_mesh.vertices[i*self.step:(i+1)*self.step] = out_verts.cpu().data.numpy() # replace unposed cloth mesh with posed one posed_cloth_mesh = unposed_cloth_mesh if smooth: posed_cloth_mesh = trimesh.smoothing.filter_laplacian(posed_cloth_mesh, lamb=0.5) return posed_cloth_mesh def pose_mesh_lower_body(self, unposed_cloth_mesh, pose, shape, Astar_pose, unposed_smpl_joint, unposed_smpl_mesh, do_marching_cube, smooth=True): if not do_marching_cube: iters = math.ceil(unposed_cloth_mesh.shape[1] / self.step) posed_cloth_mesh = [] for i in range(iters): in_verts = unposed_cloth_mesh[:,i*self.step:(i+1)*self.step] out_verts = self.SMPL_Layer.unpose_and_deform_cloth(in_verts, Astar_pose, pose, shape, unposed_smpl_joint, unposed_smpl_mesh) posed_cloth_mesh.append(out_verts) posed_cloth_mesh = torch.cat(posed_cloth_mesh,1) return posed_cloth_mesh else: iters = math.ceil(len(unposed_cloth_mesh.vertices) / self.step) for i in range(iters): in_verts = torch.FloatTensor(unposed_cloth_mesh.vertices[None,i*self.step:(i+1)*self.step]).cuda() out_verts = self.SMPL_Layer.unpose_and_deform_cloth(in_verts, Astar_pose, pose, shape, unposed_smpl_joint, unposed_smpl_mesh) unposed_cloth_mesh.vertices[i*self.step:(i+1)*self.step] = out_verts.cpu().data.numpy() # replace unposed cloth mesh with posed one posed_cloth_mesh = unposed_cloth_mesh if smooth: posed_cloth_mesh = trimesh.smoothing.filter_laplacian(posed_cloth_mesh, lamb=0.5) return posed_cloth_mesh def forward(self, z_cuts, z_styles, pose, shape, gender=[0], do_marching_cube=False, valid=None, do_smooth=True): batch_size = pose.shape[0] unposed_smpl_joint, unposed_smpl_mesh = [], [] Astar_smpl_mesh, Astar_smpl_joint = [], [] for i in range(batch_size): SMPL_Layer = self.SMPL_Layers[gender[i]] 
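# per-sample gendered SMPL: zero-pose (unposed) joints/mesh, plus the A-star-posed mesh and joints used when decoding lower-body garments (pants, skirts)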
unposed_smpl_joint_i, unposed_smpl_mesh_i = SMPL_Layer.skeleton(shape[None,i], require_body=True) Astar_smpl_mesh_i, Astar_smpl_joint_i, _ = SMPL_Layer.forward(beta=shape[None,i], theta=self.Astar_pose.repeat(1,1), get_skin=True) unposed_smpl_joint.append(unposed_smpl_joint_i); unposed_smpl_mesh.append(unposed_smpl_mesh_i) Astar_smpl_mesh.append(Astar_smpl_mesh_i); Astar_smpl_joint.append(Astar_smpl_joint_i) unposed_smpl_joint = torch.cat(unposed_smpl_joint); unposed_smpl_mesh = torch.cat(unposed_smpl_mesh) Astar_smpl_mesh = torch.cat(Astar_smpl_mesh); Astar_smpl_joint = torch.cat(Astar_smpl_joint) self.SMPL_Layer = self.SMPL_Layers[gender[0]] out_sdfs = [] out_meshes = [] out_meshes_unposed = [] for i in range(len(self.models)): if ~valid[i]: out_sdfs.append([None]) out_meshes.append([None]) out_meshes_unposed.append([None]) continue if self.cloth_types[i] in ['uppercloth', 'coat']: cloth_type = 'upperbody' else: cloth_type = self.cloth_types[i] resolution = eval(f'self._opt.{cloth_type}_resolution') if self.cloth_types[i] =='coat': is_coat = True else: is_coat = False if not do_marching_cube: resolution = 21 if self.cloth_types[i] == 'pants' or self.cloth_types[i] == 'skirts': # forward network sdf, unposed_cloth_mesh = self.models[i].decode(z_cuts[i], z_styles[i], Astar_smpl_joint, Astar_smpl_mesh, resolution, do_marching_cube, do_smooth) # when not doing marching cube, all unposed_cloth_mesh have the same number of vertices if not do_marching_cube: posed_cloth_mesh = self.pose_mesh_lower_body(unposed_cloth_mesh, pose, shape, self.Astar_pose.repeat(batch_size,1), unposed_smpl_joint, unposed_smpl_mesh, do_marching_cube) # when doing marching cube, unposed_cloth_mesh can have different number of vertices else: posed_cloth_mesh = [] for j in range(len(unposed_cloth_mesh)): if unposed_cloth_mesh[j] is None: posed_cloth_mesh.append(None) continue posed_cloth_mesh.append(self.pose_mesh_lower_body(unposed_cloth_mesh[j], pose[j,None], shape[j,None], self.Astar_pose, unposed_smpl_joint[j,None], unposed_smpl_mesh[j,None], do_marching_cube, do_smooth)) else: # forward network sdf, unposed_cloth_mesh = self.models[i].decode(z_cuts[i], z_styles[i], unposed_smpl_joint, unposed_smpl_mesh, resolution, do_marching_cube, do_smooth, is_coat=is_coat) # when not doing marching cube, all unposed_cloth_mesh have the same number of vertices if not do_marching_cube: if self.cloth_types[i] == 'shoes': # duplicate left shoe sdf, unposed_cloth_mesh = self.get_right_shoe(sdf, unposed_cloth_mesh, do_marching_cube) posed_cloth_mesh = self.pose_mesh(unposed_cloth_mesh, pose, unposed_smpl_joint, unposed_smpl_mesh, do_marching_cube) # when doing marching cube, unposed_cloth_mesh can have different number of vertices else: posed_cloth_mesh = [] for j in range(len(unposed_cloth_mesh)): if unposed_cloth_mesh[j] is None: posed_cloth_mesh.append(None) continue if self.cloth_types[i] == 'shoes': # duplicate left shoe _, unposed_cloth_mesh[j] = self.get_right_shoe(None, unposed_cloth_mesh[j], do_marching_cube) posed_cloth_mesh.append(self.pose_mesh(unposed_cloth_mesh[j], pose[j,None], unposed_smpl_joint[j,None], unposed_smpl_mesh[j,None], do_marching_cube, do_smooth)) out_sdfs.append(sdf) out_meshes.append(posed_cloth_mesh) out_meshes_unposed.append(unposed_cloth_mesh) return out_sdfs, out_meshes, out_meshes_unposed ================================================ FILE: common/utils/SMPLicit/SMPLicit/SMPLicit_options.py ================================================ import torch import os import numpy as np # HUMAN PARSING 
LABELS: # 1 -> Hat # 2 -> Hair # 3 -> Glove # 4 -> Sunglasses, # 5 -> Upper-Clothes, # 6 -> Dress, # 7 -> Coat, # 8 -> Socks, # 9 -> Pants, # 10 -> Torso-Skin # 11 -> Scarf # 12 -> Skirt # 13 -> Face # 14 -> Left Arm # 15 -> Right Arm # 16 -> Left Leg # 17 -> Right Leg # 18 -> Left Shoe # 19 -> Right Shoe class Options(): def __init__(self): # Upper body options: self.upperbody_loadepoch = 11 self.upperbody_clusters = 'indexs_clusters_tshirt_smpl.npy' self.upperbody_num_clusters = 500 self.upperbody_n_z_cut = 6 self.upperbody_n_z_style = 12 self.upperbody_resolution = 128 self.upperbody_thresh_occupancy = -0.03 self.coat_thresh_occupancy = -0.08 # Pants options: self.pants_loadepoch = 60 self.pants_clusters = 'clusters_lowerbody.npy' self.pants_num_clusters = 500 self.pants_n_z_cut = 6 self.pants_n_z_style = 12 self.pants_resolution = 128 self.pants_thresh_occupancy = -0.02 # Skirts options: self.skirts_loadepoch = 40 self.skirts_clusters = 'clusters_lowerbody.npy' self.skirts_num_clusters = 500 self.skirts_n_z_cut = 6 self.skirts_n_z_style = 12 self.skirts_resolution = 128 self.skirts_thresh_occupancy = -0.05 # Hair options: self.hair_loadepoch = 20000 self.hair_clusters = 'clusters_hairs.npy' self.hair_num_clusters = 500 self.hair_n_z_cut = 6 self.hair_n_z_style = 12 self.hair_resolution = 128 self.hair_thresh_occupancy = -2.0 # Shoes options self.shoes_loadepoch = 20000 self.shoes_clusters = 'clusters_shoes.npy' self.shoes_n_z_cut = 0 self.shoes_n_z_style = 4 self.shoes_resolution = 64 self.shoes_thresh_occupancy = -0.36 self.shoes_num_clusters = 100 # General options: self.path_checkpoints = '../../../../data/base_data/smplicit/checkpoints/' self.path_cluster_files = '../../../../data/base_data/smplicit/clusters/' self.path_SMPL = '../../../../data/base_data/human_models/smpl' self.upperbody_b_min = [-0.8, -0.4, -0.3] self.upperbody_b_max = [0.8, 0.6, 0.3] self.pants_b_min = [-0.3, -1.2, -0.3] self.pants_b_max = [0.3, 0.0, 0.3] self.skirts_b_min = [-0.3, -1.2, -0.3] self.skirts_b_max = [0.3, 0.0, 0.3] self.hair_b_min = [-0.35, -0.42, -0.33] self.hair_b_max = [0.35, 0.68, 0.37] self.shoes_b_min = [-0.1, -1.4, -0.2] self.shoes_b_max = [0.25, -0.6, 0.3] ================================================ FILE: common/utils/SMPLicit/SMPLicit/__init__.py ================================================ name = 'SMPLicit' from .SMPLicit import SMPLicit from .SMPL import SMPL ================================================ FILE: common/utils/SMPLicit/SMPLicit/network.py ================================================ import torch.nn as nn import numpy as np import torchvision import torch import torch.nn.functional as F class Network(nn.Module): def __init__(self, n_z_style=1, point_pos_size=3, output_dim=1, n_z_cut=12): super(Network, self).__init__() self.point_pos_size = point_pos_size self.fc0_cloth = nn.utils.weight_norm(nn.Linear(n_z_style, 128, bias=True)) self.fc1_cloth = nn.utils.weight_norm(nn.Linear(128, 128, bias=True)) self.fc0_query = nn.utils.weight_norm(nn.Conv1d(point_pos_size, 128, kernel_size=1, bias=True)) self.fc1_query = nn.utils.weight_norm(nn.Conv1d(128, 256, kernel_size=1, bias=True)) self.fc0 = nn.utils.weight_norm(nn.Conv1d(128+256 + n_z_cut, 312, kernel_size=1, bias=True)) self.fc1 = nn.utils.weight_norm(nn.Conv1d(312, 312, kernel_size=1, bias=True)) self.fc2 = nn.utils.weight_norm(nn.Conv1d(312, 256, kernel_size=1, bias=True)) self.fc3 = nn.utils.weight_norm(nn.Conv1d(256, 128, kernel_size=1, bias=True)) self.fc4 = nn.utils.weight_norm(nn.Conv1d(128, output_dim, 
kernel_size=1, bias=True)) self.activation = F.relu def forward(self, z_cut, z_style, query): batch_size = len(z_style) query_num = query.shape[1] x_cloth = self.activation(self.fc0_cloth(z_style)) x_cloth = self.activation(self.fc1_cloth(x_cloth)) x_cloth = x_cloth.unsqueeze(-1).repeat(1, 1, query_num) query = query.reshape(batch_size, query_num, self.point_pos_size).permute(0,2,1) x_query = self.activation(self.fc0_query(query)) x_query = self.activation(self.fc1_query(x_query)) z_cut = z_cut.unsqueeze(-1).repeat(1, 1, query_num) _in = torch.cat((x_cloth, x_query, z_cut), 1) x = self.fc0(_in) x = self.activation(x) x = self.fc1(x) x = self.activation(x) x = self.fc2(x) x = self.activation(x) x = self.fc3(x) x = self.activation(x) x = self.fc4(x) if x.shape[1] == 1: return x[:, 0] else: return x ================================================ FILE: common/utils/SMPLicit/SMPLicit/smplicit_core_test.py ================================================ import torch import numpy as np from .utils.sdf import create_grid, eval_grid, eval_grid_octree from skimage import measure from .network import Network import trimesh class Model(): def __init__(self, filename, n_z_cut, n_z_style, num_clusters, name_clusters, b_min, b_max, resolution, thresh=-0.05): self.filename = filename self.n_z_cut = n_z_cut self.n_z_style = n_z_style self.num_clusters = num_clusters self.clusters = np.load(name_clusters, allow_pickle=True) self.resolution = 128 self.thresh = thresh self.load_networks() def load_networks(self): self._G = Network(n_z_style=self.n_z_style, point_pos_size=self.num_clusters*3, output_dim=1, n_z_cut=self.n_z_cut).cuda() self._G.load_state_dict(torch.load(self.filename)) self._G.eval() def get_bbox(self, joint, mesh): joints_name = ('Pelvis', 'L_Hip', 'R_Hip', 'Torso', 'L_Knee', 'R_Knee', 'Spine', 'L_Ankle', 'R_Ankle', 'Chest', 'L_Toe', 'R_Toe', 'Neck', 'L_Thorax', 'R_Thorax', 'Head', 'L_Shoulder', 'R_Shoulder', 'L_Elbow', 'R_Elbow', 'L_Wrist', 'R_Wrist', 'L_Hand', 'R_Hand') if 'upper' in self.filename: rhand = joint[:,joints_name.index('R_Hand'),:] lhand = joint[:,joints_name.index('L_Hand'),:] xmin = rhand[:,0]; xmax = lhand[:,0]; ycenter = joint[:,joints_name.index('Chest'),1] height = (ycenter - joint[:,joints_name.index('Pelvis'),1])*2*2 ymin = ycenter - height/2; ymax = ycenter + height/2; zcenter = (torch.min(mesh[:,:,2],1)[0] + torch.max(mesh[:,:,2],1)[0]) / 2. depth = (torch.max(mesh[:,:,2],1)[0] - torch.min(mesh[:,:,2],1)[0]) * 1.5 zmin = zcenter - depth/2.; zmax = zcenter + depth/2. b_min = torch.stack((xmin, ymin, zmin),1) b_max = torch.stack((xmax, ymax, zmax),1) elif 'pants' in self.filename: rankle = joint[:,joints_name.index('R_Ankle'),:] lankle = joint[:,joints_name.index('L_Ankle'),:] pelvis = joint[:,joints_name.index('Pelvis'),:] spine1 = joint[:,joints_name.index('Torso'),:] xcenter = pelvis[:,0]; width = (xcenter - rankle[:,0])*2*2.3 xmin = xcenter - width/2; xmax = xcenter + width/2; ycenter = (pelvis[:,1] + rankle[:,1])/2.; height = (pelvis[:,1] - ycenter)*2*1.2 ymin = ycenter - height/2; ymax = ycenter + height/2; zcenter = (torch.min(mesh[:,:,2],1)[0] + torch.max(mesh[:,:,2],1)[0]) / 2. 
depth = (torch.max(mesh[:,:,2],1)[0] - torch.min(mesh[:,:,2],1)[0]) * 1.5 zmin = zcenter - depth/2; zmax = zcenter + depth/2 b_min = torch.stack((xmin, ymin, zmin),1) b_max = torch.stack((xmax, ymax, zmax),1) elif 'skirt' in self.filename: rankle = joint[:,joints_name.index('R_Ankle'),:] lankle = joint[:,joints_name.index('L_Ankle'),:] pelvis = joint[:,joints_name.index('Pelvis'),:] spine1 = joint[:,joints_name.index('Torso'),:] xcenter = pelvis[:,0]; width = (xcenter - rankle[:,0])*2*3 xmin = xcenter - width/2; xmax = xcenter + width/2; ycenter = (pelvis[:,1] + rankle[:,1])/2.; height = (pelvis[:,1] - ycenter)*2*1.2 ymin = ycenter - height/2; ymax = ycenter + height/2; zcenter = (torch.min(mesh[:,:,2],1)[0] + torch.max(mesh[:,:,2],1)[0]) / 2. depth = (torch.max(mesh[:,:,2],1)[0] - torch.min(mesh[:,:,2],1)[0]) * 2 zmin = zcenter - depth/2; zmax = zcenter + depth/2 b_min = torch.stack((xmin, ymin, zmin),1) b_max = torch.stack((xmax, ymax, zmax),1) elif 'hair' in self.filename: lshoulder = joint[:,joints_name.index('L_Shoulder'),:] rshoulder = joint[:,joints_name.index('R_Shoulder'),:] xcenter = (lshoulder[:,0] + rshoulder[:,0])/2. width = (xcenter - rshoulder[:,0])*2 xmin = xcenter - width/2; xmax = xcenter + width/2; head = joint[:,joints_name.index('Head'),:] ymax = torch.max(mesh[:,:,1],1)[0]; ymin = joint[:,joints_name.index('Spine'),1] ycenter = (ymin+ymax)/2.; height = (ycenter - ymin)*2*1.2 ymin = ycenter - height/2; ymax = ycenter + height/2; zcenter = (torch.min(mesh[:,:,2],1)[0] + torch.max(mesh[:,:,2],1)[0]) / 2. depth = (torch.max(mesh[:,:,2],1)[0] - torch.min(mesh[:,:,2],1)[0]) * 1.2 zmin = zcenter - depth*0.8; zmax = zcenter + depth/2 b_min = torch.stack((xmin, ymin, zmin),1) b_max = torch.stack((xmax, ymax, zmax),1) elif 'shoes' in self.filename: lknee = joint[:,joints_name.index('L_Knee'),:] lankle = joint[:,joints_name.index('L_Ankle'),:] lfoot = joint[:,joints_name.index('L_Toe'),:] xmin = lankle[:,0] - 0.15; xmax = lankle[:,0] + 0.15; ycenter = lankle[:,1] height = (lknee[:,1] - ycenter)*1.5 ymin = ycenter - height/2; ymax = ycenter + height/2; zcenter = (lankle[:,2] + lfoot[:,2])/2. 
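# Unlike the other garment branches, which derive the depth range from the SMPL mesh
# extent, the shoe box below uses a fixed +/-0.25 slab (SMPL/world units) around the
# midpoint between the left ankle and the left toe joint.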
zmin = zcenter - 0.25; zmax = zcenter + 0.25; b_min = torch.stack((xmin, ymin, zmin),1) b_max = torch.stack((xmax, ymax, zmax),1) return b_min, b_max def decode(self, z_cut, z_style, smpl_joint, smpl_mesh, resolution, do_marching_cube, smooth=True, is_coat=False): batch_size = z_cut.shape[0] # prepare query points to predict SDF b_min, b_max = self.get_bbox(smpl_joint, smpl_mesh) query_points = create_grid((resolution, resolution, resolution), b_min, b_max) # sample points in clusters from smpl mesh smpl_points = smpl_mesh[:,self.clusters[self.num_clusters]] # batch_size, smpl_point_num, 3 smpl_point_num = smpl_points.shape[1] def eval_func(query_points, ref_points, z_cut, z_style, scale): dist = query_points[:,:,None,:] - ref_points[:,None,:,:] dist = dist.view(-1, query_points.shape[1], ref_points.shape[1]*3) pred = self._G(z_cut, z_style, dist)*scale return pred if not do_marching_cube: if not smooth: # remove empty 3D space if 'upper' in self.filename: query_points = query_points.view(batch_size, resolution, resolution, resolution, 3) is_empty = torch.zeros((resolution, resolution, resolution)).float().cuda() is_empty[:resolution//4,:resolution//2,:] = 1 # right is_empty[resolution//4*3:,:resolution//2,:] = 1 # left is_empty[resolution//4:resolution//4*3:,resolution//4:resolution//2,:resolution//3] = 1 # back center query_points = query_points[is_empty[None,:,:,:,None].repeat(batch_size,1,1,1,3)==0].view(batch_size,-1,3) query_point_num = query_points.shape[1] # predict SDF sdf = eval_grid(query_points, smpl_points, z_cut, z_style, eval_func, resolution, 1, num_samples=10000) cloth_points = query_points return sdf, cloth_points else: cloth_meshes = [] sdfs = eval_grid(query_points, smpl_points, z_cut, z_style, eval_func, resolution, -100, num_samples=10000) sdfs = sdfs.view(batch_size,resolution,resolution,resolution) for i in range(batch_size): sdf = sdfs[i].cpu().numpy() if 'pant' in self.filename: # pant exception handling (heuristic) sdf[resolution*63//128:resolution*66//128,:resolution*47//64,:] = self.thresh - 0.001 sdf[resolution*62//128:resolution*67//128,:resolution*45//64,:] = self.thresh - 0.001 try: verts, faces, normals, values = measure.marching_cubes(sdf, self.thresh, method='lewiner') cloth_mesh = trimesh.Trimesh(np.float64(verts), faces[:, ::-1]) cloth_mesh.vertices /= resolution cloth_mesh.vertices *= (b_max[i,None].cpu().numpy() - b_min[i,None].cpu().numpy()) cloth_mesh.vertices += b_min[i,None].cpu().numpy() if smooth: smooth_mesh = trimesh.smoothing.filter_laplacian(cloth_mesh, lamb=0.5) if not np.isnan(smooth_mesh.vertices).any(): cloth_mesh = smooth_mesh except ValueError: cloth_mesh = None cloth_meshes.append(cloth_mesh) return None, cloth_meshes ================================================ FILE: common/utils/SMPLicit/SMPLicit/util_smpl.py ================================================ # import h5py import torch import numpy as np import json from torch.autograd import Variable import torch.nn.functional as F import cv2 import math import os # def load_mean_theta(): # mean = np.zeros(85, dtype = np.float) # mean_values = h5py.File(os.path.join(os.path.dirname(__file__),'model/neutral_smpl_mean_params.h5'),'r') # mean_pose = mean_values['pose'] # mean_pose[:3] = 0 # mean_shape = mean_values['shape'] # mean_pose[0]=np.pi # #init sacle is 0.9 # mean[0] = 0.9 # mean[3:75] = mean_pose[:] # mean[75:] = mean_shape[:] # return mean def batch_rodrigues(theta): #theta N x 3 batch_size = theta.shape[0] l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1) angle = 
torch.unsqueeze(l1norm, -1) normalized = torch.div(theta, angle) angle = angle * 0.5 v_cos = torch.cos(angle) v_sin = torch.sin(angle) quat = torch.cat([v_cos, v_sin * normalized], dim = 1) return quat2mat(quat) def quat2mat(quat): """Convert quaternion coefficients to rotation matrix. Args: quat: size = [B, 4] 4 <===>(w, x, y, z) Returns: Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] """ norm_quat = quat norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True) w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3] B = quat.size(0) w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) wx, wy, wz = w*x, w*y, w*z xy, xz, yz = x*y, x*z, y*z rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz, 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx, 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3) return rotMat def batch_global_rigid_transformation(Rs, Js, parent, rotate_base = False): N = Rs.shape[0] if rotate_base: np_rot_x = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype = np.float) np_rot_x = np.reshape(np.tile(np_rot_x, [N, 1]), [N, 3, 3]) rot_x = Variable(torch.from_numpy(np_rot_x).float()).to(Rs.device) root_rotation = torch.matmul(Rs[:, 0, :, :], rot_x) else: root_rotation = Rs[:, 0, :, :] Js = torch.unsqueeze(Js, -1) def make_A(R, t): R_homo = F.pad(R, [0, 0, 0, 1, 0, 0]) t_homo = torch.cat([t, Variable(torch.ones(N, 1, 1)).to(R.device)], dim = 1) return torch.cat([R_homo, t_homo], 2) A0 = make_A(root_rotation, Js[:, 0]) results = [A0] for i in range(1, parent.shape[0]): j_here = Js[:, i] - Js[:, parent[i]] A_here = make_A(Rs[:, i], j_here) res_here = torch.matmul(results[parent[i]], A_here) results.append(res_here) results = torch.stack(results, dim = 1) new_J = results[:, :, :3, 3] Js_w0 = torch.cat([Js, Variable(torch.zeros(N, 24, 1, 1)).to(Rs.device)], dim = 2) init_bone = torch.matmul(results, Js_w0) init_bone = F.pad(init_bone, [3, 0, 0, 0, 0, 0, 0, 0]) A = results - init_bone return new_J, A def batch_lrotmin(theta): theta = theta[:,3:].contiguous() Rs = batch_rodrigues(theta.view(-1, 3)) print(Rs.shape) e = Variable(torch.eye(3).float()) Rs = Rs.sub(1.0, e) return Rs.view(-1, 23 * 9) def batch_orth_proj(X, camera): ''' X is N x num_points x 3 ''' camera = camera.view(-1, 1, 3) X_trans = X[:, :, :2] + camera[:, :, 1:] shape = X_trans.shape return (camera[:, :, 0] * X_trans.view(shape[0], -1)).view(shape) def reflect_pose(poses): swap_inds = np.array([ 0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13, 14, 18, 19, 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33, 34, 35, 30, 31, 32, 36, 37, 38, 42, 43, 44, 39, 40, 41, 45, 46, 47, 51, 52, 53, 48, 49, 50, 57, 58, 59, 54, 55, 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 67, 68 ]) sign_flip = np.array([ 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1, 1, -1, -1 ]) return poses[swap_inds] * sign_flip ================================================ FILE: common/utils/SMPLicit/SMPLicit/utils/__init__.py ================================================ ================================================ FILE: common/utils/SMPLicit/SMPLicit/utils/sdf.py ================================================ import numpy as np import torch import math def create_grid(resolution, b_min, b_max): batch_size = b_min.shape[0] # make grids res_x, res_y, res_z = resolution 
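# Build a res_x * res_y * res_z lattice of integer grid indices, then map it into each
# sample's [b_min, b_max] box with a per-batch affine matrix (scale by box length / res,
# translate by b_min). The grid is returned as (batch_size, res_x*res_y*res_z, 3) query
# coordinates on the GPU.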
#zz,yy,xx = torch.meshgrid(torch.arange(res_z),torch.arange(res_y),torch.arange(res_x)) xx,yy,zz = torch.meshgrid(torch.arange(res_x),torch.arange(res_y),torch.arange(res_z)) coords = torch.stack((xx, yy, zz)) coords = coords.reshape(3, -1).float() coords = coords[None,:,:].repeat(batch_size,1,1).float().cuda() # affine transform coords_matrix = torch.eye(4).view(1,4,4).repeat(batch_size,1,1).float().cuda() length = b_max - b_min coords_matrix[:, 0, 0] = length[:,0] / res_x coords_matrix[:, 1, 1] = length[:,1] / res_y coords_matrix[:, 2, 2] = length[:,2] / res_z coords_matrix[:, 0:3, 3] = b_min coords = torch.bmm(coords_matrix[:, :3, :3], coords) + coords_matrix[:, :3, 3:4] # return grids coords = coords.view(batch_size, 3, -1).transpose(2,1).contiguous() # res_x*res_y*res_z, 3 return coords def batch_eval(query_points, ref_points, z_cut, z_style, eval_func, scale, num_samples): num_pts = query_points.shape[1] num_batches = math.ceil(num_pts / num_samples) sdf = [] for i in range(num_batches): sdf.append(eval_func(query_points[:,i * num_samples:i * num_samples + num_samples,:], ref_points, z_cut, z_style, scale)) sdf = torch.cat(sdf,1) return sdf def eval_grid(query_points, ref_points, z_cut, z_style, eval_func, resolution, scale, num_samples=512 * 512 * 512): sdf = batch_eval(query_points, ref_points, z_cut, z_style, eval_func, scale, num_samples=num_samples) return sdf def eval_grid_octree(query_points, ref_points, z_cut, z_style, eval_func, resolution, init_resolution=64, threshold=0.01, num_samples=512 * 512 * 512): res_x, res_y, res_z = resolution sdf = np.zeros(resolution) dirty = np.ones(resolution, dtype=np.bool) grid_mask = np.zeros(resolution, dtype=np.bool) step_size = res_x // init_resolution while step_size > 0: # subdivide the grid grid_mask[0:res_x:step_size, 0:res_y:step_size, 0:res_z:step_size] = True # test samples in this iteration test_mask = np.logical_and(grid_mask, dirty) points = query_points[torch.from_numpy(test_mask).cuda().reshape(-1)==1,:] sdf[test_mask] = batch_eval(points[None,:,:], ref_points[None,:,:], z_cut[None,:], z_style[None,:], eval_func, num_samples=num_samples).detach().cpu().numpy().reshape(-1) dirty[test_mask] = False # do interpolation if step_size <= 1: break for x in range(0, res_x - step_size, step_size): for y in range(0, res_y - step_size, step_size): for z in range(0, res_z - step_size, step_size): # if center marked, return if not dirty[x + step_size // 2, y + step_size // 2, z + step_size // 2]: continue v0 = sdf[x, y, z] v1 = sdf[x, y, z + step_size] v2 = sdf[x, y + step_size, z] v3 = sdf[x, y + step_size, z + step_size] v4 = sdf[x + step_size, y, z] v5 = sdf[x + step_size, y, z + step_size] v6 = sdf[x + step_size, y + step_size, z] v7 = sdf[x + step_size, y + step_size, z + step_size] v = np.array([v0, v1, v2, v3, v4, v5, v6, v7]) v_min = v.min() v_max = v.max() # this cell is all the same if (v_max - v_min) < threshold: sdf[x:x + step_size, y:y + step_size, z:z + step_size] = (v_max + v_min) / 2 dirty[x:x + step_size, y:y + step_size, z:z + step_size] = False step_size //= 2 return sdf.reshape(resolution) ================================================ FILE: common/utils/dir.py ================================================ import os import sys def make_folder(folder_name): if not os.path.exists(folder_name): os.makedirs(folder_name) def add_pypath(path): if path not in sys.path: sys.path.insert(0, path) ================================================ FILE: common/utils/human_models.py 
================================================ import numpy as np import torch import os.path as osp from config import cfg from utils.transforms import transform_joint_to_other_db import smplx class SMPL(object): def __init__(self): self.layer_arg = {'create_body_pose': False, 'create_betas': False, 'create_global_orient': False, 'create_transl': False} self.layer = {'neutral': smplx.create(cfg.human_model_path, 'smpl', gender='NEUTRAL', **self.layer_arg), 'male': smplx.create(cfg.human_model_path, 'smpl', gender='MALE', **self.layer_arg), 'female': smplx.create(cfg.human_model_path, 'smpl', gender='FEMALE', **self.layer_arg)} self.vertex_num = 6890 self.face = self.layer['neutral'].faces self.shape_param_dim = 10 # SMPL joint set self.joint_num = 24 self.joints_name = ('Pelvis', 'L_Hip', 'R_Hip', 'Torso', 'L_Knee', 'R_Knee', 'Spine', 'L_Ankle', 'R_Ankle', 'Chest', 'L_Foot', 'R_Foot', 'Neck', 'L_Collar', 'R_Collar', 'Head', 'L_Shoulder', 'R_Shoulder', 'L_Elbow', 'R_Elbow', 'L_Wrist', 'R_Wrist', 'L_Hand', 'R_Hand') self.flip_pairs = ( (1,2), (4,5), (7,8), (10,11), (13,14), (16,17), (18,19), (20,21), (22,23) ) self.root_joint_idx = self.joints_name.index('Pelvis') self.joint_regressor = self.layer['neutral'].J_regressor.numpy().astype(np.float32) # Astar pose self.Astar_pose = torch.zeros(1, self.joint_num*3) self.Astar_pose[0, 5] = 0.04 self.Astar_pose[0, 8] = -0.04 def get_custom_template_layer(self, v_template, gender): layer_arg = {'create_body_pose': False, 'create_betas': False, 'create_global_orient': False, 'create_transl': False, 'v_template': v_template} layer = smplx.create(cfg.human_model_path, 'smpl', gender=gender.upper(), **layer_arg) return layer smpl = SMPL() ================================================ FILE: common/utils/postprocessing.py ================================================ import os import os.path as osp import numpy as np import torch import cv2 import json import copy from pytorch3d.structures import Meshes from pytorch3d.renderer import RasterizationSettings, MeshRasterizer, TexturesVertex from pytorch3d.renderer.cameras import PerspectiveCameras from pytorch3d.renderer.lighting import AmbientLights, PointLights from pytorch3d.renderer.mesh.shader import BlendParams, HardPhongShader from pytorch3d.renderer.materials import Materials from pytorch3d.renderer.mesh.renderer import MeshRenderer from config import cfg def get_face_map(pix_to_face, faces): face_map = torch.zeros((pix_to_face.shape[0], pix_to_face.shape[1], 3)) - 1 for i in range(pix_to_face.shape[0]): for j in range(pix_to_face.shape[1]): if pix_to_face[i][j] != -1: face_map[i][j] = faces[pix_to_face[i][j]] return face_map class Renderer: def __init__(self, device='cuda', focal=cfg.focal, princpt=cfg.princpt, img_shape=cfg.input_img_shape): self.device = device self.set_renderer(focal, princpt, img_shape) def set_renderer(self, focal, princpt, img_shape, anti_aliasing=False): focal, princpt = torch.FloatTensor(focal)[None,:], torch.FloatTensor(princpt)[None,:] self.img_shape = img_shape self.anti_aliasing = anti_aliasing if self.anti_aliasing: img_shape = (img_shape[0]*2, img_shape[1]*2) princpt *= 2; focal *= 2 img_size = max(img_shape[0], img_shape[1]) raster_settings = RasterizationSettings(image_size=(img_size,img_size), blur_radius=0.0, faces_per_pixel=1, bin_size=0) cameras = PerspectiveCameras(focal_length=focal, \ principal_point=princpt, \ in_ndc=False, \ R=torch.eye(3)[None,:,:], \ T=torch.zeros(3)[None,:], \ image_size=((img_size,img_size),),\ device=torch.device(self.device)) 
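# The camera above is defined in screen space (in_ndc=False) with identity rotation and
# zero translation, so mesh vertices are expected to already be in camera coordinates.
# The rasterizer uses a square image of side max(H, W) so one camera/raster setup works
# for both portrait and landscape crops; rasterize_mesh() and render() crop the square
# output back to the input image shape afterwards. See render_result() in
# common/utils/vis.py for the typical call pattern of the module-level `renderer`.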
lights = PointLights(device=self.device, location=[[0.0, 0.0, -10.0]]) materials = Materials(ambient_color=((0.92, 0.92, 0.92), ), diffuse_color=((1, 1, 1), ), specular_color=((1, 1, 1), ), shininess=4, device=self.device) blend_params = BlendParams(sigma=1e-1, gamma=1e-4) shader = HardPhongShader(device=self.device, blend_params=blend_params, cameras=cameras, lights=lights, materials=materials) self.rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings).to(self.device) self.renderer = MeshRenderer(rasterizer=self.rasterizer, shader=shader) def rasterize_mesh(self, mesh_vert, mesh_face): output = self.rasterizer(Meshes(verts=[mesh_vert.to(self.device)], faces=[mesh_face.to(self.device)])) face_map = get_face_map(output.pix_to_face.squeeze(), mesh_face) return face_map[:, :cfg.input_img_shape[1]] def render(self, img, mesh_vert, mesh_face): mesh_vert, mesh_face = torch.tensor(mesh_vert), torch.tensor(mesh_face) verts_rgb = torch.ones_like(mesh_vert)[None] textures = TexturesVertex(verts_features=verts_rgb.to(self.device)) output = self.renderer(Meshes(verts=[mesh_vert.to(self.device)], faces=[mesh_face.to(self.device)], textures=textures)) output = (output[0]*255).cpu().numpy() if self.anti_aliasing: img = cv2.resize(img, (self.img_shape[1]*2, self.img_shape[0]*2)) img_shape = (self.img_shape[0]*2, self.img_shape[1]*2) else: img_shape = self.img_shape if img_shape[0] > img_shape[1]: output = output[:, :img_shape[1]] else: output = output[:img_shape[0], :] valid = output[:,:,3] > 0 img[valid] = output[:,:,:3][valid] if self.anti_aliasing: img = cv2.resize(img, (self.img_shape[1], self.img_shape[0])) return img def rasterize_mesh_given_cam_param(mesh_vert, mesh_face, focal, princpt): device = 'cuda' raster_settings = RasterizationSettings(image_size=(cfg.input_img_shape[0],cfg.input_img_shape[0]), blur_radius=0.0, faces_per_pixel=1) cameras = PerspectiveCameras(focal_length=torch.FloatTensor([focal[0],focal[1]])[None,:], \ principal_point=torch.FloatTensor([princpt[0],princpt[1]])[None,:], \ in_ndc=False, \ R=torch.eye(3)[None,:,:], \ T=torch.zeros(3)[None,:], \ image_size=((cfg.input_img_shape[0],cfg.input_img_shape[0]),),\ device=torch.device(device)) rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings).to(device) output = rasterizer(Meshes(verts=[mesh_vert.to(device)], faces=[mesh_face.to(device)])) face_map = get_face_map(output.pix_to_face.squeeze(), mesh_face) return face_map[:, :cfg.input_img_shape[1]] def save_proj_faces(face_map, save_path): face_map = face_map.reshape(-1, 3) file = open(save_path, 'w') for idx, v in enumerate(face_map): file.write('%d %d %d\n' % (v[0], v[1], v[2])) file.close() def merge_mesh(verts, faces): vert_len = [0] for vert in verts: vert_len.append(len(vert)) vert_len = np.cumsum(vert_len) for i, face in enumerate(faces): face += vert_len[i] return np.concatenate(verts), np.concatenate(faces) def read_valid_point(verts, indexs, valid): valid_verts = [] for i, val in enumerate(valid): if val != 0: idx1, idx2, idx3 = indexs[i] v = (verts[idx1] + verts[idx2] + verts[idx3]) / 3 valid_verts.append(v) valid_verts = np.stack(valid_verts) return valid_verts def pa_mpjpe(predicted, target): """ Pose error: MPJPE after rigid alignment (scale, rotation, and translation), often referred to as "Protocol #2" in many papers. 
""" assert predicted.shape == target.shape muX = np.mean(target, axis=1, keepdims=True) muY = np.mean(predicted, axis=1, keepdims=True) X0 = target - muX Y0 = predicted - muY normX = np.sqrt(np.sum(X0**2, axis=(1, 2), keepdims=True)) normY = np.sqrt(np.sum(Y0**2, axis=(1, 2), keepdims=True)) X0 /= normX Y0 /= normY H = np.matmul(X0.transpose(0, 2, 1), Y0) U, s, Vt = np.linalg.svd(H) V = Vt.transpose(0, 2, 1) R = np.matmul(V, U.transpose(0, 2, 1)) # Avoid improper rotations (reflections), i.e. rotations with det(R) = -1 sign_detR = np.sign(np.expand_dims(np.linalg.det(R), axis=1)) V[:, :, -1] *= sign_detR s[:, -1] *= sign_detR.flatten() R = np.matmul(V, U.transpose(0, 2, 1)) # Rotation tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) a = tr * normX / normY # Scale t = muX - a*np.matmul(muY, R) # Translation return a, R, t def pairwise_distances(a, b, p=2, inv=False, num_samples=500): if not inv: tmp = a; a = b; b= tmp a = torch.tensor(a[None, :, :]).cuda() b = torch.tensor(b[None, :, :]).cuda() num_batches = a.shape[1] // num_samples dists = [] for i in range(num_batches): dist = torch.norm((a[:,i*num_samples : (i+1)*num_samples, None, :] - b[:, None, :, :]),p=2,dim=3) dist, _ = torch.min(dist, 2) dist = dist.reshape(-1) dists.append(dist) if a.shape[1] % num_samples > 0: dist = torch.norm((a[:,-1 * (a.shape[1] % num_samples):, None, :] - b[:, None, :, :]),p=2,dim=3) dist, _ = torch.min(dist, 2) dist = dist.reshape(-1) dists.append(dist) dist= torch.cat(dists).mean().cpu() return dist renderer = Renderer() ================================================ FILE: common/utils/preprocessing.py ================================================ import numpy as np import cv2 import random from config import cfg import math from utils.human_models import smpl from utils.transforms import cam2pixel, transform_joint_to_other_db import torch def load_img(path, order='RGB'): img = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) if not isinstance(img, np.ndarray): raise IOError("Fail to read %s" % path) if order=='RGB': img = img[:,:,::-1].copy() img = img.astype(np.float32) return img def get_bbox(joint_img, joint_valid, extend_ratio=1.2): x_img, y_img = joint_img[:,0], joint_img[:,1] x_img = x_img[joint_valid==1]; y_img = y_img[joint_valid==1]; xmin = min(x_img); ymin = min(y_img); xmax = max(x_img); ymax = max(y_img); x_center = (xmin+xmax)/2.; width = xmax-xmin; xmin = x_center - 0.5 * width * extend_ratio xmax = x_center + 0.5 * width * extend_ratio y_center = (ymin+ymax)/2.; height = ymax-ymin; ymin = y_center - 0.5 * height * extend_ratio ymax = y_center + 0.5 * height * extend_ratio bbox = np.array([xmin, ymin, xmax - xmin, ymax - ymin]).astype(np.float32) return bbox def process_bbox(bbox, img_width, img_height): # sanitize bboxes x, y, w, h = bbox x1 = np.max((0, x)) y1 = np.max((0, y)) x2 = np.min((img_width - 1, x1 + np.max((0, w - 1)))) y2 = np.min((img_height - 1, y1 + np.max((0, h - 1)))) if w*h > 0 and x2 > x1 and y2 > y1: bbox = np.array([x1, y1, x2-x1, y2-y1]) else: return None # aspect ratio preserving bbox w = bbox[2] h = bbox[3] c_x = bbox[0] + w/2. c_y = bbox[1] + h/2. aspect_ratio = cfg.input_img_shape[1]/cfg.input_img_shape[0] if w > aspect_ratio * h: h = w / aspect_ratio elif w < aspect_ratio * h: w = h * aspect_ratio bbox[2] = w*1.25 bbox[3] = h*1.25 bbox[0] = c_x - bbox[2]/2. bbox[1] = c_y - bbox[3]/2. 
bbox = bbox.astype(np.float32) return bbox def convert_focal_princpt(focal, princpt, img2bb_trans): focal = np.array([[focal[0], 0], [0, focal[1]], [0, 0]]) princpt = np.array([[princpt[0], 0], [0, princpt[1]], [1, 1]]) focal = np.dot(img2bb_trans, focal) princpt = np.dot(img2bb_trans, princpt) cam_param = np.array([focal[0][0], focal[1][1], princpt[0][0], princpt[1][1]]) return cam_param def get_aug_config(): scale_factor = 0.25 rot_factor = 30 color_factor = 0.2 scale = np.clip(np.random.randn(), -1.0, 1.0) * scale_factor + 1.0 rot = np.clip(np.random.randn(), -2.0, 2.0) * rot_factor if random.random() <= 0.6 else 0 c_up = 1.0 + color_factor c_low = 1.0 - color_factor color_scale = np.array([random.uniform(c_low, c_up), random.uniform(c_low, c_up), random.uniform(c_low, c_up)]) do_flip = False return scale, rot, color_scale, do_flip def augmentation(img, bbox, data_split): if data_split == 'train': scale, rot, color_scale, do_flip = get_aug_config() else: scale, rot, color_scale, do_flip = 1.0, 0.0, np.array([1,1,1]), False img, valid_mask, trans, inv_trans = generate_patch_image(img, bbox, scale, rot, do_flip, cfg.input_img_shape) img = np.clip(img * color_scale[None,None,:], 0, 255) return img, valid_mask, trans, inv_trans, rot, do_flip def generate_patch_image(cvimg, bbox, scale, rot, do_flip, out_shape): img = cvimg.copy() img_height, img_width, img_channels = img.shape bb_c_x = float(bbox[0] + 0.5*bbox[2]) bb_c_y = float(bbox[1] + 0.5*bbox[3]) bb_width = float(bbox[2]) bb_height = float(bbox[3]) if do_flip: img = img[:, ::-1, :] bb_c_x = img_width - bb_c_x - 1 trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height, out_shape[1], out_shape[0], scale, rot) img_patch = cv2.warpAffine(img, trans, (int(out_shape[1]), int(out_shape[0])), flags=cv2.INTER_LINEAR, borderValue=(-1,-1,-1)) valid_mask = (img_patch > -1) if len(valid_mask.shape) == 3: valid_mask = valid_mask[:,:,0] img_patch[img_patch == -1] = 0 img_patch = img_patch.astype(np.float32) inv_trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height, out_shape[1], out_shape[0], scale, rot, inv=True) return img_patch, valid_mask, trans, inv_trans def rotate_2d(pt_2d, rot_rad): x = pt_2d[0] y = pt_2d[1] sn, cs = np.sin(rot_rad), np.cos(rot_rad) xx = x * cs - y * sn yy = x * sn + y * cs return np.array([xx, yy], dtype=np.float32) def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False): # augment size with scale src_w = src_width * scale src_h = src_height * scale src_center = np.array([c_x, c_y], dtype=np.float32) # augment rotation rot_rad = np.pi * rot / 180 src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad) src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad) dst_w = dst_width dst_h = dst_height dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32) dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32) dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32) src = np.zeros((3, 2), dtype=np.float32) src[0, :] = src_center src[1, :] = src_center + src_downdir src[2, :] = src_center + src_rightdir dst = np.zeros((3, 2), dtype=np.float32) dst[0, :] = dst_center dst[1, :] = dst_center + dst_downdir dst[2, :] = dst_center + dst_rightdir if inv: trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) else: trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) trans = trans.astype(np.float32) return trans def process_db_coord(joint_img, joint_valid, do_flip, img_shape, 
flip_pairs, img2bb_trans, rot, src_joints_name, target_joints_name): joint_img, joint_valid = joint_img.copy(), joint_valid.copy() # flip augmentation if do_flip: joint_img[:,0] = img_shape[1] - 1 - joint_img[:,0] for pair in flip_pairs: joint_img[pair[0],:], joint_img[pair[1],:] = joint_img[pair[1],:].copy(), joint_img[pair[0],:].copy() joint_valid[pair[0],:], joint_valid[pair[1],:] = joint_valid[pair[1],:].copy(), joint_valid[pair[0],:].copy() # affine transformation and root-relative depth joint_img_xy1 = np.concatenate((joint_img, np.ones_like(joint_img[:,:1])),1) joint_img = np.dot(img2bb_trans, joint_img_xy1.transpose(1,0)).transpose(1,0) joint_img[:,0] = joint_img[:,0] / cfg.input_img_shape[1] * cfg.output_joint_shape[1] joint_img[:,1] = joint_img[:,1] / cfg.input_img_shape[0] * cfg.output_joint_shape[0] # check truncation joint_trunc = joint_valid * ((joint_img[:,0] >= 0) * (joint_img[:,0] < cfg.output_joint_shape[1]) * \ (joint_img[:,1] >= 0) * (joint_img[:,1] < cfg.output_joint_shape[0])).reshape(-1,1).astype(np.float32) # transform joints to target db joints joint_img = transform_joint_to_other_db(joint_img, src_joints_name, target_joints_name) joint_valid = transform_joint_to_other_db(joint_valid, src_joints_name, target_joints_name) joint_trunc = transform_joint_to_other_db(joint_trunc, src_joints_name, target_joints_name) return joint_img, joint_valid, joint_trunc def process_human_model_output(human_model_param, cam_param, do_flip, img_shape, img2bb_trans, rot): pose, shape = human_model_param['pose'], human_model_param['shape'] if 'trans' in human_model_param: trans = human_model_param['trans'] else: trans = [0,0,0] if 'gender' in human_model_param: gender = human_model_param['gender'] else: gender = 'neutral' pose = torch.FloatTensor(pose).view(-1,3); shape = torch.FloatTensor(shape).view(1,-1); # smpl parameters (pose: 72 dimension, shape: 10 dimension) trans = torch.FloatTensor(trans).view(1,-1) # translation vector # apply camera extrinsic (rotation) # merge root pose and camera rotation if 'R' in cam_param: R = np.array(cam_param['R'], dtype=np.float32).reshape(3,3) root_pose = pose[smpl.root_joint_idx,:].numpy() root_pose, _ = cv2.Rodrigues(root_pose) root_pose, _ = cv2.Rodrigues(np.dot(R,root_pose)) pose[smpl.root_joint_idx] = torch.from_numpy(root_pose).view(3) # get mesh and joint coordinates root_pose = pose[smpl.root_joint_idx].view(1,3) body_pose = torch.cat((pose[:smpl.root_joint_idx,:], pose[smpl.root_joint_idx+1:,:])).view(1,-1) output = smpl.layer[gender](betas=shape, body_pose=body_pose, global_orient=root_pose, transl=trans) mesh_coord = output.vertices[0].numpy() joint_coord = np.dot(smpl.joint_regressor, mesh_coord) # apply camera exrinsic (translation) # compenstate rotation (translation from origin to root joint was not cancled) if 'R' in cam_param and 't' in cam_param: R, t = np.array(cam_param['R'], dtype=np.float32).reshape(3,3), np.array(cam_param['t'], dtype=np.float32).reshape(1,3) root_coord = joint_coord[smpl.root_joint_idx,None,:] joint_coord = joint_coord - root_coord + np.dot(R, root_coord.transpose(1,0)).transpose(1,0) + t mesh_coord = mesh_coord - root_coord + np.dot(R, root_coord.transpose(1,0)).transpose(1,0) + t ## so far, data augmentations are not applied yet ## now, project the 3D coordinates to image space and apply data augmentations # 3D data rotation augmentation rot_aug_mat = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], [0, 0, 1]], dtype=np.float32) 
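# rot_aug_mat is the in-plane rotation R_z(-rot) matching the 2D rotation applied by the
# image augmentation; after the optional flip below, it is composed with the SMPL root
# orientation through cv2.Rodrigues so the 3D body stays aligned with the rotated crop.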
# flip pose parameter (axis-angle) if do_flip: for pair in smpl.flip_pairs: pose[pair[0], :], pose[pair[1], :] = pose[pair[1], :].clone(), pose[pair[0], :].clone() pose[:,1:3] *= -1 # multiply -1 to y and z axis of axis-angle # rotate root pose pose = pose.numpy() root_pose = pose[smpl.root_joint_idx,:] root_pose, _ = cv2.Rodrigues(root_pose) root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat,root_pose)) pose[smpl.root_joint_idx] = root_pose.reshape(3) # return results pose = pose.reshape(-1) # change to mean shape if beta is too far from it shape[(shape.abs() > 3).any(dim=1)] = 0. shape = shape.numpy().reshape(-1) return pose, shape, mesh_coord # data augmentation is not performed on mesh_coord def bilinear_interpolate(im, x, y): x = np.asarray(x) y = np.asarray(y) x0 = np.floor(x).astype(int) x1 = x0 + 1 y0 = np.floor(y).astype(int) y1 = y0 + 1 x0 = np.clip(x0, 0, im.shape[2]-1); x1 = np.clip(x1, 0, im.shape[2]-1); y0 = np.clip(y0, 0, im.shape[1]-1); y1 = np.clip(y1, 0, im.shape[1]-1); Ia = im[:, y0, x0 ] Ib = im[:, y1, x0 ] Ic = im[:, y0, x1 ] Id = im[:, y1, x1 ] wa = (x1-x) * (y1-y) wb = (x1-x) * (y-y0) wc = (x-x0) * (y1-y) wd = (x-x0) * (y-y0) return wa*Ia + wb*Ib + wc*Ic + wd*Id def iou_sil(sil_out, sil_target): intersect = sil_out * sil_target union = (sil_out + sil_target) > 0 if np.sum(union) == 0: return None else: return np.sum(intersect) / np.sum(union) ================================================ FILE: common/utils/transforms.py ================================================ import torch import numpy as np from config import cfg from torch.nn import functional as F import torchgeometry as tgm def cam2pixel(cam_coord, f, c): x = cam_coord[:,0] / cam_coord[:,2] * f[0] + c[0] y = cam_coord[:,1] / cam_coord[:,2] * f[1] + c[1] z = cam_coord[:,2] return np.stack((x,y,z),1) def pixel2cam(pixel_coord, f, c): x = (pixel_coord[:,0] - c[0]) / f[0] * pixel_coord[:,2] y = (pixel_coord[:,1] - c[1]) / f[1] * pixel_coord[:,2] z = pixel_coord[:,2] return np.stack((x,y,z),1) def world2cam(world_coord, R, t): cam_coord = np.dot(R, world_coord.transpose(1,0)).transpose(1,0) + t.reshape(1,3) return cam_coord def transform_joint_to_other_db(src_joint, src_name, dst_name): src_joint_num = len(src_name) dst_joint_num = len(dst_name) new_joint = np.zeros(((dst_joint_num,) + src_joint.shape[1:]), dtype=np.float32) for src_idx in range(len(src_name)): name = src_name[src_idx] if name in dst_name: dst_idx = dst_name.index(name) new_joint[dst_idx] = src_joint[src_idx] return new_joint def rot6d_to_axis_angle(x): batch_size = x.shape[0] x = x.view(-1,3,2) a1 = x[:, :, 0] a2 = x[:, :, 1] b1 = F.normalize(a1) b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1) b3 = torch.cross(b1, b2) rot_mat = torch.stack((b1, b2, b3), dim=-1) # 3x3 rotation matrix rot_mat = torch.cat([rot_mat,torch.zeros((batch_size,3,1)).cuda().float()],2) # 3x4 rotation matrix axis_angle = tgm.rotation_matrix_to_angle_axis(rot_mat).reshape(-1,3) # axis-angle axis_angle[torch.isnan(axis_angle)] = 0.0 return axis_angle def unwrap_xy_to_uv(feat_xy, dp_fg, dp_I, dp_u, dp_v): batch_size, feat_dim, height, width = feat_xy.shape dp_fg = torch.max(dp_fg, 1)[1] # argmax dp_I = torch.max(dp_I, 1)[1] + 1 # argmax. 
add 1 to make the bkg class dp_I[dp_fg == 0] = 0 # bkg _dp_u, _dp_v = 0, 0 for i in range(cfg.dp_patch_num): mask = (dp_I == (i+1)) # add 1 to make the bkg class _dp_u += dp_u[:,i,:,:] * mask _dp_v += dp_v[:,i,:,:] * mask dp_u, dp_v = _dp_u, _dp_v scatter_src = feat_xy.permute(1,0,2,3).reshape(feat_dim,-1) batch_idx = torch.arange(batch_size)[:,None,None].repeat(1,height,width).view(-1).to(feat_xy.device) #.cuda() _dp_I = dp_I.view(-1) _dp_u = (dp_u.view(-1) * (cfg.output_uv_shape[0]-1)).long() _dp_v = ((1 - dp_v.view(-1)) * (cfg.output_uv_shape[1]-1)).long() # inverse v coordinate following DensePose R-CNN scatter_idx = batch_idx * (cfg.dp_patch_num + 1) * cfg.output_uv_shape[0] * cfg.output_uv_shape[1] + \ _dp_I * cfg.output_uv_shape[0] * cfg.output_uv_shape[1] + \ _dp_u * cfg.output_uv_shape[1] + \ _dp_v is_valid = (_dp_u >= 0) * (_dp_u < cfg.output_uv_shape[0]) * (_dp_v >= 0) * (_dp_v < cfg.output_uv_shape[1]) scatter_src = scatter_src[:,is_valid] scatter_idx = scatter_idx[is_valid] feat_uv = scatter_mean(scatter_src, scatter_idx, 1, dim_size = batch_size * (cfg.dp_patch_num + 1) * cfg.output_uv_shape[0] * cfg.output_uv_shape[1]).view(feat_dim, batch_size, cfg.dp_patch_num + 1, cfg.output_uv_shape[0], cfg.output_uv_shape[1]).permute(1,2,0,3,4)[:,1:,:,:,:] # remove bkg class (cfg.dp_patch_num + 1 -> cfg.dp_patch_num) return feat_uv ================================================ FILE: common/utils/vis.py ================================================ import os import os.path as osp import cv2 import numpy as np import matplotlib.pyplot as plt from utils.human_models import smpl from utils.postprocessing import merge_mesh, renderer from config import cfg def save_result(output, path): verts, faces, colors = [], [], [] verts.append(output['smpl_mesh'].astype(np.float32)) faces.append(smpl.face.astype(np.int32)) colors.append(np.tile(cfg.cloth_colors['smpl_body'], (len(output['smpl_mesh']),1))) for cloth_type in cfg.cloth_types: if output[cloth_type + '_mesh'] is None: continue verts.append(output[cloth_type + '_mesh'].vertices.astype(np.float32)) faces.append(output[cloth_type + '_mesh'].faces.astype(np.int32)) colors.append(np.tile(cfg.cloth_colors[cloth_type], (len(output[cloth_type + '_mesh'].vertices),1))) verts, faces = merge_mesh(verts, faces) colors = np.concatenate(colors) verts[:,:2] *= -1 save_obj_with_color(verts, faces, colors, path) return verts, faces def render_result(verts, faces, img, path): rendered_img = renderer.render(img, verts, faces) cv2.imwrite(path.replace('output.obj', 'render_img.jpg'), rendered_img) def vis_keypoints_with_skeleton(img, kps, kps_lines, kp_thresh=0.4, alpha=1): # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] # Perform the drawing on a copy of the image, to allow for blending. kp_mask = np.copy(img) # Draw the keypoints. 
for l in range(len(kps_lines)): i1 = kps_lines[l][0] i2 = kps_lines[l][1] p1 = kps[0, i1].astype(np.int32), kps[1, i1].astype(np.int32) p2 = kps[0, i2].astype(np.int32), kps[1, i2].astype(np.int32) if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: cv2.line( kp_mask, p1, p2, color=colors[l], thickness=2, lineType=cv2.LINE_AA) if kps[2, i1] > kp_thresh: cv2.circle( kp_mask, p1, radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) if kps[2, i2] > kp_thresh: cv2.circle( kp_mask, p2, radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) # Blend the keypoints. return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) def vis_keypoints(img, kps, alpha=1): # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kps) + 2)] colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] # Perform the drawing on a copy of the image, to allow for blending. kp_mask = np.copy(img) # Draw the keypoints. for i in range(len(kps)): p = kps[i][0].astype(np.int32), kps[i][1].astype(np.int32) cv2.circle(kp_mask, p, radius=3, color=colors[i], thickness=-1, lineType=cv2.LINE_AA) # Blend the keypoints. return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) def vis_3d_skeleton(kpt_3d, kpt_3d_vis, kps_lines, filename=None): fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] colors = [np.array((c[2], c[1], c[0])) for c in colors] for l in range(len(kps_lines)): i1 = kps_lines[l][0] i2 = kps_lines[l][1] x = np.array([kpt_3d[i1,0], kpt_3d[i2,0]]) y = np.array([kpt_3d[i1,1], kpt_3d[i2,1]]) z = np.array([kpt_3d[i1,2], kpt_3d[i2,2]]) if kpt_3d_vis[i1,0] > 0 and kpt_3d_vis[i2,0] > 0: ax.plot(x, z, -y, c=colors[l], linewidth=2) if kpt_3d_vis[i1,0] > 0: ax.scatter(kpt_3d[i1,0], kpt_3d[i1,2], -kpt_3d[i1,1], c=colors[l], marker='o') if kpt_3d_vis[i2,0] > 0: ax.scatter(kpt_3d[i2,0], kpt_3d[i2,2], -kpt_3d[i2,1], c=colors[l], marker='o') x_r = np.array([0, cfg.input_shape[1]], dtype=np.float32) y_r = np.array([0, cfg.input_shape[0]], dtype=np.float32) z_r = np.array([0, 1], dtype=np.float32) if filename is None: ax.set_title('3D vis') else: ax.set_title(filename) ax.set_xlabel('X Label') ax.set_ylabel('Z Label') ax.set_zlabel('Y Label') ax.legend() plt.show() cv2.waitKey(0) def save_obj(v, f, file_name='output.obj'): obj_file = open(file_name, 'w') for i in range(len(v)): obj_file.write('v ' + str(v[i][0]) + ' ' + str(v[i][1]) + ' ' + str(v[i][2]) + '\n') if f is not None: for i in range(len(f)): obj_file.write('f ' + str(f[i][0]+1) + '/' + str(f[i][0]+1) + ' ' + str(f[i][1]+1) + '/' + str(f[i][1]+1) + ' ' + str(f[i][2]+1) + '/' + str(f[i][2]+1) + '\n') obj_file.close() def save_obj_with_color(v, f, c, file_name='output.obj'): obj_file = open(file_name, 'w') for i in range(len(v)): obj_file.write('v ' + str(v[i][0]) + ' ' + str(v[i][1]) + ' ' + str(v[i][2]) + ' ' + str(c[i][0]) + ' ' + str(c[i][1]) + ' ' + str(c[i][2]) + '\n') if f is not None: for i in range(len(f)): obj_file.write('f ' + str(f[i][0]+1) + '/' + str(f[i][0]+1) + ' ' + str(f[i][1]+1) + '/' + str(f[i][1]+1) + ' ' + str(f[i][2]+1) + '/' + str(f[i][2]+1) + '\n') obj_file.close() def vis_parse(img, parse, class_num): # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 
cmap = plt.get_cmap('rainbow') colors = [cmap(i) for i in np.linspace(0, 1, class_num)] colors = [(0,0,0)] + [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] # add bkg for i in range(class_num+1): img = img * (parse[:,:,None] != i) + np.array(colors[i]).reshape(1,1,3) * (parse[:,:,None] == i) return img def vis_dp(img, dp_u, dp_v, filename): fig = plt.figure() plt.imshow(img[:,:,::-1]) plt.contour(dp_u,10,linewidths=1) plt.contour(dp_v,10,linewidths=1) plt.axis('off') plt.xticks([]) plt.yticks([]) plt.subplots_adjust(left = 0, bottom = 0, right = 1, top = 1, hspace = 0, wspace = 0) plt.savefig(filename, bbox_inches='tight', pad_inches=0) plt.close(fig) return ================================================ FILE: data/DeepFashion2/DeepFashion2.py ================================================ import os import os.path as osp import numpy as np import copy import json import cv2 import torch from pycocotools.coco import COCO import pycocotools.mask as mask_util from utils.human_models import smpl from utils.preprocessing import load_img, process_bbox, augmentation, generate_patch_image, bilinear_interpolate from utils.vis import save_obj, vis_parse from config import cfg class DeepFashion2(torch.utils.data.Dataset): def __init__(self, transform, data_split): self.transform = transform self.data_split = data_split if data_split != 'train': assert 0, "Invalid train mode." self.img_path = osp.join('..', 'data', 'DeepFashion2', 'data') self.annot_path = osp.join('..', 'data', 'DeepFashion2', 'data') self.parse_path = osp.join('..', 'data', 'preprocessed_data', 'parse', 'DeepFashion2') self.preprocessed_path = osp.join('..', 'data', 'preprocessed_data') self.dp_path = osp.join(self.preprocessed_path, 'densepose', 'DeepFashion2') # lip parse set self.parse_set = {'uppercloth': (5,), 'coat': (7,), 'pants': (9,), 'skirts': (12,), 'hair': (2,), 'shoes': (18,19)} self.sampling_stride = 4 # subsampling for training self.datalist = self.load_data() print("Load data: ", len(self.datalist)) def load_data(self): self.img_path = osp.join(self.img_path , 'train', 'image') self.dp_path = osp.join(self.dp_path, 'train') db = COCO(osp.join(self.annot_path, 'DeepFashion2_train.json')) with open(osp.join(self.preprocessed_path, 'gender', 'DeepFashion2_train_gender.json')) as f: genders = json.load(f) with open(osp.join(self.parse_path, 'train_parsing_annotation.json')) as f: parsing_paths = json.load(f) datalist = [] i = 0 for aid in db.anns.keys(): i += 1 if i % self.sampling_stride != 0: continue ann = db.anns[aid] img = db.loadImgs(ann['image_id'])[0] img_path = osp.join(self.img_path, img['file_name']) # bbox bbox = process_bbox(ann['bbox'], img['width'], img['height']) if bbox is None: continue # parse if parsing_paths is not None: if str(aid) in parsing_paths: parse_path = osp.join(self.parse_path, parsing_paths[str(aid)]) else: continue else: parse_path = None # densepose if self.data_split == 'train': try: dp = np.load(osp.join(self.dp_path, str(aid) + '.npz'), allow_pickle=True) except: continue if len(dp['smpl_v_idx']) == 0: continue dp_x = np.array(dp['dp_x'], dtype=np.float32) dp_y = np.array(dp['dp_y'], dtype=np.float32) dp_xy = np.stack((dp_x, dp_y),1) dp_I = np.array(dp['dp_I'], dtype=np.int16) dp_u = np.array(dp['dp_U'], dtype=np.float32) dp_v = np.array(dp['dp_V'], dtype=np.float32) dp_uv = np.stack((dp_u, dp_v),1) smpl_v_idx = np.array(dp['smpl_v_idx'], dtype=np.int32) dp_mask = dp['dp_fg'].item() dp_mask = mask_util.decode(dp_mask) dp_data = {'xy': dp_xy, 'uv': dp_uv, 'I': dp_I, 
'smpl_v_idx': smpl_v_idx, 'masks': dp_mask} else: dp_data = None # gender if str(aid) in genders: gender = genders[str(aid)] else: continue data_dict = { 'img_path': img_path, 'ann_id': aid, 'img_shape': (img['height'],img['width']), 'bbox': bbox, 'orig_bbox': ann['bbox'], 'gender': gender, 'parse_path': parse_path, 'dp': dp_data } datalist.append(data_dict) return datalist def __len__(self): return len(self.datalist) def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape = data['img_path'], data['img_shape'] # image load img = load_img(img_path) # affine transform bbox = data['bbox'] img, valid_mask, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(img, bbox, self.data_split) img = self.transform(img.astype(np.float32))/255. # load parse (cloth segmentation) parse = cv2.imread(data['parse_path']) parse_list = [] for cloth_type in ('fg',) + cfg.cloth_types: # get cloth indexs if cloth_type == 'fg': idxs = np.unique(parse).tolist() idxs.pop(idxs.index(0)) if len(idxs) == 0: parse_fg = np.zeros((cfg.output_parse_shape[0], cfg.output_parse_shape[1])) > 0 continue else: idxs = self.parse_set[cloth_type] # get masking corresponding to a cloth mask = [parse == i for i in idxs] mask = (sum(mask) > 0).astype(np.float32) _, _, _, lip2img_trans = generate_patch_image(mask, data['orig_bbox'], 1.0, 0.0, False, mask.shape) mask = cv2.warpAffine(mask, lip2img_trans, (img_shape[1], img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.warpAffine(mask, img2bb_trans, (cfg.input_img_shape[1], cfg.input_img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.resize(mask, (cfg.output_parse_shape[1], cfg.output_parse_shape[0])) if cloth_type == 'fg': parse_fg = mask[:,:,0] > 0 else: parse_list.append(mask) parse = np.stack(parse_list)[:,:,:,0] # remove the last dimension (which has 3 channels) is_bkg = (np.prod(parse == 0, 0) == 1) parse = np.argmax(parse, 0) + 1 # add 1 to make the bkg class parse[is_bkg] = 0 parse[valid_mask == 0] = -1 parse_valid = valid_mask # load densepose dp_xy, dp_uv, dp_I, dp_vertex = data['dp']['xy'], data['dp']['uv'], data['dp']['I'], data['dp']['smpl_v_idx'] dp_xy = np.concatenate((dp_xy, np.ones_like(dp_xy[:,:1])),1) dp_xy = np.dot(img2bb_trans, dp_xy.transpose(1,0)).transpose(1,0) dp_I = dp_I - 1 # dp_I is started wtih 1. make it zero-based index. 
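# The block below caps the number of DensePose points at cfg.dp_point_num by random
# sampling, looks up each point's cloth label from the parse map with bilinear
# interpolation, and propagates those labels to the matched SMPL vertices
# (smpl_cloth_idx / smpl_patch_idx), which are returned as per-vertex training targets.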
cur_point_num = len(dp_xy) if cur_point_num > cfg.dp_point_num: idxs = np.random.choice(np.arange(cur_point_num), size=cfg.dp_point_num) cur_point_num = cfg.dp_point_num dp_xy = dp_xy[idxs]; dp_uv = dp_uv[idxs]; dp_I = dp_I[idxs]; dp_vertex = dp_vertex[idxs] # match densepose & parse _dp_xy = dp_xy.copy() _dp_xy[:,0] = _dp_xy[:,0] / cfg.input_img_shape[1] * cfg.output_parse_shape[1] _dp_xy[:,1] = _dp_xy[:,1] / cfg.input_img_shape[0] * cfg.output_parse_shape[0] parse_onehot = np.zeros((len(cfg.cloth_types)+1, cfg.output_parse_shape[0], cfg.output_parse_shape[1])) for i in range(len(cfg.cloth_types)+1): parse_onehot[i][parse == i] = 1.0 dp_cloth_idx = np.ones((_dp_xy.shape[0]), np.int16) * -1 dp_cloth_idx[bilinear_interpolate(parse_fg[None,:,:], _dp_xy[:,0], _dp_xy[:,1])[0] > 0.5] = 0 for i in range(len(cfg.cloth_types)): dp_cloth_idx[np.argmax(bilinear_interpolate(parse_onehot, _dp_xy[:,0], _dp_xy[:,1]), 0) == (i+1)] = i+1 smpl_cloth_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_cloth_idx[dp_vertex] = dp_cloth_idx smpl_cloth_valid = (smpl_cloth_idx != -1).astype(np.float32) smpl_patch_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_patch_idx[dp_vertex] = dp_I[dp_I != -1] # remove coat ambiguity if (smpl_cloth_idx==cfg.cloth_types.index('uppercloth')+1).sum() == 0 and (smpl_cloth_idx==cfg.cloth_types.index('coat')+1).sum() > 0: idxs = (smpl_cloth_idx == cfg.cloth_types.index('coat')+1) smpl_cloth_idx[idxs] = cfg.cloth_types.index('uppercloth')+1 # gender if data['gender'] == 'male': gender = 1 elif data['gender'] == 'female': gender = 2 else: gender = 0 # dummy smpl parameter smpl_pose = np.zeros((smpl.joint_num*3,), dtype=np.float32) smpl_shape = np.zeros((smpl.shape_param_dim,), dtype=np.float32) cam_trans = np.zeros((3,), dtype=np.float32) inputs = {'img': img} targets = {'gender': gender, 'parse': parse, 'smpl_cloth_idx': smpl_cloth_idx, 'smpl_patch_idx': smpl_patch_idx} meta_info = {'smpl_cloth_valid': smpl_cloth_valid, 'smpl_pose': smpl_pose, 'smpl_shape': smpl_shape, 'cam_trans': cam_trans} return inputs, targets, meta_info ================================================ FILE: data/MSCOCO/MSCOCO.py ================================================ import os import os.path as osp import numpy as np from config import cfg import copy import json import cv2 import torch from pycocotools.coco import COCO import pycocotools.mask as mask_util from utils.human_models import smpl from utils.preprocessing import load_img, process_bbox, augmentation, generate_patch_image, process_db_coord, process_human_model_output, bilinear_interpolate, iou_sil from utils.vis import save_obj, save_result, render_result class MSCOCO(torch.utils.data.Dataset): def __init__(self, transform, data_split): self.transform = transform self.data_split = data_split self.img_path = osp.join('..', 'data', 'MSCOCO', 'images') self.annot_path = osp.join('..', 'data', 'MSCOCO', 'annotations') self.parse_path = osp.join('..', 'data', 'MSCOCO', 'parses') self.preprocessed_path = osp.join('..', 'data', 'preprocessed_data') self.dp_path = osp.join(self.preprocessed_path, 'densepose', 'MSCOCO') # lip parse set self.parse_set = {'uppercloth': (5,), 'coat': (7,), 'pants': (9,), 'skirts': (12,), 'hair': (2,), 'shoes': (18,19)} self.bcc_dist_threshold = 0.03 self.eval_types = ['upper_body', 'lower_body','non_cloth'] self.datalist = self.load_data() print("Load data: ", len(self.datalist)) def load_data(self): if self.data_split == 'train': db = COCO(osp.join(self.annot_path, 'coco_wholebody_train_v1.0.json')) 
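# Besides the COCO-WholeBody annotations, training also loads per-annotation gender
# labels and LIP parsing paths from the preprocessed data directory (the two json
# files opened below).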
with open(osp.join(self.preprocessed_path, 'gender', 'MSCOCO_train_gender.json')) as f: genders = json.load(f) with open(osp.join(self.parse_path, 'LIP_trainval_parsing.json')) as f: parsing_paths = json.load(f) else: db = COCO(osp.join(self.annot_path, 'coco_wholebody_val_v1.0.json')) with open(osp.join(self.preprocessed_path, 'gender', 'MSCOCO_val_gender.json')) as f: genders = json.load(f) with open(osp.join(self.preprocessed_path, 'smpl_param', 'MSCOCO_test_Pose2Pose.json')) as f: smpl_params = json.load(f) if cfg.calculate_bcc: with open(osp.join(self.parse_path, 'LIP_trainval_parsing.json')) as f: parsing_paths = json.load(f) with open(osp.join(self.annot_path, 'coco_dp_val.json')) as f: dps = json.load(f) else: parsing_paths = None dps = None datalist = [] for aid in db.anns.keys(): ann = db.anns[aid] img = db.loadImgs(ann['image_id'])[0] if self.data_split == 'train': imgname = osp.join('train2017', img['file_name']) else: imgname = osp.join('val2017', img['file_name']) img_path = osp.join(self.img_path, imgname) if self.data_split == 'train': if ann['iscrowd'] or (ann['num_keypoints'] == 0): continue # bbox bbox = process_bbox(ann['bbox'], img['width'], img['height']) if bbox is None: continue # parse if parsing_paths is not None: if str(aid) in parsing_paths: parse_path = osp.join(self.parse_path, 'TrainVal_parsing_annotations/TrainVal_parsing_annotations/', parsing_paths[str(aid)]) else: continue else: parse_path = None # filter images with few visible joints joint_img = np.array(ann['keypoints'], dtype=np.float32).reshape(-1,3) if np.sum(joint_img[:,2]>0) < 6: continue # densepose if self.data_split == 'train': try: dp = np.load(osp.join(self.dp_path, str(aid) + '.npz'), allow_pickle=True) except: continue if len(dp['smpl_v_idx']) == 0: continue dp_x = np.array(dp['dp_x'], dtype=np.float32) dp_y = np.array(dp['dp_y'], dtype=np.float32) dp_xy = np.stack((dp_x, dp_y),1) dp_I = np.array(dp['dp_I'], dtype=np.int16) dp_u = np.array(dp['dp_U'], dtype=np.float32) dp_v = np.array(dp['dp_V'], dtype=np.float32) dp_uv = np.stack((dp_u, dp_v),1) smpl_v_idx = np.array(dp['smpl_v_idx'], dtype=np.int32) dp_mask = dp['dp_fg'].item() dp_mask = mask_util.decode(dp_mask) dp_data = {'xy': dp_xy, 'uv': dp_uv, 'I': dp_I, 'smpl_v_idx': smpl_v_idx, 'masks': dp_mask} elif cfg.calculate_bcc: if str(aid) in dps: dp = dps[str(aid)] if len(dp['smpl_v_idx']) == 0: continue dp_x = np.array(dp['dp_x'], dtype=np.float32) / 256 * ann['bbox'][2] + ann['bbox'][0] dp_y = np.array(dp['dp_y'], dtype=np.float32) / 256 * ann['bbox'][3] + ann['bbox'][1] dp_xy = np.stack((dp_x, dp_y),1) dp_I = np.array(dp['dp_I'], dtype=np.int16) dp_u = np.array(dp['dp_U'], dtype=np.float32) dp_v = np.array(dp['dp_V'], dtype=np.float32) dp_uv = np.stack((dp_u, dp_v),1) smpl_v_idx = np.array(dp['smpl_v_idx'], dtype=np.int32) dp_mask = mask_util.decode(dp['dp_masks'][0]) dp_data = {'xy': dp_xy, 'uv': dp_uv, 'I': dp_I, 'smpl_v_idx': smpl_v_idx, 'masks': dp_mask} else: continue else: dp_data = None # smpl params if self.data_split == 'test': if str(aid) in smpl_params: smpl_param = smpl_params[str(aid)]['smpl_param'] cam_param = smpl_params[str(aid)]['cam_param'] else: continue else: smpl_param, cam_param = None, None # gender if str(aid) in genders: gender = genders[str(aid)] else: continue data_dict = { 'img_path': img_path, 'ann_id': aid, 'img_shape': (img['height'],img['width']), 'bbox': bbox, 'orig_bbox': ann['bbox'], 'gender': gender, 'parse_path': parse_path, 'dp': dp_data, 'smpl_param': smpl_param, 'cam_param': cam_param } 
datalist.append(data_dict) return datalist def __len__(self): return len(self.datalist) def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape = data['img_path'], data['img_shape'] # image load img = load_img(img_path) # affine transform bbox = data['bbox'] img, valid_mask, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(img, bbox, self.data_split) img = self.transform(img.astype(np.float32))/255. if self.data_split == 'train': # load parse (cloth segmentation) parse = cv2.imread(data['parse_path']) parse_list = [] for cloth_type in ('fg',) + cfg.cloth_types: # get cloth indexs if cloth_type == 'fg': idxs = np.unique(parse).tolist() idxs.pop(idxs.index(0)) if len(idxs) == 0: parse_fg = np.zeros((cfg.output_parse_shape[0], cfg.output_parse_shape[1])) > 0 continue else: idxs = self.parse_set[cloth_type] # get masking corresponding to a cloth mask = [parse == i for i in idxs] mask = (sum(mask) > 0).astype(np.float32) _, _, _, lip2img_trans = generate_patch_image(mask, data['orig_bbox'], 1.0, 0.0, False, mask.shape) mask = cv2.warpAffine(mask, lip2img_trans, (img_shape[1], img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.warpAffine(mask, img2bb_trans, (cfg.input_img_shape[1], cfg.input_img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.resize(mask, (cfg.output_parse_shape[1], cfg.output_parse_shape[0])) if cloth_type == 'fg': parse_fg = mask[:,:,0] > 0 else: parse_list.append(mask) parse = np.stack(parse_list)[:,:,:,0] # parse: (cloths, height, width) is_bkg = (np.prod(parse == 0, 0) == 1) parse = np.argmax(parse, 0) + 1 # add 1 for bkg class parse[is_bkg] = 0 parse[valid_mask == 0 ] = -1 parse_valid = valid_mask # load densepose dp_xy, dp_uv, dp_I, dp_vertex = data['dp']['xy'], data['dp']['uv'], data['dp']['I'], data['dp']['smpl_v_idx'] dp_xy = np.concatenate((dp_xy, np.ones_like(dp_xy[:,:1])),1) dp_xy = np.dot(img2bb_trans, dp_xy.transpose(1,0)).transpose(1,0) dp_I = dp_I - 1 # dp_I is started wtih 1. make it zero-based index. 
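# From here the training branch mirrors DeepFashion2.__getitem__: DensePose points are
# subsampled to cfg.dp_point_num and matched against the parse map to build the
# per-vertex cloth labels used as targets.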
cur_point_num = len(dp_xy) if cur_point_num > cfg.dp_point_num: idxs = np.random.choice(np.arange(cur_point_num), size=cfg.dp_point_num) cur_point_num = cfg.dp_point_num dp_xy = dp_xy[idxs]; dp_uv = dp_uv[idxs]; dp_I = dp_I[idxs]; dp_vertex = dp_vertex[idxs] # match densepose & parse _dp_xy = dp_xy.copy() _dp_xy[:,0] = _dp_xy[:,0] / cfg.input_img_shape[1] * cfg.output_parse_shape[1] _dp_xy[:,1] = _dp_xy[:,1] / cfg.input_img_shape[0] * cfg.output_parse_shape[0] parse_onehot = np.zeros((len(cfg.cloth_types)+1, cfg.output_parse_shape[0], cfg.output_parse_shape[1])) for i in range(len(cfg.cloth_types)+1): parse_onehot[i][parse == i] = 1.0 dp_cloth_idx = np.ones((_dp_xy.shape[0]), np.int16) * -1 dp_cloth_idx[bilinear_interpolate(parse_fg[None,:,:], _dp_xy[:,0], _dp_xy[:,1])[0] > 0.5] = 0 for i in range(len(cfg.cloth_types)): dp_cloth_idx[np.argmax(bilinear_interpolate(parse_onehot, _dp_xy[:,0], _dp_xy[:,1]), 0) == (i+1)] = i+1 smpl_cloth_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_cloth_idx[dp_vertex] = dp_cloth_idx smpl_cloth_valid = (smpl_cloth_idx != -1).astype(np.float32) smpl_patch_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_patch_idx[dp_vertex] = dp_I[dp_I != -1] # remove coat ambiguity if (smpl_cloth_idx==cfg.cloth_types.index('uppercloth')+1).sum() == 0 and (smpl_cloth_idx==cfg.cloth_types.index('coat')+1).sum() > 0: idxs = (smpl_cloth_idx == cfg.cloth_types.index('coat')+1) smpl_cloth_idx[idxs] = cfg.cloth_types.index('uppercloth')+1 # gender if data['gender'] == 'male': gender = 1 elif data['gender'] == 'female': gender = 2 else: gender = 0 # dummy smpl parameter smpl_pose = np.zeros((smpl.joint_num*3,), dtype=np.float32) smpl_shape = np.zeros((smpl.shape_param_dim,), dtype=np.float32) cam_trans = np.zeros((3,), dtype=np.float32) inputs = {'img': img} targets = {'gender': gender, 'parse': parse, 'smpl_cloth_idx': smpl_cloth_idx, 'smpl_patch_idx': smpl_patch_idx} meta_info = {'smpl_cloth_valid': smpl_cloth_valid, 'smpl_pose': smpl_pose, 'smpl_shape': smpl_shape, 'cam_trans': cam_trans} return inputs, targets, meta_info else: # smpl processing smpl_pose, smpl_shape, smpl_mesh = process_human_model_output(data['smpl_param'], data['cam_param'], do_flip, img_shape, img2bb_trans, rot) cam_trans = np.array(data['smpl_param']['trans'], dtype=np.float32) cam_param = np.array([cfg.focal[0], cfg.focal[1], cfg.princpt[0], cfg.princpt[1]]) # gender if data['gender'] == 'male': gender = 1 elif data['gender'] == 'female': gender = 2 else: gender = 0 if cfg.calculate_bcc: # load parse (cloth segmentation) parse = cv2.imread(data['parse_path']) parse_list = [] for cloth_type in ('fg',) + cfg.cloth_types: # get cloth indexs if cloth_type == 'fg': idxs = np.unique(parse).tolist() idxs.pop(idxs.index(0)) if len(idxs) == 0: parse_fg = np.zeros((cfg.output_parse_shape[0], cfg.output_parse_shape[1])) > 0 continue else: idxs = self.parse_set[cloth_type] # get masking corresponding to a cloth mask = [parse == i for i in idxs] mask = (sum(mask) > 0).astype(np.float32) _, _, _, lip2img_trans = generate_patch_image(mask, data['orig_bbox'], 1.0, 0.0, False, mask.shape) mask = cv2.warpAffine(mask, lip2img_trans, (img_shape[1], img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.warpAffine(mask, img2bb_trans, (cfg.input_img_shape[1], cfg.input_img_shape[0]), flags=cv2.INTER_LINEAR) mask = cv2.resize(mask, (cfg.output_parse_shape[1], cfg.output_parse_shape[0])) if cloth_type == 'fg': parse_fg = mask[:,:,0] > 0 else: parse_list.append(mask) parse = np.stack(parse_list)[:,:,:,0] # parse: 
(cloths, height, width) is_bkg = (np.prod(parse == 0, 0) == 1) parse = np.argmax(parse, 0) + 1 # add 1 for bkg class parse[is_bkg] = 0 parse[valid_mask == 0 ] = -1 parse_valid = valid_mask # load densepose dp_xy, dp_uv, dp_I, dp_vertex = data['dp']['xy'], data['dp']['uv'], data['dp']['I'], data['dp']['smpl_v_idx'] dp_xy = np.concatenate((dp_xy, np.ones_like(dp_xy[:,:1])),1) dp_xy = np.dot(img2bb_trans, dp_xy.transpose(1,0)).transpose(1,0) dp_I = dp_I - 1 # dp_I is started wtih 1. make it zero-based index. cur_point_num = len(dp_xy) if cur_point_num > cfg.dp_point_num: idxs = np.random.choice(np.arange(cur_point_num), size=cfg.dp_point_num) cur_point_num = cfg.dp_point_num dp_xy = dp_xy[idxs]; dp_uv = dp_uv[idxs]; dp_I = dp_I[idxs]; dp_vertex = dp_vertex[idxs] # match densepose & parse _dp_xy = dp_xy.copy() _dp_xy[:,0] = _dp_xy[:,0] / cfg.input_img_shape[1] * cfg.output_parse_shape[1] _dp_xy[:,1] = _dp_xy[:,1] / cfg.input_img_shape[0] * cfg.output_parse_shape[0] parse_onehot = np.zeros((len(cfg.cloth_types)+1, cfg.output_parse_shape[0], cfg.output_parse_shape[1])) for i in range(len(cfg.cloth_types)+1): parse_onehot[i][parse == i] = 1.0 dp_cloth_idx = np.ones((_dp_xy.shape[0]), np.int16) * -1 dp_cloth_idx[bilinear_interpolate(parse_fg[None,:,:], _dp_xy[:,0], _dp_xy[:,1])[0] > 0.5] = 0 for i in range(len(cfg.cloth_types)): dp_cloth_idx[np.argmax(bilinear_interpolate(parse_onehot, _dp_xy[:,0], _dp_xy[:,1]), 0) == (i+1)] = i+1 smpl_cloth_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_cloth_idx[dp_vertex] = dp_cloth_idx smpl_cloth_valid = (smpl_cloth_idx != -1).astype(np.float32) smpl_patch_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_patch_idx[dp_vertex] = dp_I[dp_I != -1] # For BCC calculation, T-posed mesh is used. smpl_pose = np.zeros((smpl.joint_num*3,), dtype=np.float32) smpl_shape = np.zeros((smpl.shape_param_dim,), dtype=np.float32) cam_trans = np.zeros((3,), dtype=np.float32) else: smpl_cloth_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 smpl_patch_idx = np.ones((smpl.vertex_num), dtype=np.int16) * -1 inputs = {'img': img} targets = {'gender': gender, 'smpl_cloth_idx': smpl_cloth_idx, 'smpl_patch_idx': smpl_patch_idx} meta_info = {'smpl_pose': smpl_pose, 'smpl_shape': smpl_shape, 'cam_trans': cam_trans, 'cam_param': cam_param} return inputs, targets, meta_info def evaluate(self, outs, cur_sample_idx): annots = self.datalist sample_num = len(outs) eval_result = {'bcc_upper':[], 'bcc_lower':[], 'bcc_non_cloth':[]} for n in range(sample_num): annot = annots[cur_sample_idx + n] ann_id = annot['ann_id'] out = outs[n] if cfg.calculate_bcc: for cloth_type in cfg.cloth_types: if out[cloth_type + '_mesh'] is None: out[cloth_type + '_mesh'] = np.array([]) else: out[cloth_type + '_mesh'] = np.array(out[cloth_type + '_mesh'].vertices) total_pred_cloth_verts = np.zeros((len(out['smpl_mesh']),)) gt_smpl_cloth_idx = out['smpl_cloth_idx_target'] for cloth_type in self.eval_types: if cloth_type == 'upper_body': gt_cloth_verts = ((gt_smpl_cloth_idx == 1) |(gt_smpl_cloth_idx == 2)) # uppercloth, coat cloth_idx = 1 elif cloth_type == 'lower_body': gt_cloth_verts = ((gt_smpl_cloth_idx == 3 )| (gt_smpl_cloth_idx == 4)) # pants, skirts cloth_idx = 2 elif cloth_type == 'non_cloth': gt_cloth_verts = (gt_smpl_cloth_idx == 0) # non-cloth gt_idxs = np.where(gt_cloth_verts)[0] if cloth_type == 'upper_body': cloth_verts = torch.cat([torch.from_numpy(out['uppercloth_mesh']).cuda(), torch.from_numpy(out['coat_mesh']).cuda()]) cloth_verts = cloth_verts[::8].float() # subsampling for 
memory efficiency elif cloth_type == 'lower_body': cloth_verts = torch.cat([torch.from_numpy(out['pants_mesh']).cuda(), torch.from_numpy(out['skirts_mesh']).cuda()]) cloth_verts = cloth_verts[::8].float() # subsampling for memory efficiency smpl_verts = torch.tensor(out['smpl_mesh']).cuda() if cloth_type in ['upper_body', 'lower_body']: if len(cloth_verts) > 0: dists = torch.sqrt(torch.sum((smpl_verts[None,:,:] - cloth_verts[:,None,:])**2,2)) dists = dists.min(0).values pred_verts = (dists < self.bcc_dist_threshold).cpu().numpy() total_pred_cloth_verts[pred_verts] = cloth_idx correct_verts = (pred_verts[gt_idxs] == gt_cloth_verts[gt_idxs]) else: correct_verts = np.zeros_like(gt_cloth_verts[gt_idxs], dtype=bool) elif cloth_type == 'non_cloth': pred_verts = (total_pred_cloth_verts == 0) correct_verts = (pred_verts[gt_idxs] == gt_cloth_verts[gt_idxs]) if len(gt_idxs) == 0: continue if cloth_type == 'upper_body': eval_result[f'bcc_upper'].append(correct_verts.sum()/len(gt_idxs)) elif cloth_type == 'lower_body': eval_result[f'bcc_lower'].append(correct_verts.sum()/len(gt_idxs)) elif cloth_type == 'non_cloth': eval_result[f'bcc_non_cloth'].append(correct_verts.sum()/len(gt_idxs)) return eval_result def print_eval_result(self, eval_result): bcc_upper = np.mean(eval_result['bcc_upper']) bcc_lower = np.mean(eval_result['bcc_lower']) bcc_non_cloth = np.mean(eval_result['bcc_non_cloth']) bcc_average = (bcc_upper + bcc_lower + bcc_non_cloth) / 3 print(">> BCC (upper body) : %.3f"%bcc_upper) print(">> BCC (lower body) : %.3f"%bcc_lower) print(">> BCC (non-cloth) : %.3f"%bcc_non_cloth) print(">> BCC (average) : %.3f"%bcc_average) ================================================ FILE: data/PW3D/PW3D.py ================================================ import os import os.path as osp import numpy as np import torch import cv2 import json import copy import pickle as pkl from pycocotools.coco import COCO from config import cfg from utils.human_models import smpl from utils.preprocessing import load_img, process_bbox, augmentation, process_human_model_output, convert_focal_princpt from utils.postprocessing import renderer, rasterize_mesh_given_cam_param, save_proj_faces, merge_mesh, read_valid_point, pa_mpjpe, pairwise_distances from utils.vis import save_obj, save_result, render_result class PW3D(torch.utils.data.Dataset): def __init__(self, transform, data_split): self.transform = transform self.data_split = data_split self.data_path = osp.join('..', 'data', 'PW3D', 'data') self.sequence_path = osp.join('..', 'data', 'PW3D', 'data', 'sequenceFiles', self.data_split) self.preprocessed_path = osp.join('..', 'data', 'preprocessed_data') self.eval_stride = 25 self.cd_inlier_threshold = 32 self.pw3d_smpl_layers = {} self.pw3d_beta_clothes = {} self.datalist = self.load_data() print(f"Load {self.data_split} data: ", len(self.datalist)) def load_data(self): db = COCO(osp.join(self.data_path, '3DPW_' + self.data_split + '.json')) if self.data_split == 'test': with open(osp.join(self.preprocessed_path, 'smpl_param', '3DPW_test_Pose2Pose.json')) as f: smpl_params = json.load(f) else: smpl_params = None datalist = [] for idx, aid in enumerate(db.anns.keys()): if idx % self.eval_stride != 0: continue ann = db.anns[aid] image_id = ann['image_id'] img = db.loadImgs(image_id)[0] sequence_name = img['sequence'] img_name = img['file_name'] pid = ann['person_id'] img_path = osp.join(self.data_path, 'imageFiles', sequence_name, img_name) bbox = process_bbox(np.array(ann['bbox']), img['width'], img['height']) if bbox is None: 
continue cam_param_gt = {k: np.array(v, dtype=np.float32) for k,v in img['cam_param'].items()} smpl_param_gt = ann['smpl_param'] gender = smpl_param_gt['gender'] if str(aid) in smpl_params: smpl_param = smpl_params[str(aid)]['smpl_param'] cam_param = smpl_params[str(aid)]['cam_param'] else: assert 0, "SMPL params missed!" # pre-save smpl layers if cfg.calculate_cd: sequence = img_path.split('/')[-2] index = sequence + '_' + str(pid) if index not in self.pw3d_smpl_layers.keys(): data = pkl.load(open(osp.join(self.sequence_path, f'{sequence}.pkl'), 'rb'), encoding='latin1') v_template = data['v_template_clothed'][pid] betas_clothed = data['betas_clothed'][pid][:10] layer = smpl.get_custom_template_layer(v_template, gender) self.pw3d_smpl_layers[index] = layer self.pw3d_beta_clothes[index] = betas_clothed data_dict = {'img_path': img_path, 'ann_id': aid, 'person_id': pid, 'img_shape': (img['height'], img['width']), 'bbox': bbox, 'smpl_param': smpl_param, 'cam_param': cam_param, 'smpl_param_gt': smpl_param_gt, 'cam_param_gt': cam_param_gt} datalist.append(data_dict) return datalist def __len__(self): return len(self.datalist) def __getitem__(self, idx): data = copy.deepcopy(self.datalist[idx]) img_path, img_shape = data['img_path'], data['img_shape'] # image load img = load_img(img_path) # affine transform bbox = data['bbox'] img, valid_mask, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(img, bbox, self.data_split) img = self.transform(img.astype(np.float32))/255. # smpl processing smpl_pose, smpl_shape, smpl_mesh = process_human_model_output(data['smpl_param'], data['cam_param'], do_flip, img_shape, img2bb_trans, rot) cam_trans = np.array(data['smpl_param']['trans'], dtype=np.float32) cam_param = np.array([cfg.focal[0], cfg.focal[1], cfg.princpt[0], cfg.princpt[1]]) inputs = {'img': img} targets = {} meta_info = {'smpl_pose': smpl_pose, 'smpl_shape': smpl_shape, 'cam_trans': cam_trans, 'cam_param': cam_param} if cfg.calculate_cd: smpl_pose, smpl_shape, cam_trans, gender = data['smpl_param_gt']['pose'], data['smpl_param_gt']['shape'], data['smpl_param_gt']['trans'], data['smpl_param_gt']['gender'] smpl_pose, smpl_shape, cam_trans = np.array(smpl_pose), np.array(smpl_shape), np.array(cam_trans) cam_param = convert_focal_princpt(data['cam_param_gt']['focal'], data['cam_param_gt']['princpt'], img2bb_trans) smpl_mesh = self.get_clothed_mesh(img_path.split('/')[-2], data['person_id'], smpl_pose, smpl_shape, cam_trans, gender) targets['smpl_mesh'] = smpl_mesh targets['cam_param'] = cam_param return inputs, targets, meta_info def get_clothed_mesh(self, sequence, pid, pose, shape, trans, gender): index = sequence + '_' + str(pid) layer = self.pw3d_smpl_layers[index] betas_clothed = self.pw3d_beta_clothes[index] pose = torch.FloatTensor(pose).view(1,-1); shape = torch.FloatTensor(betas_clothed).view(1,-1); trans = torch.FloatTensor(trans).view(1,-1) output = layer(betas=shape, body_pose=pose[:,3:], global_orient=pose[:,:3], transl=trans) mesh_cam = output.vertices[0].numpy() output = smpl.layer['neutral'](betas=shape, body_pose=pose[:,3:], global_orient=pose[:,:3], transl=trans) unclothed_mesh_cam = output.vertices[0].numpy() trans = np.mean(mesh_cam, axis=0) - np.mean(unclothed_mesh_cam, axis=0) mesh_cam -= trans return mesh_cam def evaluate(self, outs, cur_sample_idx): annots = self.datalist sample_num = len(outs) start_idx = 0 eval_result = {'chamfer_distance': []} for n in range(sample_num): annot = annots[cur_sample_idx + n] ann_id = annot['ann_id'] out = outs[n] # save ouputs for 
calcuting cd if cfg.calculate_cd: verts_out = []; faces_out = [] verts = out['smpl_mesh'] verts[:,:2] *= -1 verts_out.append(verts) faces_out.append(smpl.face.astype(np.int32)) for cloth_type in cfg.cloth_types: if out[cloth_type + '_mesh'] is None: continue verts = out[cloth_type + '_mesh'].vertices verts[:,:2] *= -1 verts_out.append(verts) faces_out.append(out[cloth_type + '_mesh'].faces.astype(np.int32)) # pred pred_verts, pred_faces = merge_mesh(verts_out, faces_out) pred_faces = renderer.rasterize_mesh(torch.from_numpy(pred_verts).float(), torch.from_numpy(pred_faces)) # gt gt_verts = out['smpl_mesh_target']; gt_faces = smpl.face.astype(np.int32) gt_verts[:,:2] *= -1 gt_faces = rasterize_mesh_given_cam_param(torch.from_numpy(gt_verts).float(), torch.from_numpy(gt_faces), out['cam_param_target'][:2], out['cam_param_target'][2:]) # find valid pixels - exist silhouette pred_faces, gt_faces = pred_faces.numpy().astype(np.int32).reshape(-1, 3), gt_faces.numpy().astype(np.int32).reshape(-1, 3) valid = (pred_faces!=-1).sum(1) * (gt_faces!=-1).sum(1) valid = valid.reshape(-1) # if there are too few valid points, not evaluate if valid.sum() < self.cd_inlier_threshold: continue # set semantically matching pairs paired_pred_verts = read_valid_point(pred_verts, pred_faces, valid) paired_gt_verts = read_valid_point(gt_verts, gt_faces, valid) # rigid alignment a, R, t = pa_mpjpe(np.expand_dims(paired_pred_verts,0), np.expand_dims(paired_gt_verts,0)) pred_verts = (a*np.matmul(pred_verts, R) + t)[0] pred_verts *= 1000; gt_verts *= 1000 # pcu.pairwise_distances is too slow, approximate distance between vertices. dist1 = pairwise_distances(pred_verts, gt_verts) dist2 = pairwise_distances(pred_verts, gt_verts, inv=True) if torch.isinf(dist1) or torch.isinf(dist2): continue chamfer_dist = (dist1 + dist2) / 2 eval_result['chamfer_distance'].append(chamfer_dist) return eval_result def print_eval_result(self, eval_result): print('>> CD: %.2f mm' % np.mean(eval_result['chamfer_distance'])) ================================================ FILE: data/dataset.py ================================================ import random import numpy as np from torch.utils.data.dataset import Dataset from config import cfg class MultipleDatasets(Dataset): def __init__(self, dbs, make_same_len=True): self.dbs = dbs self.db_num = len(self.dbs) self.max_db_data_num = max([len(db) for db in dbs]) self.db_len_cumsum = np.cumsum([len(db) for db in dbs]) self.make_same_len = make_same_len def __len__(self): # all dbs have the same length if self.make_same_len: return self.max_db_data_num * self.db_num # each db has different length else: return sum([len(db) for db in self.dbs]) def __getitem__(self, index): if self.make_same_len: db_idx = index // self.max_db_data_num data_idx = index % self.max_db_data_num if data_idx >= len(self.dbs[db_idx]) * (self.max_db_data_num // len(self.dbs[db_idx])): # last batch: random sampling data_idx = random.randint(0,len(self.dbs[db_idx])-1) else: # before last batch: use modular data_idx = data_idx % len(self.dbs[db_idx]) else: for i in range(self.db_num): if index < self.db_len_cumsum[i]: db_idx = i break if db_idx == 0: data_idx = index else: data_idx = index - self.db_len_cumsum[db_idx-1] return self.dbs[db_idx][data_idx] ================================================ FILE: demo/demo.py ================================================ import sys import os import os.path as osp sys.path.insert(0, osp.join('..', 'main')) from config import cfg import argparse import json import torch from tqdm 
import tqdm import numpy as np import torch.backends.cudnn as cudnn from torch.nn.parallel.data_parallel import DataParallel import torchvision.transforms as transforms def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=str, default='0', dest='gpu_ids') parser.add_argument('--img_path', type=str, default='./input.jpg') parser.add_argument('--json_path', type=str, default='./pose2pose_result.json') parser.add_argument('--output_dir', type=str, default='./') args = parser.parse_args() if not args.gpu_ids: assert 0, "Please set proper gpu ids" if '-' in args.gpu_ids: gpus = args.gpu_ids.split('-') gpus[0] = int(gpus[0]) gpus[1] = int(gpus[1]) + 1 args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) return args args = parse_args() cfg.set_args(args.gpu_ids) cudnn.benchmark = True from model import get_model from base import check_data_parallel from utils.preprocessing import load_img, process_bbox, generate_patch_image, process_human_model_output from utils.postprocessing import renderer from utils.vis import save_result, render_result model_path = os.path.join('.', 'snapshot_7.pth.tar') assert osp.exists(model_path), 'Cannot find model at ' + model_path print('Load checkpoint from {}'.format(model_path)) model = get_model('test') model = model.cuda() ckpt = torch.load(model_path) ckpt = check_data_parallel(ckpt['network']) model.load_state_dict(ckpt, strict=False) model.eval() transform = transforms.ToTensor() original_img = load_img(args.img_path) original_height, original_width = original_img.shape[:2] with open(args.json_path, 'r') as f: pose2pose_result = json.load(f) # prepare bbox bbox = [150, 38, 244, 559] bbox = process_bbox(bbox, original_width, original_height) img_numpy, _, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape) img = transform(img_numpy.astype(np.float32))/255.
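The demo relies on `generate_patch_image` (defined in `common/utils/preprocessing.py`, not shown in this section) to crop the person bbox to `cfg.input_img_shape` and return the `img2bb_trans` affine used above. The sketch below is a simplified, assumed stand-in for that helper: `crop_bbox` is a hypothetical function that only crops, with no scale/rotation augmentation, flipping, or validity mask.

```python
import numpy as np
import cv2

def crop_bbox(img, bbox, out_shape):
    """Crop an (x, y, w, h) bbox to out_shape=(H, W) and return the image-to-crop affine.
    Simplified stand-in for generate_patch_image: no rotation, scaling, or flip handling."""
    x, y, w, h = bbox
    out_h, out_w = out_shape
    src = np.float32([[x, y], [x + w, y], [x, y + h]])
    dst = np.float32([[0, 0], [out_w, 0], [0, out_h]])
    img2bb_trans = cv2.getAffineTransform(src, dst)                      # (2, 3) affine
    patch = cv2.warpAffine(img, img2bb_trans, (out_w, out_h), flags=cv2.INTER_LINEAR)
    return patch, img2bb_trans

# e.g. patch, trans = crop_bbox(original_img, (150, 38, 244, 559), (256, 192))
```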
img = img.cuda()[None,:,:,:] smpl_pose, smpl_shape, smpl_mesh = process_human_model_output(pose2pose_result['smpl_param'], pose2pose_result['cam_param'], False, (original_width, original_height), img2bb_trans, 0.0) cam_trans = np.array(pose2pose_result['smpl_param']['trans'], dtype=np.float32) smpl_pose, smpl_shape, cam_trans = torch.tensor(smpl_pose)[None,:].cuda(), torch.tensor(smpl_shape)[None,:].cuda(), torch.tensor(cam_trans)[None,:].cuda() # forward inputs = {'img': img} targets = {} meta_info = {'smpl_pose': smpl_pose, 'smpl_shape': smpl_shape, 'cam_trans': cam_trans} with torch.no_grad(): out = model(inputs, targets, meta_info, 'test') for k,v in out.items(): if type(v) is torch.Tensor: out[k] = v[0].cpu().numpy() else: out[k] = v[0] mesh_verts, mesh_faces = save_result(out, osp.join(args.output_dir, 'output.obj')) renderer.set_renderer(focal=cfg.focal, princpt=cfg.princpt, img_shape=cfg.input_img_shape, anti_aliasing=True) render_result(mesh_verts, mesh_faces, img_numpy[:,:,::-1], osp.join(args.output_dir, 'render_cropped_img.jpg')) focal = [cfg.focal[0] / cfg.input_img_shape[1] * bbox[2], cfg.focal[1] / cfg.input_img_shape[0] * bbox[3]] princpt = [cfg.princpt[0] / cfg.input_img_shape[1] * bbox[2] + bbox[0], cfg.princpt[1] / cfg.input_img_shape[0] * bbox[3] + bbox[1]] renderer.set_renderer(focal=focal, princpt=princpt, img_shape=(original_height, original_width), anti_aliasing=True) render_result(mesh_verts, mesh_faces, original_img[:,:,::-1], osp.join(args.output_dir, 'render_original_img.jpg')) ================================================ FILE: demo/output.obj ================================================ [File too large to display: 20.5 MB] ================================================ FILE: demo/pose2pose_result.json ================================================ {"smpl_param": {"pose": [-2.9022064208984375, 0.14226926863193512, -0.6876412034034729, -0.37843915820121765, -0.010725350119173527, 0.24553968012332916, -0.0946338027715683, -0.09007483720779419, -0.1379311978816986, 0.3363078832626343, 0.059149932116270065, -0.03379477187991142, 0.5132664442062378, 0.0091248182579875, -0.0329560711979866, 0.5385148525238037, -0.016906920820474625, 0.08675778657197952, 0.0763339102268219, 0.055082399398088455, -0.01292260829359293, 0.006272528320550919, 0.04291265085339546, -0.11267384886741638, -0.1689642071723938, -0.09466277807950974, 0.050063978880643845, -0.1324988752603531, 0.020788447931408882, -0.0006215892499312758, 0.014061491005122662, -0.006473212502896786, 0.004046547692269087, 0.014014238491654396, 0.0007452652789652348, 0.0004654862859752029, 0.05512533709406853, 0.02992839552462101, -0.04127040132880211, -0.17167717218399048, -0.5288858413696289, -0.058588359504938126, -0.036302413791418076, 0.4928983449935913, 0.08087531477212906, -0.15405236184597015, -0.04361267015337944, -0.04372638836503029, -0.33442074060440063, -0.6012314558029175, -0.2605968117713928, 0.026607021689414978, 0.3870042562484741, 0.6654704809188843, -0.12385698407888412, -1.8147525787353516, 0.7356531620025635, 0.14879150688648224, 1.3634296655654907, -0.30997902154922485, -0.10583673417568207, -0.18372346460819244, 0.17793530225753784, -0.17634084820747375, 0.22043076157569885, 0.02467007003724575, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], "shape": [-0.04103930667042732, -0.0273419301956892, -0.03568168729543686, -0.05271761119365692, 0.022580627351999283, -0.06175239384174347, -0.017034463584423065, -0.038146961480379105, 0.001704443129710853, -0.040624070912599564], "trans": 
[-0.10590886324644089, 0.16711418330669403, 40.84169387817383]}, "cam_param": {"focal": [13778.80891164144, 13778.808116912842], "princpt": [267.12500619888306, 316.79998779296875]}} ================================================ FILE: main/config.py ================================================ import os import os.path as osp import sys import numpy as np import torch class Config: ## dataset trainset_3d = [] trainset_2d = ['MSCOCO', 'DeepFashion2'] testset = ['MSCOCO', 'PW3D'] ## model setting resnet_type = 50 input_img_shape = (256, 192) output_parse_shape = (256, 192) output_dp_shape = (64, 48) output_uv_shape = (64, 64) focal = (5000, 5000) # virtual focal lengths princpt = (input_img_shape[1]/2, input_img_shape[0]/2) # virtual principal point position dp_point_num = 196 dp_patch_num = 24 cloth_types = ('uppercloth', 'coat', 'pants', 'skirts', 'shoes') ## training config lr = 1e-4 lr_dec_factor = 10 lr_dec_epoch = [5] end_epoch = 8 train_batch_size = 8 sdf_thresh = {'uppercloth': 0.1, 'coat': 0.1, 'pants': 0.1, 'skirts': 0.1, 'shoes': 0.01} dist_thresh = {'uppercloth': 0.03, 'coat': 0.1, 'pants': 0.03, 'skirts': 0.03, 'shoes': 0.03} min_dist_thresh = {'uppercloth': 0.0, 'coat': 0.03, 'pants': 0.0, 'skirts': 0.0, 'shoes': 0.0} cls_weight = 0.01 gender_weight = 0.01 dp_weight = 1.0 reg_weight = 0.1 cloth_reg_weight = {'uppercloth': 1.0, 'coat': 1.0, 'pants': 1.0, 'skirts': 1.0, 'shoes': 0.1} ## testing config test_batch_size = 1 cls_threshold = 0.25 calculate_cd = False calculate_bcc = False cloth_colors = {'smpl_body':(190,190,190), 'uppercloth':(140,110,160), 'coat':(170,120,60), 'pants':(110,130,100), 'skirts':(90,110,140), 'shoes':(120,60,60)} num_thread = 8 gpu_ids = '0' num_gpus = 1 continue_train = False ## directory cur_dir = osp.dirname(os.path.abspath(__file__)) root_dir = osp.join(cur_dir, '..') data_dir = osp.join(root_dir, 'data') output_dir = osp.join(root_dir, 'output') model_dir = osp.join(output_dir, 'model_dump') vis_dir = osp.join(output_dir, 'vis') os.makedirs(vis_dir, exist_ok=True) log_dir = osp.join(output_dir, 'log') result_dir = osp.join(output_dir, 'result') human_model_path = osp.join(data_dir, 'base_data', 'human_models') smplicit_path = osp.join(root_dir, 'common', 'utils', 'SMPLicit', 'SMPLicit') def set_args(self, gpu_ids, continue_train=False): self.gpu_ids = gpu_ids self.num_gpus = len(self.gpu_ids.split(',')) self.continue_train = continue_train os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = self.gpu_ids print('>>> Using GPU: {}'.format(self.gpu_ids)) if self.num_gpus != 1: assert 0, "Not support DataParallel." 
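`Config` defines virtual intrinsics for the crop (`focal=(5000, 5000)`, principal point at the crop center), so a mesh regressed in that camera must be re-projected when it is overlaid on the original image. `demo/demo.py` above does this by scaling the focal lengths by the bbox size and shifting the principal point by the bbox corner; the sketch below mirrors that conversion, with `crop_intrinsics_to_original` being a hypothetical helper name.

```python
def crop_intrinsics_to_original(focal, princpt, bbox, input_img_shape):
    """Map virtual intrinsics defined on the (H, W) crop back to the original image,
    given the (x, y, w, h) person bbox. Mirrors the conversion used in demo/demo.py."""
    x, y, w, h = bbox
    in_h, in_w = input_img_shape
    focal_orig = (focal[0] / in_w * w, focal[1] / in_h * h)
    princpt_orig = (princpt[0] / in_w * w + x, princpt[1] / in_h * h + y)
    return focal_orig, princpt_orig

# e.g. with the demo bbox and cfg values:
# crop_intrinsics_to_original((5000, 5000), (96, 128), (150, 38, 244, 559), (256, 192))
```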
cfg = Config() np.random.seed(0) torch.manual_seed(0) sys.path.insert(0, osp.join(cfg.root_dir, 'common')) from utils.dir import add_pypath, make_folder add_pypath(osp.join(cfg.data_dir)) for i in range(len(cfg.trainset_2d)): add_pypath(osp.join(cfg.data_dir, cfg.trainset_2d[i])) for i in range(len(cfg.testset)): add_pypath(osp.join(cfg.data_dir, cfg.testset[i])) make_folder(cfg.model_dir) make_folder(cfg.vis_dir) make_folder(cfg.log_dir) make_folder(cfg.result_dir) ================================================ FILE: main/model.py ================================================ import torch import torch.nn as nn import numpy as np import copy from torch.nn import functional as F from nets.resnet import ResNetBackbone from nets.module import ClothNet from nets.loss import ClothClsLoss, GenderClsLoss, SdfParseLoss, SdfDPLoss, RegLoss from utils.human_models import smpl from config import cfg from utils.SMPLicit import SMPLicit class Model(nn.Module): def __init__(self, backbone, cloth_net, mode): super(Model, self).__init__() self.backbone = backbone self.cloth_net = cloth_net self.mode = mode self.smpl_layer = [copy.deepcopy(smpl.layer['neutral']).cuda(), copy.deepcopy(smpl.layer['male']).cuda(), copy.deepcopy(smpl.layer['female']).cuda()] self.smplicit_layer = SMPLicit.SMPLicit(cfg.smplicit_path, cfg.cloth_types).cuda() if mode == 'train': self.cloth_cls_loss = ClothClsLoss() self.gender_cls_loss = GenderClsLoss() self.sdf_dp_loss = SdfDPLoss() self.reg_loss = RegLoss() self.trainable_modules = [self.backbone, self.cloth_net] def forward(self, inputs, targets, meta_info, mode): batch_size = inputs['img'].shape[0] # feature extract & get cloth parameter img_feat = self.backbone(inputs['img']) pred_genders, pred_scores, z_cuts, z_styles = self.cloth_net(img_feat) # forward SMPL parameters to the SMPL layer smpl_pose = meta_info['smpl_pose'] smpl_shape = meta_info['smpl_shape'] cam_trans = meta_info['cam_trans'] if mode == 'train': # forward cloth & gender parameters to the SMPLicit layer smpl_gender = targets['gender'] sdfs, cloth_meshes, cloth_meshes_unposed = self.smplicit_layer(z_cuts, z_styles, smpl_pose, smpl_shape, smpl_gender, do_marching_cube=(mode=='test'), valid=torch.ones((len(z_cuts),), dtype=torch.bool), do_smooth=False) # loss functions loss = {} loss['cloth_cls'] = cfg.cls_weight * self.cloth_cls_loss(pred_scores, targets['smpl_patch_idx'], targets['smpl_cloth_idx']) loss['gender_cls'] = cfg.cls_weight * self.gender_cls_loss(pred_genders, smpl_gender) loss['sdf_dp'] = 0.0 loss['reg'] = 0.0 z_cut_reg, z_style_reg = 0.0, 0.0 for i in range(len(cfg.cloth_types)): cloth_type = cfg.cloth_types[i] if cloth_type == 'uppercloth': target_cloth_idx = (i+1, cfg.cloth_types.index('coat')+1) else: target_cloth_idx = (i+1,) if cloth_type == 'pants' or cloth_type == 'skirts': body_pose = smpl.Astar_pose.float().cuda().repeat(batch_size,1)[:,3:] else: body_pose = torch.zeros((batch_size,(smpl.joint_num-1)*3)).float().cuda() # DensePose based loss v_template = self.smpl_layer[0](global_orient=torch.zeros((batch_size,3)).float().cuda(), body_pose=body_pose, betas=smpl_shape).vertices loss['sdf_dp'] += cfg.dp_weight * self.sdf_dp_loss(sdfs[i], cloth_meshes_unposed[i], targets['smpl_cloth_idx'], meta_info['smpl_cloth_valid'], target_cloth_idx, cfg.sdf_thresh[cloth_type], cfg.dist_thresh[cloth_type], v_template) # Regularization loss cloth_exist = (sum([targets['smpl_cloth_idx'] == idx for idx in target_cloth_idx]) > 0).sum(1) > 0 if cloth_type != 'shoes': # shoes do not have z_cut z_cut_reg 
+= cfg.cloth_reg_weight[cloth_type] * self.reg_loss(z_cuts[i], cloth_exist) z_style_reg += cfg.cloth_reg_weight[cloth_type] * self.reg_loss(z_styles[i], cloth_exist) loss['reg'] = cfg.reg_weight * (z_cut_reg + z_style_reg) / 2.0 return loss else: pred_clothes = [] pred_gender = [] cloth_meshes = [] for i in range(batch_size): z_cut = []; z_style = [] for j in range(len(cfg.cloth_types)): z_cut.append(z_cuts[j][i][None,:]) z_style.append(z_styles[j][i][None,:]) valid_clothes = pred_scores[i] > cfg.cls_threshold gender = torch.argmax(pred_genders[i])+1 # male:1, female:2 _, cloth_mesh, _ = self.smplicit_layer(z_cut, z_style, smpl_pose[None, i], smpl_shape[None, i], [gender], True, valid=valid_clothes) pred_clothes.append(valid_clothes) cloth_meshes.append(cloth_mesh) pred_gender.append(gender) cloth_meshes = [[i[0] for i in clothmesh] for clothmesh in zip(*cloth_meshes)] # add camera translations for i in range(len(cfg.cloth_types)): for j in range(batch_size): if cloth_meshes[i][j] is not None: cloth_meshes[i][j].vertices += cam_trans[j].detach().cpu().numpy() mesh_cam = self.get_coords(smpl_pose[:,:3], {'shape': smpl_shape, 'pose': smpl_pose[:,3:]}, cam_trans, pred_gender) # output out = {} out['pred_clothes'] = pred_clothes out['pred_gender'] = pred_gender out['smpl_mesh'] = mesh_cam for i,cloth_type in enumerate(cfg.cloth_types): out[cloth_type + '_mesh'] = cloth_meshes[i] for k,v in targets.items(): out[f'{k}_target'] = v return out def get_coords(self, root_pose, params, cam_trans, gender): batch_size = root_pose.shape[0] if self.mode != 'train': mesh_cam = [] for i in range(batch_size): output = self.smpl_layer[gender[i]](betas=params['shape'][None,i], body_pose=params['pose'][None,i], global_orient=root_pose[None,i], transl=cam_trans[None,i]) mesh_cam.append(output.vertices) mesh_cam = torch.cat(mesh_cam, dim=0) else: output = self.smpl_layer[0](betas=params['shape'], body_pose=params['pose'], global_orient=root_pose, transl=cam_trans) mesh_cam = output.vertices return mesh_cam def init_weights(m): if type(m) == nn.ConvTranspose2d: nn.init.normal_(m.weight,std=0.001) elif type(m) == nn.Conv2d: nn.init.normal_(m.weight,std=0.001) nn.init.constant_(m.bias, 0) elif type(m) == nn.BatchNorm2d: nn.init.constant_(m.weight,1) nn.init.constant_(m.bias,0) elif type(m) == nn.Linear: nn.init.normal_(m.weight,std=0.01) nn.init.constant_(m.bias,0) def get_model(mode): backbone = ResNetBackbone(cfg.resnet_type) cloth_net = ClothNet() if mode == 'train': backbone.init_weights() cloth_net.apply(init_weights) model = Model(backbone, cloth_net, mode) return model ================================================ FILE: main/test.py ================================================ import os from config import cfg import torch import argparse from tqdm import tqdm import numpy as np import torch.backends.cudnn as cudnn def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=str, default='0', dest='gpu_ids') parser.add_argument('--test_epoch', type=str, dest='test_epoch') parser.add_argument('--type', type=str) args = parser.parse_args() if not args.gpu_ids: assert 0, "Please set propoer gpu ids" if '-' in args.gpu_ids: gpus = args.gpu_ids.split('-') gpus[0] = int(gpus[0]) gpus[1] = int(gpus[1]) + 1 args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) assert args.test_epoch, 'Test epoch is required.' 
return args def main(): args = parse_args() cfg.set_args(args.gpu_ids) cudnn.benchmark = True from base import Tester if args.type == 'cd': cfg.calculate_cd = True cfg.testset = ['PW3D'] elif args.type == 'bcc': cfg.calculate_bcc = True cfg.testset = ['MSCOCO'] else: assert 0, 'Test type is invalid.' tester = Tester(args.test_epoch) tester._make_batch_generator() tester._make_model() eval_result = {} cur_sample_idx = 0 for itr, (inputs, targets, meta_info) in enumerate(tqdm(tester.batch_generator)): if itr < cur_sample_idx: continue for k,v in inputs.items(): if type(v) is torch.Tensor: inputs[k] = v.cuda() for k,v in targets.items(): if type(v) is torch.Tensor: targets[k] = v.cuda() for k,v in meta_info.items(): if type(v) is torch.Tensor: meta_info[k] = v.cuda() # forward with torch.no_grad(): out = tester.model(inputs, targets, meta_info, 'test') # save output _out = {} for k,v in out.items(): if type(v) is torch.Tensor: _out[k] = v.cpu().numpy() batch_size = v.shape[0] else: _out[k] = v out = _out out = [{k: v[bid] for k,v in out.items()} for bid in range(batch_size)] # evaluate cur_eval_result = tester._evaluate(out, cur_sample_idx) for k,v in cur_eval_result.items(): if k in eval_result: eval_result[k] += v else: eval_result[k] = v cur_sample_idx += len(out) tester._print_eval_result(eval_result) if __name__ == "__main__": main() ================================================ FILE: main/train.py ================================================ import os import argparse from config import cfg import numpy as np import torch import torch.backends.cudnn as cudnn from tqdm import tqdm def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--gpu', type=str, default='0', dest='gpu_ids') parser.add_argument('--continue', dest='continue_train', action='store_true') args = parser.parse_args() if not args.gpu_ids: assert 0, "Please set proper gpu ids" if '-' in args.gpu_ids: gpus = args.gpu_ids.split('-') gpus[0] = int(gpus[0]) gpus[1] = int(gpus[1]) + 1 args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) return args def main(): # argument parse and create log args = parse_args() cfg.set_args(args.gpu_ids, args.continue_train) cudnn.benchmark = True from base import Trainer trainer = Trainer() trainer._make_batch_generator() trainer._make_model() # train for epoch in range(trainer.start_epoch, cfg.end_epoch): trainer.set_lr(epoch) trainer.tot_timer.tic() trainer.read_timer.tic() for itr, (inputs, targets, meta_info) in enumerate(trainer.batch_generator): trainer.read_timer.toc() trainer.gpu_timer.tic() # forward trainer.optimizer.zero_grad() loss = trainer.model(inputs, targets, meta_info, 'train') loss = {k:loss[k].mean() for k in loss} # backward sum(loss[k] for k in loss).backward() trainer.optimizer.step() trainer.gpu_timer.toc() screen = [ 'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch), 'lr: %g' % (trainer.get_lr()), 'speed: %.2f(%.2fs r%.2f)s/itr' % ( trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time), '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. 
* trainer.itr_per_epoch), ] screen += ['%s: %.4f' % ('loss_' + k, v.detach()) for k,v in loss.items()] trainer.logger.info(' '.join(screen)) trainer.tot_timer.toc() trainer.tot_timer.tic() trainer.read_timer.tic() trainer.save_model({ 'epoch': epoch, 'network': trainer.model.state_dict(), 'optimizer': trainer.optimizer.state_dict(), }, epoch) if __name__ == "__main__": main() ================================================ FILE: requirements.sh ================================================ pip install chumpy tqdm torchgeometry trimesh scipy smplx scikit-image opencv-python pycocotools
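The training loop in `main/train.py` above receives the loss as a dictionary with one entry per term (cloth classification, gender classification, DensePose-based SDF, regularization), averages each term, backpropagates their sum, and logs each term separately. The following is a stripped-down illustration of that loss-dict pattern; the linear model and data are hypothetical stand-ins, not the actual ClothWild modules.

```python
import torch
import torch.nn as nn

# Hypothetical stand-ins for the real backbone/ClothNet and a training batch.
model = nn.Linear(8, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
x, y = torch.randn(4, 8), torch.randn(4, 2)

optimizer.zero_grad()
pred = model(x)
# One entry per loss term, analogous to Model.forward returning cloth_cls / gender_cls / sdf_dp / reg.
loss = {'cloth_cls': (pred - y).abs().mean(), 'reg': 0.1 * pred.pow(2).mean()}
loss = {k: loss[k].mean() for k in loss}          # average each term
sum(loss[k] for k in loss).backward()             # backprop the total loss
optimizer.step()
print({('loss_' + k): float(v.detach()) for k, v in loss.items()})  # per-term logging
```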