Repository: noahcao/Pixel2Mesh
Branch: master
Commit: 92a6f7bdbee5
Files: 94
Total size: 136.4 KB
Directory structure:
gitextract_re_dj0ft/
├── .gitignore
├── .gitmodules
├── README.md
├── config.py
├── datasets/
│ ├── base_dataset.py
│ ├── imagenet.py
│ ├── preprocess/
│ │ └── shapenet/
│ │ └── .gitignore
│ └── shapenet.py
├── entrypoint_eval.py
├── entrypoint_predict.py
├── entrypoint_train.py
├── experiments/
│ ├── backbone/
│ │ ├── vgg16.yml
│ │ ├── vgg16_1e-3.yml
│ │ └── vgg16_1e-4.yml
│ ├── baseline/
│ │ ├── chamfer_only.yml
│ │ ├── default.yml
│ │ ├── default_zthresh.yml
│ │ ├── large_laplace.yml
│ │ ├── lr_1e-3_weighted_chamfer.yml
│ │ ├── lr_1e-3_weighted_chamfer_oppo.yml
│ │ ├── lr_1e-3_zthresh.yml
│ │ ├── lr_1e-3_zthresh_resnet.yml
│ │ ├── lr_1e-4.yml
│ │ ├── lr_1e-4_dataset_all.yml
│ │ ├── lr_1e-4_dataset_tf_same_weights_step_adjusted.yml
│ │ ├── lr_1e-4_dataset_tf_same_weights_step_adjusted_more_epochs.yml
│ │ ├── lr_1e-4_k250_d256.yml
│ │ ├── lr_1e-4_plane_only.yml
│ │ ├── lr_1e-4_resnet_dataset_all.yml
│ │ ├── lr_1e-4_resnet_dataset_all_larger_sample.yml
│ │ ├── lr_1e-4_resnet_dataset_all_sample_9k.yml
│ │ ├── lr_1e-4_resnet_dataset_tf_larger_sample.yml
│ │ ├── lr_1e-4_resnet_dataset_tf_same_weights_step_adjusted.yml
│ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k.yml
│ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml
│ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs_same_weights.yml
│ │ ├── lr_1e-4_resnet_k250_d256.yml
│ │ ├── lr_1e-4_wd_1e-8.yml
│ │ ├── lr_1e-4_weighted_chamfer.yml
│ │ ├── lr_1e-4_weighted_chamfer_oppo.yml
│ │ ├── lr_1e-4_zthresh.yml
│ │ ├── lr_1e-4_zthresh_resnet.yml
│ │ ├── lr_1e-5.yml
│ │ ├── lr_1e-5_dataset_tf_same_weights_step_adjusted.yml
│ │ ├── lr_2.5e-5.yml
│ │ ├── lr_3e-5_dataset_tf_same_weights_step_adjusted.yml
│ │ ├── lr_5e-4_zthresh_resnet.yml
│ │ ├── lr_5e-5_dataset_all_more_epochs.yml
│ │ ├── normal_free.yml
│ │ ├── relu_free.yml
│ │ └── resnet.yml
│ └── default/
│ ├── resnet.yml
│ └── tensorflow.yml
├── external/
│ └── chamfer/
│ ├── chamfer.cu
│ ├── chamfer_cuda.cpp
│ ├── setup.py
│ └── test.py
├── functions/
│ ├── base.py
│ ├── evaluator.py
│ ├── predictor.py
│ ├── saver.py
│ └── trainer.py
├── logger.py
├── models/
│ ├── backbones/
│ │ ├── __init__.py
│ │ ├── resnet.py
│ │ └── vgg16.py
│ ├── classifier.py
│ ├── layers/
│ │ ├── chamfer_wrapper.py
│ │ ├── gbottleneck.py
│ │ ├── gconv.py
│ │ ├── gpooling.py
│ │ └── gprojection.py
│ ├── losses/
│ │ ├── classifier.py
│ │ └── p2m.py
│ └── p2m.py
├── options.py
├── slurm/
│ ├── eval.sh
│ ├── train.sh
│ ├── train_checkpoint.sh
│ └── train_checkpoint_1gpu.sh
├── test.py
└── utils/
├── average_meter.py
├── demo_selection/
│ └── select_demo_images.py
├── mesh.py
├── migrations/
│ ├── delete_unnecessary_keys.py
│ ├── extract_vgg_weights.py
│ ├── from_p2m_pytorch.py
│ ├── official_config_pytorch_256.txt
│ ├── official_config_tensorflow_256.txt
│ ├── official_model_converter.py
│ ├── tensorflow_to_pkl.py
│ └── validate_dataset_all.py
├── tensor.py
└── vis/
└── renderer.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
debug
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# yaml files
yamls/
# logs
logs/
# vim swap files
*.swp
# data
datasets/data
datasets/extras
datasets/examples/*.obj
summary
checkpoints
# IDEA
.idea
================================================
FILE: .gitmodules
================================================
[submodule "external/neural_renderer"]
path = external/neural_renderer
url = https://github.com/daniilidis-group/neural_renderer
================================================
FILE: README.md
================================================
# Pixel2Mesh
This is an implementation of Pixel2Mesh in PyTorch. In addition, we:
- Provide retrained Pixel2Mesh checkpoints. The pretrained TensorFlow model from the [official implementation](https://github.com/nywang16/Pixel2Mesh) has also been converted into a PyTorch checkpoint file for convenience.
- Provide a modified version of Pixel2Mesh whose backbone is ResNet instead of VGG.
- Clarify some details of the previous implementation and provide a flexible training framework.
**If you have any urgent question or issue, please contact jinkuncao@gmail.com.**
## Get Started
### Environment
The current version only supports training and inference on GPU. It works well with the following dependencies:
- Ubuntu 16.04 / 18.04
- Python 3.7
- PyTorch 1.1
- CUDA 9.0 (10.0 should also work)
- OpenCV 4.1
- Scipy 1.3
- Scikit-Image 0.15
Some minor dependencies are also needed; the latest versions provided by conda/pip work well:
> easydict, pyyaml, tensorboardx, trimesh, shapely
Two more steps are needed to prepare the codebase (see the commands after this list):
1. `git submodule update --init` to get [Neural Renderer](https://github.com/daniilidis-group/neural_renderer) ready.
2. `python setup.py install` in both [external/chamfer](external/chamfer) and `external/neural_renderer` to compile the modules.
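If you prefer to copy and paste, these two steps roughly expand to the following commands, assuming they are run from the repository root:
```shell
git submodule update --init
cd external/chamfer && python setup.py install && cd ../..
cd external/neural_renderer && python setup.py install && cd ../..
```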
### Datasets
We use [ShapeNet](https://www.shapenet.org/) for model training and evaluation. The official tensorflow implementation provides a subset of ShapeNet for this purpose; you can download it [here](https://drive.google.com/drive/folders/131dH36qXCabym1JjSmEpSQZg4dmZVQid). Extract it and link it to the `data_tf` directory as shown below. Before that, the meta files [here](https://drive.google.com/file/d/16d9druvCpsjKWsxHmsTD5HSOWiCWtDzo/view?usp=sharing) will help you establish the folder tree, demonstrated as follows.
~~*P.S. In case more data is needed, another larger data package of ShapeNet is also [available](https://drive.google.com/file/d/1Z8gt4HdPujBNFABYrthhau9VZW10WWYe/view). You can extract it and place it in the `data` directory, but this takes a long time and needs about 300GB of storage.*~~
P.P.S. For the larger data package, we provide temporary access here on [OneDrive](https://1drv.ms/u/s!AtMVLfbdnqr4nGZjQ8GuPHlEUSg9?e=0dIEbK).
```
datasets/data
├── ellipsoid
│ ├── face1.obj
│ ├── face2.obj
│ ├── face3.obj
│ └── info_ellipsoid.dat
├── pretrained
│ ... (.pth files)
└── shapenet
├── data (larger data package, optional)
│ ├── 02691156
│ │ └── 3a123ae34379ea6871a70be9f12ce8b0_02.dat
│ ├── 02828884
│ └── ...
├── data_tf (standard data used in official implementation)
│ ├── 02691156 (put the folders directly in data_tf)
│ │ └── 10115655850468db78d106ce0a280f87
│ ├── 02828884
│ └── ...
└── meta
...
```
The difference between the two versions of the dataset is worth some explanation:
- `data_tf` has images of 137x137 resolution and four channels (RGB + alpha), with 175,132 samples for training and 43,783 for evaluation.
- `data` has RGB images of 224x224 resolution with the background set to all white. It contains 1,050,240 samples altogether for training and evaluation.
*P.S. We trained models with both datasets and evaluated on both benchmarks. To save time and align our results with the official paper/implementation, we use `data_tf` by default.*
### Usage
#### Configuration
You can modify the configuration in a `yml` file for training/evaluation. It overrides the default settings in `options.py`. We provide some examples in the `experiments` directory.
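For example, `experiments/baseline/lr_1e-4.yml` (included in this repository) inherits everything from `default.yml` and overrides only the learning rate:
```yml
based_on:
  - default.yml
optim:
  lr: 1.0E-4
```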
#### Training
```
python entrypoint_train.py --name xxx --options path/to/yaml
```
*P.S. To train on slurm clusters, we also provide reference settings. Refer to the [slurm](slurm) folder for details.*
#### Evaluation
```shell
python entrypoint_eval.py --name xxx --options path/to/yml --checkpoint path/to/checkpoint
```
#### Inference
You can run inference on your own images with a single command:
```
python entrypoint_predict.py --options /path/to/yml --checkpoint /path/to/checkpoint --folder /path/to/images
```
*P.S. We only support training/evaluation/inference on GPU by default.*
## Results
We tested the performance of several models. The [official tensorflow implementation](https://github.com/nywang16/Pixel2Mesh) reports much higher performance than claimed in the [original paper](https://arxiv.org/abs/1804.01654). Our results, listed below, are close to those reported in [MeshRCNN](https://arxiv.org/abs/1906.02739). The original paper evaluates results with a simple mean over categories, without considering that different categories contain different numbers of samples, while some later papers use a weighted mean. For caution, we report results under both protocols.
| Checkpoint | Eval Protocol | CD | F1<sup>τ</sup> | F1<sup>2τ</sup> |
| --- | --- | --- | --- | --- |
| Official Pretrained (tensorflow) | Mean | 0.482 | 65.22 | 78.80 |
| | Weighted-mean | 0.439 | 66.56 | 80.17 |
| Migrated Checkpoint | Mean | 0.498 | 64.21 | 78.03 |
| | Weighted-mean | 0.451 | 65.67 | 79.51 |
| ResNet | Mean | 0.443 | 65.36 | 79.24 |
| | Weighted-mean | 0.411 | 66.13 | 80.13 |
*P.S. Due to time limits, the ResNet checkpoint has not been tuned carefully or trained to convergence.*
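To make the two protocols concrete, here is a minimal sketch with hypothetical per-category numbers (the actual logic lives in `Evaluator.average_of_average_meters` in `functions/evaluator.py`):
```python
# Two hypothetical categories: one with 100 samples, one with 10.
sums = [100 * 0.40, 10 * 0.80]   # per-category sums of some metric
counts = [100, 10]               # per-category sample counts
avgs = [s / c for s, c in zip(sums, counts)]

simple_mean = sum(avgs) / len(avgs)      # 0.60: every category counts equally
weighted_mean = sum(sums) / sum(counts)  # ~0.436: every sample counts equally
```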
### Pretrained checkpoints
- **VGG backbone:** The checkpoint converted from the official pretrained model (based on VGG) can be downloaded [here](https://drive.google.com/file/d/1Gk3M4KQekEenG9qQm60OFsxNar0sG8bN/view?usp=sharing). (Scripts to migrate tensorflow checkpoints into `.pth` are available in `utils/migrations`.)
- **ResNet backbone:** As we provide ResNet as an alternative backbone, we also provide a corresponding checkpoint [here](https://drive.google.com/file/d/1pZm_IIWDUDje6gRZHW-GDhx5FCDM2Qg_/view?usp=sharing).
## Details of Improvement
We explain here some improvements of this implementation over the official version.
- **Larger batch size:** We support a larger batch size on multiple GPUs for training. Since the Chamfer distance cannot be computed when samples in a batch have different ground-truth point-cloud sizes, "resizing" the point clouds is necessary. Instead of resampling points, we simply upsample/downsample from the dataset, as sketched after this list.
- **Better backbone:** We enable replacing VGG with ResNet50 as the model backbone. Training is more stable and the final performance is higher.
- **More stable training:** We normalize the deformed sphere, so that it is deformed around location $(0,0,0)$; we use a threshold activation on the $z$-axis during projection, so that $z$ is always strictly positive or strictly negative and never $0$ (see the sketch after this list). These do not seem to improve final performance, but they make the training loss more stable.
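The point-cloud "resizing" above is done per batch in `get_shapenet_collate` (`datasets/shapenet.py`); its core boils down to index resampling with repetition. The helper name below is ours, for illustration only:
```python
import numpy as np

def resize_point_cloud(points, normals, num_points):
    # A random permutation of indices is tiled or truncated to num_points,
    # so points get duplicated (upsampling) or dropped (downsampling).
    choices = np.resize(np.random.permutation(points.shape[0]), num_points)
    return points[choices], normals[choices]
```
For the $z$ threshold, here is a minimal sketch of the idea, assuming a simple clamp with `z_threshold: -0.05` as in `experiments/baseline/default_zthresh.yml` (the actual projection code lives in `models/layers/gprojection.py`):
```python
import torch

def threshold_z(z, z_threshold=-0.05):
    # Hypothetical sketch: clamp depth away from zero so a perspective
    # projection such as (x * f / z, y * f / z) never divides by near-zero z.
    return torch.clamp(z, max=z_threshold)
```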
## Demo
Generated mesh samples from our ResNet model are provided in [datasets/examples](datasets/examples). The three mesh models in each row are deformed from a single ellipsoid mesh with different numbers of vertices (156 vs 628 vs 2466), as configured in the original paper.




## Acknowledgements
Our work is based on the official version of [Pixel2Mesh](https://github.com/nywang16/Pixel2Mesh); some parts of the code are borrowed from [a previous PyTorch implementation of Pixel2Mesh](https://github.com/Tong-ZHAO/Pixel2Mesh-Pytorch). The packed files for the two dataset versions are also provided by these two projects. Most of the codework was done by [Yuge Zhang](https://github.com/ultmaster).
================================================
FILE: config.py
================================================
import os
# dataset root
DATASET_ROOT = "datasets/data"
SHAPENET_ROOT = os.path.join(DATASET_ROOT, "shapenet")
IMAGENET_ROOT = os.path.join(DATASET_ROOT, "imagenet")
# ellipsoid path
ELLIPSOID_PATH = os.path.join(DATASET_ROOT, "ellipsoid/info_ellipsoid.dat")
# pretrained weights path
PRETRAINED_WEIGHTS_PATH = {
"vgg16": os.path.join(DATASET_ROOT, "pretrained/vgg16-397923af.pth"),
"resnet50": os.path.join(DATASET_ROOT, "pretrained/resnet50-19c8e357.pth"),
"vgg16p2m": os.path.join(DATASET_ROOT, "pretrained/vgg16-p2m.pth"),
}
# Mean and standard deviation for normalizing input image
IMG_NORM_MEAN = [0.485, 0.456, 0.406]
IMG_NORM_STD = [0.229, 0.224, 0.225]
IMG_SIZE = 224
================================================
FILE: datasets/base_dataset.py
================================================
from torch.utils.data.dataset import Dataset
from torchvision.transforms import Normalize
import config
class BaseDataset(Dataset):
def __init__(self):
self.normalize_img = Normalize(mean=config.IMG_NORM_MEAN, std=config.IMG_NORM_STD)
================================================
FILE: datasets/imagenet.py
================================================
import os
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True
class ImageNet(Dataset):
def __init__(self, root_dir, split="train"):
self.image_dir = os.path.join(root_dir, split)
self.images = []
self.labels = []
with open(os.path.join(root_dir, "meta", split + ".txt"), "r") as f:
for line in f.readlines():
image, label = line.strip().split()
self.images.append(image)
self.labels.append(int(label))
self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
if split == "train":
self.transform = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
self.normalize
])
else:
self.transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
self.normalize
])
def __getitem__(self, index):
image = Image.open(os.path.join(self.image_dir, self.images[index]))
image = image.convert('RGB')
image = self.transform(image)
return {
"images": image,
"labels": self.labels[index],
"filename": self.images[index],
}
def __len__(self):
return len(self.images)
================================================
FILE: datasets/preprocess/shapenet/.gitignore
================================================
data
================================================
FILE: datasets/shapenet.py
================================================
import json
import os
import pickle
import numpy as np
import torch
from PIL import Image
from skimage import io, transform
from torch.utils.data.dataloader import default_collate
import config
from datasets.base_dataset import BaseDataset
class ShapeNet(BaseDataset):
"""
Dataset wrapping images and target meshes for ShapeNet dataset.
"""
def __init__(self, file_root, file_list_name, mesh_pos, normalization, shapenet_options):
super().__init__()
self.file_root = file_root
with open(os.path.join(self.file_root, "meta", "shapenet.json"), "r") as fp:
self.labels_map = sorted(list(json.load(fp).keys()))
self.labels_map = {k: i for i, k in enumerate(self.labels_map)}
# Read file list
with open(os.path.join(self.file_root, "meta", file_list_name + ".txt"), "r") as fp:
self.file_names = fp.read().split("\n")[:-1]
self.tensorflow = "_tf" in file_list_name # tensorflow version of data
self.normalization = normalization
self.mesh_pos = mesh_pos
self.resize_with_constant_border = shapenet_options.resize_with_constant_border
def __getitem__(self, index):
if self.tensorflow:
filename = self.file_names[index][17:]
label = filename.split("/", maxsplit=1)[0]
pkl_path = os.path.join(self.file_root, "data_tf", filename)
img_path = pkl_path[:-4] + ".png"
with open(pkl_path, 'rb') as f:
data = pickle.load(f, encoding="latin1")
pts, normals = data[:, :3], data[:, 3:]
img = io.imread(img_path)
img[np.where(img[:, :, 3] == 0)] = 255
if self.resize_with_constant_border:
img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE),
mode='constant', anti_aliasing=False) # to match behavior of old versions
else:
img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE))
img = img[:, :, :3].astype(np.float32)
else:
label, filename = self.file_names[index].split("_", maxsplit=1)
with open(os.path.join(self.file_root, "data", label, filename), "rb") as f:
data = pickle.load(f, encoding="latin1")
img, pts, normals = data[0].astype(np.float32) / 255.0, data[1][:, :3], data[1][:, 3:]
pts -= np.array(self.mesh_pos)
assert pts.shape[0] == normals.shape[0]
length = pts.shape[0]
img = torch.from_numpy(np.transpose(img, (2, 0, 1)))
img_normalized = self.normalize_img(img) if self.normalization else img
return {
"images": img_normalized,
"images_orig": img,
"points": pts,
"normals": normals,
"labels": self.labels_map[label],
"filename": filename,
"length": length
}
def __len__(self):
return len(self.file_names)
class ShapeNetImageFolder(BaseDataset):
def __init__(self, folder, normalization, shapenet_options):
super().__init__()
self.normalization = normalization
self.resize_with_constant_border = shapenet_options.resize_with_constant_border
self.file_list = []
for fl in os.listdir(folder):
file_path = os.path.join(folder, fl)
# check image before hand
try:
if file_path.endswith(".gif"):
raise ValueError("gif's are results. Not acceptable")
Image.open(file_path)
self.file_list.append(file_path)
except (IOError, ValueError):
print("=> Ignoring %s because it's not a valid image" % file_path)
def __getitem__(self, item):
img_path = self.file_list[item]
img = io.imread(img_path)
if img.shape[2] > 3: # has alpha channel
img[np.where(img[:, :, 3] == 0)] = 255
if self.resize_with_constant_border:
img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE),
mode='constant', anti_aliasing=False)
else:
img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE))
img = img[:, :, :3].astype(np.float32)
img = torch.from_numpy(np.transpose(img, (2, 0, 1)))
img_normalized = self.normalize_img(img) if self.normalization else img
return {
"images": img_normalized,
"images_orig": img,
"filepath": self.file_list[item]
}
def __len__(self):
return len(self.file_list)
def get_shapenet_collate(num_points):
"""
:param num_points: This option will not be activated when batch size = 1
:return: shapenet_collate function
"""
def shapenet_collate(batch):
if len(batch) > 1:
all_equal = True
for t in batch:
if t["length"] != batch[0]["length"]:
all_equal = False
break
points_orig, normals_orig = [], []
if not all_equal:
for t in batch:
pts, normal = t["points"], t["normals"]
length = pts.shape[0]
choices = np.resize(np.random.permutation(length), num_points)
t["points"], t["normals"] = pts[choices], normal[choices]
points_orig.append(torch.from_numpy(pts))
normals_orig.append(torch.from_numpy(normal))
ret = default_collate(batch)
ret["points_orig"] = points_orig
ret["normals_orig"] = normals_orig
return ret
ret = default_collate(batch)
ret["points_orig"] = ret["points"]
ret["normals_orig"] = ret["normals"]
return ret
return shapenet_collate
================================================
FILE: entrypoint_eval.py
================================================
import argparse
import sys
from functions.evaluator import Evaluator
from options import update_options, options, reset_options
def parse_args():
parser = argparse.ArgumentParser(description='Pixel2Mesh Evaluation Entrypoint')
parser.add_argument('--options', help='experiment options file name', required=False, type=str)
args, rest = parser.parse_known_args()
if args.options is None:
print("Running without options file...", file=sys.stderr)
else:
update_options(args.options)
parser.add_argument('--batch-size', help='batch size', type=int)
parser.add_argument('--shuffle', help='shuffle samples', default=False, action='store_true')
parser.add_argument('--checkpoint', help='trained checkpoint file', type=str, required=True)
parser.add_argument('--version', help='version of task (timestamp by default)', type=str)
parser.add_argument('--name', help='subfolder name of this experiment', required=True, type=str)
parser.add_argument('--gpus', help='number of GPUs to use', type=int)
args = parser.parse_args()
return args
def main():
args = parse_args()
logger, writer = reset_options(options, args, phase='eval')
evaluator = Evaluator(options, logger, writer)
evaluator.evaluate()
if __name__ == "__main__":
main()
================================================
FILE: entrypoint_predict.py
================================================
import argparse
import sys
from functions.predictor import Predictor
from options import update_options, options, reset_options
def parse_args():
parser = argparse.ArgumentParser(description='Pixel2Mesh Prediction Entrypoint')
parser.add_argument('--options', help='experiment options file name', required=False, type=str)
args, rest = parser.parse_known_args()
if args.options is None:
print("Running without options file...", file=sys.stderr)
else:
update_options(args.options)
parser.add_argument('--batch-size', help='batch size', type=int)
parser.add_argument('--checkpoint', help='trained model file', type=str, required=True)
parser.add_argument('--name', required=True, type=str)
parser.add_argument('--folder', required=True, type=str)
options.dataset.name += '_demo'
args = parser.parse_args()
return args
def main():
args = parse_args()
logger, writer = reset_options(options, args, phase='predict')
predictor = Predictor(options, logger, writer)
predictor.predict()
if __name__ == "__main__":
main()
================================================
FILE: entrypoint_train.py
================================================
import argparse
import sys
from functions.trainer import Trainer
from options import update_options, options, reset_options
def parse_args():
parser = argparse.ArgumentParser(description='Pixel2Mesh Training Entrypoint')
parser.add_argument('--options', help='experiment options file name', required=False, type=str)
args, rest = parser.parse_known_args()
if args.options is None:
print("Running without options file...", file=sys.stderr)
else:
update_options(args.options)
# training
parser.add_argument('--batch-size', help='batch size', type=int)
parser.add_argument('--checkpoint', help='checkpoint file', type=str)
parser.add_argument('--num-epochs', help='number of epochs', type=int)
parser.add_argument('--version', help='version of task (timestamp by default)', type=str)
parser.add_argument('--name', required=True, type=str)
args = parser.parse_args()
return args
def main():
args = parse_args()
logger, writer = reset_options(options, args)
trainer = Trainer(options, logger, writer)
trainer.train()
if __name__ == "__main__":
main()
================================================
FILE: experiments/backbone/vgg16.yml
================================================
dataset:
name: imagenet
num_classes: 1000
train:
num_epochs: 80
batch_size: 32
model:
name: classifier
backbone: vgg16
optim:
name: sgd
lr: 1.0e-2
wd: 5.0e-4
lr_step:
- 20
- 40
- 60
test:
batch_size: 32
num_workers: 16
num_gpus: 8
================================================
FILE: experiments/backbone/vgg16_1e-3.yml
================================================
based_on:
- vgg16.yml
optim:
lr: 1.0e-4
================================================
FILE: experiments/backbone/vgg16_1e-4.yml
================================================
based_on:
- vgg16.yml
optim:
lr: 1.0e-3
================================================
FILE: experiments/baseline/chamfer_only.yml
================================================
based_on:
- default.yml
loss:
weights:
normal: 0.
laplace: 0.
edge: 0.
================================================
FILE: experiments/baseline/default.yml
================================================
num_gpus: 8
num_workers: 16
train:
batch_size: 24
test:
batch_size: 24
================================================
FILE: experiments/baseline/default_zthresh.yml
================================================
based_on:
- default.yml
model:
z_threshold: -0.05
================================================
FILE: experiments/baseline/large_laplace.yml
================================================
based_on:
- default.yml
loss:
weights:
laplace: 45.0
move: 3.0
================================================
FILE: experiments/baseline/lr_1e-3_weighted_chamfer.yml
================================================
based_on:
- lr_1e-4_weighted_chamfer.yml
optim:
lr: 1.0E-3
================================================
FILE: experiments/baseline/lr_1e-3_weighted_chamfer_oppo.yml
================================================
based_on:
- lr_1e-4_weighted_chamfer_oppo.yml
optim:
lr: 1.0E-3
================================================
FILE: experiments/baseline/lr_1e-3_zthresh.yml
================================================
based_on:
- default.yml
optim:
lr: 1.0E-3
model:
z_threshold: -0.05
================================================
FILE: experiments/baseline/lr_1e-3_zthresh_resnet.yml
================================================
based_on:
- lr_1e-3_zthresh.yml
model:
backbone: resnet50
train:
batch_size: 8
test:
batch_size: 8
================================================
FILE: experiments/baseline/lr_1e-4.yml
================================================
based_on:
- default.yml
optim:
lr: 1.0E-4
================================================
FILE: experiments/baseline/lr_1e-4_dataset_all.yml
================================================
based_on:
- lr_1e-4.yml
dataset:
subset_train: train_all
subset_eval: test_all
optim:
lr_factor: 0.2
lr_step:
- 25
- 45
train:
num_epochs: 60
================================================
FILE: experiments/baseline/lr_1e-4_dataset_tf_same_weights_step_adjusted.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_tf_sample_9k.yml
model:
backbone: vgg16
train:
batch_size: 24
test:
batch_size: 24
loss:
weights:
chamfer_opposite: 0.55
laplace: 0.5
edge: 0.1
move: 0.033
================================================
FILE: experiments/baseline/lr_1e-4_dataset_tf_same_weights_step_adjusted_more_epochs.yml
================================================
based_on:
- lr_1e-4_dataset_tf_same_weights_step_adjusted.yml
train:
num_epochs: 110
optim:
lr_step:
- 40
- 80
- 100
================================================
FILE: experiments/baseline/lr_1e-4_k250_d256.yml
================================================
based_on:
- lr_1e-4_dataset_all.yml
model:
hidden_dim: 256
last_hidden_dim: 128
dataset:
camera_f: [250., 250.]
================================================
FILE: experiments/baseline/lr_1e-4_plane_only.yml
================================================
based_on:
- lr_1e-4.yml
train:
num_epochs: 100
optim:
lr_step:
- 60
- 90
dataset:
subset_train: train_plane
subset_eval: test_plane
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_all.yml
================================================
based_on:
- lr_1e-4.yml
model:
backbone: resnet50
train:
batch_size: 8
test:
batch_size: 8
dataset:
subset_train: train_all
subset_eval: test_all
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_all_larger_sample.yml
================================================
based_on:
- lr_1e-4.yml
model:
backbone: resnet50
train:
batch_size: 8
num_epochs: 70
test:
batch_size: 8
dataset:
subset_train: train_all
subset_eval: test_all
shapenet:
num_points: 5000
optim:
lr_factor: 0.3
lr_step:
- 25
- 45
- 60
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_all_sample_9k.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_all_larger_sample.yml
dataset:
shapenet:
num_points: 9000
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_larger_sample.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_all_larger_sample.yml
dataset:
subset_train: train_tf
subset_eval: test_tf
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_same_weights_step_adjusted.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_tf_sample_9k.yml
loss:
weights:
chamfer_opposite: 0.55
laplace: 0.5
edge: 0.1
move: 0.033
optim:
lr_step:
- 30
- 70
- 90
train:
num_epochs: 110
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_all_sample_9k.yml
dataset:
subset_train: train_tf
subset_eval: test_tf
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_tf_sample_9k.yml
train:
num_epochs: 110
optim:
lr_step:
- 40
- 70
- 90
================================================
FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs_same_weights.yml
================================================
based_on:
- lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml
loss:
weights:
chamfer_opposite: 0.55
laplace: 0.5
edge: 0.1
move: 0.033
================================================
FILE: experiments/baseline/lr_1e-4_resnet_k250_d256.yml
================================================
based_on:
- lr_1e-4_k250_d256.yml
model:
backbone: resnet50
train:
batch_size: 8
test:
batch_size: 8
================================================
FILE: experiments/baseline/lr_1e-4_wd_1e-8.yml
================================================
based_on:
- lr_1e-4.yml
optim:
wd: 1.0e-8
================================================
FILE: experiments/baseline/lr_1e-4_weighted_chamfer.yml
================================================
based_on:
- lr_1e-4.yml
loss:
weights:
chamfer: [0.05, 0.4, 2.]
chamfer_opposite: 0.55
================================================
FILE: experiments/baseline/lr_1e-4_weighted_chamfer_oppo.yml
================================================
based_on:
- lr_1e-4.yml
loss:
weights:
chamfer_opposite: 0.55
================================================
FILE: experiments/baseline/lr_1e-4_zthresh.yml
================================================
based_on:
- lr_1e-4.yml
model:
z_threshold: -0.05
================================================
FILE: experiments/baseline/lr_1e-4_zthresh_resnet.yml
================================================
based_on:
- lr_1e-4_zthresh.yml
model:
backbone: resnet50
train:
batch_size: 8
test:
batch_size: 8
================================================
FILE: experiments/baseline/lr_1e-5.yml
================================================
based_on:
- default.yml
optim:
lr: 1.0E-5
================================================
FILE: experiments/baseline/lr_1e-5_dataset_tf_same_weights_step_adjusted.yml
================================================
based_on:
- lr_1e-4_dataset_tf_same_weights_step_adjusted.yml
optim:
lr: 1.0e-5
================================================
FILE: experiments/baseline/lr_2.5e-5.yml
================================================
based_on:
- default.yml
optim:
lr: 2.5E-5
================================================
FILE: experiments/baseline/lr_3e-5_dataset_tf_same_weights_step_adjusted.yml
================================================
based_on:
- lr_1e-4_dataset_tf_same_weights_step_adjusted.yml
optim:
lr: 3.0e-5
================================================
FILE: experiments/baseline/lr_5e-4_zthresh_resnet.yml
================================================
based_on:
- lr_1e-4_zthresh_resnet.yml
optim:
lr: 5.0e-4
================================================
FILE: experiments/baseline/lr_5e-5_dataset_all_more_epochs.yml
================================================
based_on:
- lr_1e-4_dataset_all.yml
optim:
lr: 5.0e-5
lr_factor: 0.2
lr_step:
- 40
- 70
- 90
train:
num_epochs: 100
================================================
FILE: experiments/baseline/normal_free.yml
================================================
based_on:
- default.yml
loss:
weights:
normal: 0.
================================================
FILE: experiments/baseline/relu_free.yml
================================================
based_on:
- default.yml
model:
gconv_activation: false
================================================
FILE: experiments/baseline/resnet.yml
================================================
based_on:
- default.yml
model:
backbone: resnet50
train:
batch_size: 8
test:
batch_size: 8
================================================
FILE: experiments/default/resnet.yml
================================================
checkpoint: null
checkpoint_dir: checkpoints
dataset:
camera_c:
- 111.5
- 111.5
camera_f:
- 248.0
- 248.0
mesh_pos:
- 0.0
- 0.0
- -0.8
name: shapenet
normalization: true
num_classes: 13
predict:
folder: /tmp
shapenet:
num_points: 9000
resize_with_constant_border: false
subset_eval: test_tf
subset_train: train_tf
log_dir: logs
log_level: info
loss:
weights:
chamfer:
- 1.0
- 1.0
- 1.0
chamfer_opposite: 0.55
constant: 1.0
edge: 0.1
laplace: 0.5
move: 0.033
normal: 0.00016
reconst: 0.0
model:
align_with_tensorflow: false
backbone: resnet50
coord_dim: 3
gconv_activation: true
hidden_dim: 192
last_hidden_dim: 192
name: pixel2mesh
z_threshold: 0
name: p2m
num_gpus: 8
num_workers: 16
optim:
adam_beta1: 0.9
lr: 0.0001
lr_factor: 0.3
lr_step:
- 30
- 70
- 90
name: adam
sgd_momentum: 0.9
wd: 1.0e-06
pin_memory: true
summary_dir: summary
test:
batch_size: 8
dataset: []
shuffle: false
summary_steps: 50
weighted_mean: false
train:
batch_size: 8
checkpoint_steps: 10000
num_epochs: 110
shuffle: true
summary_steps: 50
test_epochs: 1
use_augmentation: true
version: null
================================================
FILE: experiments/default/tensorflow.yml
================================================
checkpoint: null
checkpoint_dir: checkpoints
dataset:
camera_c:
- 112.0
- 112.0
camera_f:
- 250.0
- 250.0
mesh_pos:
- 0.0
- 0.0
- 0.0
name: shapenet
normalization: false
num_classes: 13
predict:
folder: /tmp
shapenet:
num_points: 9000
resize_with_constant_border: true
subset_eval: test_tf
subset_train: train_tf
log_dir: logs
log_level: info
loss:
weights:
chamfer:
- 1.0
- 1.0
- 1.0
chamfer_opposite: 0.55
constant: 1.0
edge: 0.1
laplace: 0.5
move: 0.033
normal: 0.00016
reconst: 0.0
model:
align_with_tensorflow: true
backbone: vgg16
coord_dim: 3
gconv_activation: true
hidden_dim: 256
last_hidden_dim: 128
name: pixel2mesh
z_threshold: 0
name: p2m
num_gpus: 1
num_workers: 16
optim:
adam_beta1: 0.9
lr: 1.0e-06
lr_factor: 0.1
lr_step:
- 30
- 45
name: adam
sgd_momentum: 0.9
wd: 1.0e-06
pin_memory: true
summary_dir: summary
test:
batch_size: 24
dataset: []
shuffle: true
summary_steps: 5
weighted_mean: false
train:
batch_size: 1
checkpoint_steps: 10000
num_epochs: 2
shuffle: true
summary_steps: 1
test_epochs: 1
use_augmentation: true
version: null
================================================
FILE: external/chamfer/chamfer.cu
================================================
#include <stdio.h>
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
__global__ void NmDistanceKernel(int b, int n, const float *xyz, int m,
const float *xyz2, float *result, int *result_i) {
const int batch = 512;
__shared__ float buf[batch * 3];
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int k2 = 0; k2 < m; k2 += batch) {
int end_k = min(m, k2 + batch) - k2;
for (int j = threadIdx.x; j < end_k * 3; j += blockDim.x) {
buf[j] = xyz2[(i * m + k2) * 3 + j];
}
__syncthreads();
for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) {
float x1 = xyz[(i * n + j) * 3 + 0];
float y1 = xyz[(i * n + j) * 3 + 1];
float z1 = xyz[(i * n + j) * 3 + 2];
int best_i = 0;
float best = 0;
int end_ka = end_k - (end_k & 3);
if (end_ka == batch) {
for (int k = 0; k < batch; k += 4) {
{
float x2 = buf[k * 3 + 0] - x1;
float y2 = buf[k * 3 + 1] - y1;
float z2 = buf[k * 3 + 2] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (k == 0 || d < best) {
best = d;
best_i = k + k2;
}
}
{
float x2 = buf[k * 3 + 3] - x1;
float y2 = buf[k * 3 + 4] - y1;
float z2 = buf[k * 3 + 5] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 1;
}
}
{
float x2 = buf[k * 3 + 6] - x1;
float y2 = buf[k * 3 + 7] - y1;
float z2 = buf[k * 3 + 8] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 2;
}
}
{
float x2 = buf[k * 3 + 9] - x1;
float y2 = buf[k * 3 + 10] - y1;
float z2 = buf[k * 3 + 11] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 3;
}
}
}
} else {
for (int k = 0; k < end_ka; k += 4) {
{
float x2 = buf[k * 3 + 0] - x1;
float y2 = buf[k * 3 + 1] - y1;
float z2 = buf[k * 3 + 2] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (k == 0 || d < best) {
best = d;
best_i = k + k2;
}
}
{
float x2 = buf[k * 3 + 3] - x1;
float y2 = buf[k * 3 + 4] - y1;
float z2 = buf[k * 3 + 5] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 1;
}
}
{
float x2 = buf[k * 3 + 6] - x1;
float y2 = buf[k * 3 + 7] - y1;
float z2 = buf[k * 3 + 8] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 2;
}
}
{
float x2 = buf[k * 3 + 9] - x1;
float y2 = buf[k * 3 + 10] - y1;
float z2 = buf[k * 3 + 11] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (d < best) {
best = d;
best_i = k + k2 + 3;
}
}
}
}
for (int k = end_ka; k < end_k; k++) {
float x2 = buf[k * 3 + 0] - x1;
float y2 = buf[k * 3 + 1] - y1;
float z2 = buf[k * 3 + 2] - z1;
float d = x2 * x2 + y2 * y2 + z2 * z2;
if (k == 0 || d < best) {
best = d;
best_i = k + k2;
}
}
if (k2 == 0 || result[(i * n + j)] > best) {
result[(i * n + j)] = best;
result_i[(i * n + j)] = best_i;
}
}
__syncthreads();
}
}
}
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1,
at::Tensor idx2) {
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); //num_points point cloud A
const auto m = xyz2.size(1); //num_points point cloud B
NmDistanceKernel <<< dim3(32, 16, 1), 512 >>> (batch_size, n, xyz1.data(), m,
xyz2.data(), dist1.data(), idx1.data());
NmDistanceKernel <<< dim3(32, 16, 1), 512 >>> (batch_size, m, xyz2.data(), n,
xyz1.data(), dist2.data(), idx2.data());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
__global__ void NmDistanceGradKernel(int b, int n, const float *xyz1, int m, const float *xyz2, const float *grad_dist1,
const int *idx1, float *grad_xyz1, float *grad_xyz2) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) {
float x1 = xyz1[(i * n + j) * 3 + 0];
float y1 = xyz1[(i * n + j) * 3 + 1];
float z1 = xyz1[(i * n + j) * 3 + 2];
int j2 = idx1[i * n + j];
float x2 = xyz2[(i * m + j2) * 3 + 0];
float y2 = xyz2[(i * m + j2) * 3 + 1];
float z2 = xyz2[(i * m + j2) * 3 + 2];
float g = grad_dist1[i * n + j] * 2;
atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 0]), g * (x1 - x2));
atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 1]), g * (y1 - y2));
atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 2]), g * (z1 - z2));
atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 0]), -(g * (x1 - x2)));
atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 1]), -(g * (y1 - y2)));
atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 2]), -(g * (z1 - z2)));
}
}
}
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1,
at::Tensor gradxyz2, at::Tensor graddist1,
at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) {
const auto batch_size = xyz1.size(0);
const auto n = xyz1.size(1); // num_points point cloud A
const auto m = xyz2.size(1); // num_points point cloud B
NmDistanceGradKernel <<< dim3(1, 16, 1), 256 >>> (batch_size, n, xyz1.data(), m,
xyz2.data(), graddist1.data(), idx1.data(),
gradxyz1.data(), gradxyz2.data());
NmDistanceGradKernel <<< dim3(1, 16, 1), 256 >>> (batch_size, m, xyz2.data(), n,
xyz1.data(), graddist2.data(), idx2.data(),
gradxyz2.data(), gradxyz1.data());
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in nnd get grad: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
================================================
FILE: external/chamfer/chamfer_cuda.cpp
================================================
#include <torch/torch.h>
#include <vector>
int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1,
at::Tensor idx2);
int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2,
at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2);
int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2,
at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2,
at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) {
return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &chamfer_forward, "chamfer forward (CUDA)");
m.def("backward", &chamfer_backward, "chamfer backward (CUDA)");
}
================================================
FILE: external/chamfer/setup.py
================================================
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='chamfer',
ext_modules=[
CUDAExtension('chamfer', [
'chamfer_cuda.cpp',
'chamfer.cu',
]),
],
cmdclass={
'build_ext': BuildExtension
})
================================================
FILE: external/chamfer/test.py
================================================
import sys
import os
for file in os.listdir("build"):
if file.startswith("lib"):
sys.path.insert(0, os.path.join("build", file))
# torch must be imported before we import chamfer
import torch
import chamfer
batch_size = 8
n, m = 30, 20
xyz1 = torch.rand((batch_size, n, 3)).cuda()
xyz2 = torch.rand((batch_size, m, 3)).cuda()
dist1 = torch.zeros(batch_size, n).cuda()
dist2 = torch.zeros(batch_size, m).cuda()
idx1 = torch.zeros((batch_size, n), dtype=torch.int).cuda()
idx2 = torch.zeros((batch_size, m), dtype=torch.int).cuda()
chamfer.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
print(dist1)
print(dist2)
print(idx1)
print(idx2)
================================================
FILE: functions/base.py
================================================
import os
import time
from datetime import timedelta
from logging import Logger
import torch
import torch.nn
from tensorboardX import SummaryWriter
from torch.utils.data.dataloader import default_collate
import config
from datasets.imagenet import ImageNet
from datasets.shapenet import ShapeNet, get_shapenet_collate, ShapeNetImageFolder
from functions.saver import CheckpointSaver
class CheckpointRunner(object):
def __init__(self, options, logger: Logger, summary_writer: SummaryWriter,
dataset=None, training=True, shared_model=None):
self.options = options
self.logger = logger
# GPUs
if not torch.cuda.is_available() and self.options.num_gpus > 0:
raise ValueError("CUDA not found yet number of GPUs is set to be greater than 0")
if os.environ.get("CUDA_VISIBLE_DEVICES"):
logger.info("CUDA visible devices is activated here, number of GPU setting is not working")
self.gpus = list(map(int, os.environ["CUDA_VISIBLE_DEVICES"].split(",")))
self.options.num_gpus = len(self.gpus)
enumerate_gpus = list(range(self.options.num_gpus))
logger.info("CUDA is asking for " + str(self.gpus) + ", PyTorch to doing a mapping, changing it to " +
str(enumerate_gpus))
self.gpus = enumerate_gpus
else:
self.gpus = list(range(self.options.num_gpus))
logger.info("Using GPUs: " + str(self.gpus))
# initialize summary writer
self.summary_writer = summary_writer
# initialize dataset
if dataset is None:
dataset = options.dataset # useful during training
self.dataset = self.load_dataset(dataset, training)
self.dataset_collate_fn = self.load_collate_fn(dataset, training)
# by default, epoch_count = step_count = 0
self.epoch_count = self.step_count = 0
self.time_start = time.time()
# override this function to define your model, optimizers etc.
# in case you want to use a model that is defined in a trainer or other place in the code,
# shared_model should help. in this case, checkpoint is not used
self.logger.info("Running model initialization...")
self.init_fn(shared_model=shared_model)
if shared_model is None:
# checkpoint is loaded if any
self.saver = CheckpointSaver(self.logger, checkpoint_dir=str(self.options.checkpoint_dir),
checkpoint_file=self.options.checkpoint)
self.init_with_checkpoint()
def load_dataset(self, dataset, training):
self.logger.info("Loading datasets: %s" % dataset.name)
if dataset.name == "shapenet":
return ShapeNet(config.SHAPENET_ROOT, dataset.subset_train if training else dataset.subset_eval,
dataset.mesh_pos, dataset.normalization, dataset.shapenet)
elif dataset.name == "shapenet_demo":
return ShapeNetImageFolder(dataset.predict.folder, dataset.normalization, dataset.shapenet)
elif dataset.name == "imagenet":
return ImageNet(config.IMAGENET_ROOT, "train" if training else "val")
raise NotImplementedError("Unsupported dataset")
def load_collate_fn(self, dataset, training):
if dataset.name == "shapenet":
return get_shapenet_collate(dataset.shapenet.num_points)
else:
return default_collate
def init_fn(self, shared_model=None, **kwargs):
raise NotImplementedError('You need to provide an _init_fn method')
# Pack models and optimizers in a dict - necessary for checkpointing
def models_dict(self):
return None
def optimizers_dict(self):
# NOTE: optimizers and models cannot have conflicting names
return None
def init_with_checkpoint(self):
checkpoint = self.saver.load_checkpoint()
if checkpoint is None:
self.logger.info("Checkpoint not loaded")
return
for model_name, model in self.models_dict().items():
if model_name in checkpoint:
if isinstance(model, torch.nn.DataParallel):
model.module.load_state_dict(checkpoint[model_name], strict=False)
else:
model.load_state_dict(checkpoint[model_name], strict=False)
if self.optimizers_dict() is not None:
for optimizer_name, optimizer in self.optimizers_dict().items():
if optimizer_name in checkpoint:
optimizer.load_state_dict(checkpoint[optimizer_name])
else:
self.logger.warning("Optimizers not found in the runner, skipping...")
if "epoch" in checkpoint:
self.epoch_count = checkpoint["epoch"]
if "total_step_count" in checkpoint:
self.step_count = checkpoint["total_step_count"]
def dump_checkpoint(self):
checkpoint = {
"epoch": self.epoch_count,
"total_step_count": self.step_count
}
for model_name, model in self.models_dict().items():
if isinstance(model, torch.nn.DataParallel):
checkpoint[model_name] = model.module.state_dict()
else:
checkpoint[model_name] = model.state_dict()
for k, v in list(checkpoint[model_name].items()):
if isinstance(v, torch.Tensor) and v.is_sparse:
checkpoint[model_name].pop(k)
if self.optimizers_dict() is not None:
for optimizer_name, optimizer in self.optimizers_dict().items():
checkpoint[optimizer_name] = optimizer.state_dict()
self.saver.save_checkpoint(checkpoint, "%06d_%06d" % (self.step_count, self.epoch_count))
@property
def time_elapsed(self):
return timedelta(seconds=time.time() - self.time_start)
================================================
FILE: functions/evaluator.py
================================================
from logging import Logger
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from functions.base import CheckpointRunner
from models.classifier import Classifier
from models.layers.chamfer_wrapper import ChamferDist
from models.p2m import P2MModel
from utils.average_meter import AverageMeter
from utils.mesh import Ellipsoid
from utils.vis.renderer import MeshRenderer
class Evaluator(CheckpointRunner):
def __init__(self, options, logger: Logger, writer, shared_model=None):
super().__init__(options, logger, writer, training=False, shared_model=shared_model)
# noinspection PyAttributeOutsideInit
def init_fn(self, shared_model=None, **kwargs):
if self.options.model.name == "pixel2mesh":
# Renderer for visualization
self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
# Initialize distance module
self.chamfer = ChamferDist()
# create ellipsoid
self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos)
# use weighted mean evaluation metrics or not
self.weighted_mean = self.options.test.weighted_mean
else:
self.renderer = None
self.num_classes = self.options.dataset.num_classes
if shared_model is not None:
self.model = shared_model
else:
if self.options.model.name == "pixel2mesh":
# create model
self.model = P2MModel(self.options.model, self.ellipsoid,
self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
elif self.options.model.name == "classifier":
self.model = Classifier(self.options.model, self.options.dataset.num_classes)
else:
raise NotImplementedError("Your model is not found")
self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()
# Evaluate step count, useful in summary
self.evaluate_step_count = 0
self.total_step_count = 0
def models_dict(self):
return {'model': self.model}
def evaluate_f1(self, dis_to_pred, dis_to_gt, pred_length, gt_length, thresh):
recall = np.sum(dis_to_gt < thresh) / gt_length
prec = np.sum(dis_to_pred < thresh) / pred_length
return 2 * prec * recall / (prec + recall + 1e-8)
def evaluate_chamfer_and_f1(self, pred_vertices, gt_points, labels):
# calculate accurate chamfer distance; ground truth points with different lengths;
# therefore cannot be batched
batch_size = pred_vertices.size(0)
pred_length = pred_vertices.size(1)
for i in range(batch_size):
gt_length = gt_points[i].size(0)
label = labels[i].cpu().item()
d1, d2, i1, i2 = self.chamfer(pred_vertices[i].unsqueeze(0), gt_points[i].unsqueeze(0))
d1, d2 = d1.cpu().numpy(), d2.cpu().numpy() # move squared distances to CPU as numpy arrays
self.chamfer_distance[label].update(np.mean(d1) + np.mean(d2))
self.f1_tau[label].update(self.evaluate_f1(d1, d2, pred_length, gt_length, 1E-4))
self.f1_2tau[label].update(self.evaluate_f1(d1, d2, pred_length, gt_length, 2E-4))
def evaluate_accuracy(self, output, target):
"""Computes the accuracy over the k top predictions for the specified values of k"""
top_k = [1, 5]
maxk = max(top_k)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
for k in top_k:
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
acc = correct_k.mul_(1.0 / batch_size)
if k == 1:
self.acc_1.update(acc)
elif k == 5:
self.acc_5.update(acc)
def evaluate_step(self, input_batch):
self.model.eval()
# Run inference
with torch.no_grad():
# Get ground truth
images = input_batch['images']
out = self.model(images)
if self.options.model.name == "pixel2mesh":
pred_vertices = out["pred_coord"][-1]
gt_points = input_batch["points_orig"]
if isinstance(gt_points, list):
gt_points = [pts.cuda() for pts in gt_points]
self.evaluate_chamfer_and_f1(pred_vertices, gt_points, input_batch["labels"])
elif self.options.model.name == "classifier":
self.evaluate_accuracy(out, input_batch["labels"])
return out
# noinspection PyAttributeOutsideInit
def evaluate(self):
self.logger.info("Running evaluations...")
# clear evaluate_step_count, but keep total count uncleared
self.evaluate_step_count = 0
test_data_loader = DataLoader(self.dataset,
batch_size=self.options.test.batch_size * self.options.num_gpus,
num_workers=self.options.num_workers,
pin_memory=self.options.pin_memory,
shuffle=self.options.test.shuffle,
collate_fn=self.dataset_collate_fn)
if self.options.model.name == "pixel2mesh":
self.chamfer_distance = [AverageMeter() for _ in range(self.num_classes)]
self.f1_tau = [AverageMeter() for _ in range(self.num_classes)]
self.f1_2tau = [AverageMeter() for _ in range(self.num_classes)]
elif self.options.model.name == "classifier":
self.acc_1 = AverageMeter()
self.acc_5 = AverageMeter()
# Iterate over all batches in an epoch
for step, batch in enumerate(test_data_loader):
# Send input to GPU
batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
# Run evaluation step
out = self.evaluate_step(batch)
# Tensorboard logging every summary_steps steps
if self.evaluate_step_count % self.options.test.summary_steps == 0:
self.evaluate_summaries(batch, out)
# add later to log at step 0
self.evaluate_step_count += 1
self.total_step_count += 1
for key, val in self.get_result_summary().items():
scalar = val
if isinstance(val, AverageMeter):
scalar = val.avg
self.logger.info("Test [%06d] %s: %.6f" % (self.total_step_count, key, scalar))
self.summary_writer.add_scalar("eval_" + key, scalar, self.total_step_count + 1)
def average_of_average_meters(self, average_meters):
s = sum([meter.sum for meter in average_meters])
c = sum([meter.count for meter in average_meters])
weighted_avg = s / c if c > 0 else 0.
avg = sum([meter.avg for meter in average_meters]) / len(average_meters)
ret = AverageMeter()
if self.weighted_mean:
ret.val, ret.avg = avg, weighted_avg
else:
ret.val, ret.avg = weighted_avg, avg
return ret
def get_result_summary(self):
if self.options.model.name == "pixel2mesh":
return {
"cd": self.average_of_average_meters(self.chamfer_distance),
"f1_tau": self.average_of_average_meters(self.f1_tau),
"f1_2tau": self.average_of_average_meters(self.f1_2tau),
}
elif self.options.model.name == "classifier":
return {
"acc_1": self.acc_1,
"acc_5": self.acc_5,
}
def evaluate_summaries(self, input_batch, out_summary):
self.logger.info("Test Step %06d/%06d (%06d) " % (self.evaluate_step_count,
len(self.dataset) // (
self.options.num_gpus * self.options.test.batch_size),
self.total_step_count,) \
+ ", ".join([key + " " + (str(val) if isinstance(val, AverageMeter) else "%.6f" % val)
for key, val in self.get_result_summary().items()]))
self.summary_writer.add_histogram("eval_labels", input_batch["labels"].cpu().numpy(),
self.total_step_count)
if self.renderer is not None:
# Do visualization for the first 2 images of the batch
render_mesh = self.renderer.p2m_batch_visualize(input_batch, out_summary, self.ellipsoid.faces)
self.summary_writer.add_image("eval_render_mesh", render_mesh, self.total_step_count)
================================================
FILE: functions/predictor.py
================================================
import os
import random
from logging import Logger
import imageio
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from functions.base import CheckpointRunner
from models.p2m import P2MModel
from utils.mesh import Ellipsoid
from utils.vis.renderer import MeshRenderer
class Predictor(CheckpointRunner):
def __init__(self, options, logger: Logger, writer, shared_model=None):
super().__init__(options, logger, writer, training=False, shared_model=shared_model)
# noinspection PyAttributeOutsideInit
def init_fn(self, shared_model=None, **kwargs):
self.gpu_inference = self.options.num_gpus > 0
if not self.gpu_inference:
raise NotImplementedError("CPU inference is currently buggy. This takes some extra efforts and "
"might be fixed in the future.")
# self.logger.warning("Render part would be disabled since you are using CPU. "
# "Neural renderer requires GPU to run. Please use other softwares "
# "or packages to view .obj file generated.")
if self.options.model.name == "pixel2mesh":
# create ellipsoid
self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos)
# create model
self.model = P2MModel(self.options.model, self.ellipsoid,
self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
if self.gpu_inference:
self.model.cuda()
# create renderer
self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
else:
raise NotImplementedError("Currently the predictor only supports pixel2mesh")
def models_dict(self):
return {'model': self.model}
def predict_step(self, input_batch):
self.model.eval()
# Run inference
with torch.no_grad():
images = input_batch['images']
out = self.model(images)
self.save_inference_results(input_batch, out)
def predict(self):
self.logger.info("Running predictions...")
predict_data_loader = DataLoader(self.dataset,
batch_size=self.options.test.batch_size,
pin_memory=self.options.pin_memory,
collate_fn=self.dataset_collate_fn)
for step, batch in enumerate(predict_data_loader):
self.logger.info("Predicting [%05d/%05d]" % (step * self.options.test.batch_size, len(self.dataset)))
if self.gpu_inference:
# Send input to GPU
batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
self.predict_step(batch)
def save_inference_results(self, inputs, outputs):
if self.options.model.name == "pixel2mesh":
batch_size = inputs["images"].size(0)
for i in range(batch_size):
basename, ext = os.path.splitext(inputs["filepath"][i])
mesh_center = np.mean(outputs["pred_coord_before_deform"][0][i].cpu().numpy(), 0)
verts = [outputs["pred_coord"][k][i].cpu().numpy() for k in range(3)]
for k, vert in enumerate(verts):
meshname = basename + ".%d.obj" % (k + 1)
vert_v = np.hstack((np.full([vert.shape[0], 1], "v"), vert))
mesh = np.vstack((vert_v, self.ellipsoid.obj_fmt_faces[k]))
np.savetxt(meshname, mesh, fmt='%s', delimiter=" ")
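                    # each row of `mesh` is one .obj text line: "v x y z" vertex
                    # rows from vert_v, then the "f i j k" face rows carried
                    # verbatim in obj_fmt_faces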
if self.gpu_inference:
# generate gif here
color_repo = ['light_blue', 'purple', 'orange', 'light_yellow']
                    rot_degree = 10
                    rot_radians = rot_degree / 180 * np.pi
                    rot_matrix = np.array([
                        [np.cos(rot_radians), 0, -np.sin(rot_radians)],
                        [0., 1., 0.],
                        [np.sin(rot_radians), 0, np.cos(rot_radians)]
                    ])
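                    # rot_matrix rotates points about the +y axis by rot_degree
                    # per frame, so the 360 // rot_degree frames below complete
                    # one full turn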
writer = imageio.get_writer(basename + ".gif", mode='I')
color = random.choice(color_repo)
for _ in tqdm(range(360 // rot_degree), desc="Rendering sample %d" % i):
image = inputs["images_orig"][i].cpu().numpy()
ret = image
for k, vert in enumerate(verts):
vert = rot_matrix.dot((vert - mesh_center).T).T + mesh_center
rend_result = self.renderer.visualize_reconstruction(None,
vert + \
np.array(
self.options.dataset.mesh_pos),
self.ellipsoid.faces[k],
image,
mesh_only=True,
color=color)
ret = np.concatenate((ret, rend_result), axis=2)
verts[k] = vert
ret = np.transpose(ret, (1, 2, 0))
writer.append_data((255 * ret).astype(np.uint8))
writer.close()
================================================
FILE: functions/saver.py
================================================
import os
import torch
import torch.nn
class CheckpointSaver(object):
"""Class that handles saving and loading checkpoints during training."""
def __init__(self, logger, checkpoint_dir=None, checkpoint_file=None):
self.logger = logger
if checkpoint_file is not None:
if not os.path.exists(checkpoint_file):
raise ValueError("Checkpoint file [%s] does not exist!" % checkpoint_file)
self.save_dir = os.path.dirname(os.path.abspath(checkpoint_file))
self.checkpoint_file = os.path.abspath(checkpoint_file)
return
if checkpoint_dir is None:
raise ValueError("Checkpoint directory must be not None in case file is not provided!")
self.save_dir = os.path.abspath(checkpoint_dir)
self.checkpoint_file = self.get_latest_checkpoint()
def load_checkpoint(self):
if self.checkpoint_file is None:
self.logger.info("Checkpoint file not found, skipping...")
return None
self.logger.info("Loading checkpoint file: %s" % self.checkpoint_file)
try:
return torch.load(self.checkpoint_file)
except UnicodeDecodeError:
# to be compatible with old encoding methods
return torch.load(self.checkpoint_file, encoding="bytes")
def save_checkpoint(self, obj, name):
self.checkpoint_file = os.path.join(self.save_dir, "%s.pt" % name)
self.logger.info("Dumping to checkpoint file: %s" % self.checkpoint_file)
torch.save(obj, self.checkpoint_file)
def get_latest_checkpoint(self):
        # automatically picks the checkpoint with the latest modification time
checkpoint_list = []
for dirpath, dirnames, filenames in os.walk(self.save_dir):
for filename in filenames:
if filename.endswith('.pt'):
file_path = os.path.abspath(os.path.join(dirpath, filename))
modified_time = os.path.getmtime(file_path)
checkpoint_list.append((file_path, modified_time))
checkpoint_list = sorted(checkpoint_list, key=lambda x: x[1])
return None if not checkpoint_list else checkpoint_list[-1][0]
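

if __name__ == "__main__":
    # A minimal usage sketch, assuming /tmp is writable (the directory name is
    # arbitrary): save an object, then reload it via the discovery logic above.
    import logging
    logging.basicConfig(level=logging.INFO)
    demo_dir = "/tmp/checkpoint_saver_demo"
    os.makedirs(demo_dir, exist_ok=True)
    saver = CheckpointSaver(logging.getLogger("saver"), checkpoint_dir=demo_dir)
    saver.save_checkpoint({"step": 1}, "demo_000001")
    print(saver.load_checkpoint())  # -> {'step': 1}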
================================================
FILE: functions/trainer.py
================================================
import time
from datetime import timedelta
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from functions.base import CheckpointRunner
from functions.evaluator import Evaluator
from models.classifier import Classifier
from models.losses.classifier import CrossEntropyLoss
from models.losses.p2m import P2MLoss
from models.p2m import P2MModel
from utils.average_meter import AverageMeter
from utils.mesh import Ellipsoid
from utils.tensor import recursive_detach
from utils.vis.renderer import MeshRenderer
class Trainer(CheckpointRunner):
# noinspection PyAttributeOutsideInit
def init_fn(self, shared_model=None, **kwargs):
if self.options.model.name == "pixel2mesh":
# Visualization renderer
self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
# create ellipsoid
self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos)
else:
self.renderer = None
if shared_model is not None:
self.model = shared_model
else:
if self.options.model.name == "pixel2mesh":
# create model
self.model = P2MModel(self.options.model, self.ellipsoid,
self.options.dataset.camera_f, self.options.dataset.camera_c,
self.options.dataset.mesh_pos)
elif self.options.model.name == "classifier":
self.model = Classifier(self.options.model, self.options.dataset.num_classes)
else:
raise NotImplementedError("Your model is not found")
self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda()
        # Set up the optimizer and learning-rate scheduler
if self.options.optim.name == "adam":
self.optimizer = torch.optim.Adam(
params=list(self.model.parameters()),
lr=self.options.optim.lr,
betas=(self.options.optim.adam_beta1, 0.999),
weight_decay=self.options.optim.wd
)
elif self.options.optim.name == "sgd":
self.optimizer = torch.optim.SGD(
params=list(self.model.parameters()),
lr=self.options.optim.lr,
momentum=self.options.optim.sgd_momentum,
weight_decay=self.options.optim.wd
)
else:
raise NotImplementedError("Your optimizer is not found")
self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
self.optimizer, self.options.optim.lr_step, self.options.optim.lr_factor
)
# Create loss functions
if self.options.model.name == "pixel2mesh":
self.criterion = P2MLoss(self.options.loss, self.ellipsoid).cuda()
elif self.options.model.name == "classifier":
self.criterion = CrossEntropyLoss()
else:
raise NotImplementedError("Your loss is not found")
# Create AverageMeters for losses
self.losses = AverageMeter()
# Evaluators
self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer, shared_model=self.model)]
def models_dict(self):
return {'model': self.model}
def optimizers_dict(self):
return {'optimizer': self.optimizer,
'lr_scheduler': self.lr_scheduler}
def train_step(self, input_batch):
self.model.train()
# Grab data from the batch
images = input_batch["images"]
# predict with model
out = self.model(images)
# compute loss
loss, loss_summary = self.criterion(out, input_batch)
self.losses.update(loss.detach().cpu().item())
# Do backprop
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# Pack output arguments to be used for visualization
return recursive_detach(out), recursive_detach(loss_summary)
def train(self):
# Run training for num_epochs epochs
for epoch in range(self.epoch_count, self.options.train.num_epochs):
self.epoch_count += 1
# Create a new data loader for every epoch
train_data_loader = DataLoader(self.dataset,
batch_size=self.options.train.batch_size * self.options.num_gpus,
num_workers=self.options.num_workers,
pin_memory=self.options.pin_memory,
shuffle=self.options.train.shuffle,
collate_fn=self.dataset_collate_fn)
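            # note: the effective batch size is train.batch_size * num_gpus,
            # the same denominator used for the step total in train_summaries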
# Reset loss
self.losses.reset()
# Iterate over all batches in an epoch
for step, batch in enumerate(train_data_loader):
# Send input to GPU
batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()}
# Run training step
out = self.train_step(batch)
self.step_count += 1
# Tensorboard logging every summary_steps steps
if self.step_count % self.options.train.summary_steps == 0:
self.train_summaries(batch, *out)
# Save checkpoint every checkpoint_steps steps
if self.step_count % self.options.train.checkpoint_steps == 0:
self.dump_checkpoint()
# save checkpoint after each epoch
self.dump_checkpoint()
# Run validation every test_epochs
if self.epoch_count % self.options.train.test_epochs == 0:
self.test()
# lr scheduler step
self.lr_scheduler.step()
def train_summaries(self, input_batch, out_summary, loss_summary):
if self.renderer is not None:
# Do visualization for the first 2 images of the batch
render_mesh = self.renderer.p2m_batch_visualize(input_batch, out_summary, self.ellipsoid.faces)
self.summary_writer.add_image("render_mesh", render_mesh, self.step_count)
self.summary_writer.add_histogram("length_distribution", input_batch["length"].cpu().numpy(),
self.step_count)
# Debug info for filenames
self.logger.debug(input_batch["filename"])
# Save results in Tensorboard
for k, v in loss_summary.items():
self.summary_writer.add_scalar(k, v, self.step_count)
# Save results to log
self.logger.info("Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.9f (%.9f)" % (
self.epoch_count, self.step_count,
self.options.train.num_epochs * len(self.dataset) // (
self.options.train.batch_size * self.options.num_gpus),
self.time_elapsed, self.losses.val, self.losses.avg))
def test(self):
for evaluator in self.evaluators:
evaluator.evaluate()
================================================
FILE: logger.py
================================================
import logging
import os
def create_logger(cfg, phase='train'):
log_file = '{}_{}.log'.format(cfg.version, phase)
final_log_file = os.path.join(cfg.log_dir, log_file)
head = '%(asctime)-15s %(message)s'
logging.basicConfig(filename=str(final_log_file), format=head)
logger = logging.getLogger()
if cfg.log_level == "info":
logger.setLevel(logging.INFO)
elif cfg.log_level == "debug":
logger.setLevel(logging.DEBUG)
else:
raise NotImplementedError("Log level has to be one of info and debug")
console = logging.StreamHandler()
logging.getLogger('').addHandler(console)
return logger
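

if __name__ == "__main__":
    # A minimal sketch: create_logger only reads `version`, `log_dir` and
    # `log_level` from cfg, so an ad-hoc namespace (hypothetical) suffices.
    from types import SimpleNamespace
    demo_cfg = SimpleNamespace(version="demo", log_dir=".", log_level="info")
    create_logger(demo_cfg).info("hello")  # also writes ./demo_train.log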
================================================
FILE: models/backbones/__init__.py
================================================
from models.backbones.resnet import resnet50
from models.backbones.vgg16 import VGG16TensorflowAlign, VGG16P2M, VGG16Recons
def get_backbone(options):
if options.backbone.startswith("vgg16"):
if options.align_with_tensorflow:
nn_encoder = VGG16TensorflowAlign()
else:
nn_encoder = VGG16P2M(pretrained="pretrained" in options.backbone)
nn_decoder = VGG16Recons()
elif options.backbone == "resnet50":
nn_encoder = resnet50()
nn_decoder = None
else:
raise NotImplementedError("No implemented backbone called '%s' found" % options.backbone)
return nn_encoder, nn_decoder
================================================
FILE: models/backbones/resnet.py
================================================
import torch
from torchvision.models import ResNet
from torchvision.models.resnet import Bottleneck
import config
class P2MResNet(ResNet):
def __init__(self, *args, **kwargs):
self.output_dim = 0
super().__init__(*args, **kwargs)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
res = super()._make_layer(block, planes, blocks, stride=stride, dilate=dilate)
self.output_dim += self.inplanes
return res
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
features = []
x = self.layer1(x)
features.append(x)
x = self.layer2(x)
features.append(x)
x = self.layer3(x)
features.append(x)
x = self.layer4(x)
features.append(x)
return features
@property
def features_dim(self):
return self.output_dim
def resnet50():
model = P2MResNet(Bottleneck, [3, 4, 6, 3])
state_dict = torch.load(config.PRETRAINED_WEIGHTS_PATH["resnet50"])
model.load_state_dict(state_dict)
return model
================================================
FILE: models/backbones/vgg16.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import config
class VGG16TensorflowAlign(nn.Module):
def __init__(self, n_classes_input=3):
super(VGG16TensorflowAlign, self).__init__()
self.features_dim = 960
# this is to align with tensorflow padding (with stride)
# https://bugxch.github.io/tf%E4%B8%AD%E7%9A%84padding%E6%96%B9%E5%BC%8FSAME%E5%92%8CVALID%E6%9C%89%E4%BB%80%E4%B9%88%E5%8C%BA%E5%88%AB/
self.same_padding = nn.ZeroPad2d(1)
self.tf_padding = nn.ZeroPad2d((0, 1, 0, 1))
self.tf_padding_2 = nn.ZeroPad2d((1, 2, 1, 2))
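        # TF "SAME" with stride 2 on even-sized inputs pads asymmetrically
        # (one extra pixel on the bottom/right), hence (0, 1, 0, 1) for the
        # 3x3 strided convs and (1, 2, 1, 2) for the 5x5 ones below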
self.conv0_1 = nn.Conv2d(n_classes_input, 16, 3, stride=1, padding=0)
self.conv0_2 = nn.Conv2d(16, 16, 3, stride=1, padding=0)
self.conv1_1 = nn.Conv2d(16, 32, 3, stride=2, padding=0) # 224 -> 112
self.conv1_2 = nn.Conv2d(32, 32, 3, stride=1, padding=0)
self.conv1_3 = nn.Conv2d(32, 32, 3, stride=1, padding=0)
self.conv2_1 = nn.Conv2d(32, 64, 3, stride=2, padding=0) # 112 -> 56
self.conv2_2 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
self.conv2_3 = nn.Conv2d(64, 64, 3, stride=1, padding=0)
self.conv3_1 = nn.Conv2d(64, 128, 3, stride=2, padding=0) # 56 -> 28
self.conv3_2 = nn.Conv2d(128, 128, 3, stride=1, padding=0)
self.conv3_3 = nn.Conv2d(128, 128, 3, stride=1, padding=0)
self.conv4_1 = nn.Conv2d(128, 256, 5, stride=2, padding=0) # 28 -> 14
self.conv4_2 = nn.Conv2d(256, 256, 3, stride=1, padding=0)
self.conv4_3 = nn.Conv2d(256, 256, 3, stride=1, padding=0)
self.conv5_1 = nn.Conv2d(256, 512, 5, stride=2, padding=0) # 14 -> 7
self.conv5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=0)
self.conv5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=0)
self.conv5_4 = nn.Conv2d(512, 512, 3, stride=1, padding=0)
def forward(self, img):
img = F.relu(self.conv0_1(self.same_padding(img)))
img = F.relu(self.conv0_2(self.same_padding(img)))
img = F.relu(self.conv1_1(self.tf_padding(img)))
img = F.relu(self.conv1_2(self.same_padding(img)))
img = F.relu(self.conv1_3(self.same_padding(img)))
img = F.relu(self.conv2_1(self.tf_padding(img)))
img = F.relu(self.conv2_2(self.same_padding(img)))
img = F.relu(self.conv2_3(self.same_padding(img)))
img2 = img
img = F.relu(self.conv3_1(self.tf_padding(img)))
img = F.relu(self.conv3_2(self.same_padding(img)))
img = F.relu(self.conv3_3(self.same_padding(img)))
img3 = img
img = F.relu(self.conv4_1(self.tf_padding_2(img)))
img = F.relu(self.conv4_2(self.same_padding(img)))
img = F.relu(self.conv4_3(self.same_padding(img)))
img4 = img
img = F.relu(self.conv5_1(self.tf_padding_2(img)))
img = F.relu(self.conv5_2(self.same_padding(img)))
img = F.relu(self.conv5_3(self.same_padding(img)))
img = F.relu(self.conv5_4(self.same_padding(img)))
img5 = img
return [img2, img3, img4, img5]
class VGG16P2M(nn.Module):
def __init__(self, n_classes_input=3, pretrained=False):
super(VGG16P2M, self).__init__()
self.features_dim = 960
self.conv0_1 = nn.Conv2d(n_classes_input, 16, 3, stride=1, padding=1)
self.conv0_2 = nn.Conv2d(16, 16, 3, stride=1, padding=1)
self.conv1_1 = nn.Conv2d(16, 32, 3, stride=2, padding=1) # 224 -> 112
self.conv1_2 = nn.Conv2d(32, 32, 3, stride=1, padding=1)
self.conv1_3 = nn.Conv2d(32, 32, 3, stride=1, padding=1)
self.conv2_1 = nn.Conv2d(32, 64, 3, stride=2, padding=1) # 112 -> 56
self.conv2_2 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
self.conv2_3 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
self.conv3_1 = nn.Conv2d(64, 128, 3, stride=2, padding=1) # 56 -> 28
self.conv3_2 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
self.conv3_3 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
self.conv4_1 = nn.Conv2d(128, 256, 5, stride=2, padding=2) # 28 -> 14
self.conv4_2 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
self.conv4_3 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
self.conv5_1 = nn.Conv2d(256, 512, 5, stride=2, padding=2) # 14 -> 7
self.conv5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
self.conv5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
self.conv5_4 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
if "vgg16p2m" in config.PRETRAINED_WEIGHTS_PATH and pretrained:
state_dict = torch.load(config.PRETRAINED_WEIGHTS_PATH["vgg16p2m"])
self.load_state_dict(state_dict)
else:
self._initialize_weights()
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def forward(self, img):
img = F.relu(self.conv0_1(img))
img = F.relu(self.conv0_2(img))
# img0 = torch.squeeze(img) # 224
img = F.relu(self.conv1_1(img))
img = F.relu(self.conv1_2(img))
img = F.relu(self.conv1_3(img))
# img1 = torch.squeeze(img) # 112
img = F.relu(self.conv2_1(img))
img = F.relu(self.conv2_2(img))
img = F.relu(self.conv2_3(img))
img2 = img
img = F.relu(self.conv3_1(img))
img = F.relu(self.conv3_2(img))
img = F.relu(self.conv3_3(img))
img3 = img
img = F.relu(self.conv4_1(img))
img = F.relu(self.conv4_2(img))
img = F.relu(self.conv4_3(img))
img4 = img
img = F.relu(self.conv5_1(img))
img = F.relu(self.conv5_2(img))
img = F.relu(self.conv5_3(img))
img = F.relu(self.conv5_4(img))
img5 = img
return [img2, img3, img4, img5]
class VGG16Recons(nn.Module):
def __init__(self, input_dim=512, image_channel=3):
super(VGG16Recons, self).__init__()
self.conv_1 = nn.ConvTranspose2d(input_dim, 256, kernel_size=2, stride=2, padding=0) # 7 -> 14
self.conv_2 = nn.ConvTranspose2d(512, 128, kernel_size=4, stride=2, padding=1) # 14 -> 28
self.conv_3 = nn.ConvTranspose2d(256, 64, kernel_size=4, stride=2, padding=1) # 28 -> 56
self.conv_4 = nn.ConvTranspose2d(128, 32, kernel_size=6, stride=2, padding=2) # 56 -> 112
self.conv_5 = nn.ConvTranspose2d(32, image_channel, kernel_size=6, stride=2, padding=2) # 112 -> 224
def forward(self, img_feats):
x = F.relu(self.conv_1(img_feats[-1]))
x = torch.cat((x, img_feats[-2]), dim=1)
x = F.relu(self.conv_2(x))
x = torch.cat((x, img_feats[-3]), dim=1)
x = F.relu(self.conv_3(x))
x = torch.cat((x, img_feats[-4]), dim=1)
x = F.relu(self.conv_4(x))
x = F.relu(self.conv_5(x))
return torch.sigmoid(x)
================================================
FILE: models/classifier.py
================================================
import torch.nn as nn
from models.backbones import get_backbone
class Classifier(nn.Module):
def __init__(self, options, num_classes):
super(Classifier, self).__init__()
self.nn_encoder, self.nn_decoder = get_backbone(options)
if "vgg" in options.backbone:
self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
self.classifier = nn.Sequential(
nn.Linear(list(self.nn_encoder.children())[-1].out_channels * 7 * 7, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, num_classes),
)
elif "resnet" in options.backbone:
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.classifier = nn.Linear(self.nn_encoder.inplanes, num_classes)
else:
raise NotImplementedError
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
def forward(self, img):
x = self.nn_encoder(img)[-1] # last layer
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
================================================
FILE: models/layers/chamfer_wrapper.py
================================================
import chamfer
import torch
import torch.nn as nn
from torch.autograd import Function
# Chamfer's distance module @thibaultgroueix
# GPU tensors only
class ChamferFunction(Function):
@staticmethod
def forward(ctx, xyz1, xyz2):
batchsize, n, _ = xyz1.size()
_, m, _ = xyz2.size()
dist1 = torch.zeros(batchsize, n)
dist2 = torch.zeros(batchsize, m)
idx1 = torch.zeros(batchsize, n).type(torch.IntTensor)
idx2 = torch.zeros(batchsize, m).type(torch.IntTensor)
dist1 = dist1.cuda()
dist2 = dist2.cuda()
idx1 = idx1.cuda()
idx2 = idx2.cuda()
chamfer.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
ctx.save_for_backward(xyz1, xyz2, idx1, idx2)
return dist1, dist2, idx1, idx2
@staticmethod
def backward(ctx, graddist1, graddist2, _idx1, _idx2):
xyz1, xyz2, idx1, idx2 = ctx.saved_tensors
graddist1 = graddist1.contiguous()
graddist2 = graddist2.contiguous()
gradxyz1 = torch.zeros(xyz1.size())
gradxyz2 = torch.zeros(xyz2.size())
gradxyz1 = gradxyz1.cuda()
gradxyz2 = gradxyz2.cuda()
chamfer.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2)
return gradxyz1, gradxyz2
class ChamferDist(nn.Module):
def __init__(self):
super(ChamferDist, self).__init__()
def forward(self, input1, input2):
return ChamferFunction.apply(input1, input2)
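

if __name__ == "__main__":
    # A minimal sanity check, assuming a CUDA device and the `chamfer`
    # extension built from external/chamfer/setup.py.
    p1, p2 = torch.rand(2, 100, 3).cuda(), torch.rand(2, 120, 3).cuda()
    dist1, dist2, idx1, idx2 = ChamferDist()(p1, p2)
    print(dist1.shape, dist2.shape)  # torch.Size([2, 100]) torch.Size([2, 120])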
================================================
FILE: models/layers/gbottleneck.py
================================================
import torch.nn as nn
import torch.nn.functional as F
from models.layers.gconv import GConv
class GResBlock(nn.Module):
def __init__(self, in_dim, hidden_dim, adj_mat, activation=None):
super(GResBlock, self).__init__()
self.conv1 = GConv(in_features=in_dim, out_features=hidden_dim, adj_mat=adj_mat)
self.conv2 = GConv(in_features=hidden_dim, out_features=in_dim, adj_mat=adj_mat)
self.activation = F.relu if activation else None
def forward(self, inputs):
x = self.conv1(inputs)
if self.activation:
x = self.activation(x)
x = self.conv2(x)
if self.activation:
x = self.activation(x)
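        # halve the residual sum so activations stay at the input scale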
return (inputs + x) * 0.5
class GBottleneck(nn.Module):
def __init__(self, block_num, in_dim, hidden_dim, out_dim, adj_mat, activation=None):
super(GBottleneck, self).__init__()
resblock_layers = [GResBlock(in_dim=hidden_dim, hidden_dim=hidden_dim, adj_mat=adj_mat, activation=activation)
for _ in range(block_num)]
self.blocks = nn.Sequential(*resblock_layers)
self.conv1 = GConv(in_features=in_dim, out_features=hidden_dim, adj_mat=adj_mat)
self.conv2 = GConv(in_features=hidden_dim, out_features=out_dim, adj_mat=adj_mat)
self.activation = F.relu if activation else None
def forward(self, inputs):
x = self.conv1(inputs)
if self.activation:
x = self.activation(x)
x_hidden = self.blocks(x)
x_out = self.conv2(x_hidden)
return x_out, x_hidden
================================================
FILE: models/layers/gconv.py
================================================
import math
import torch
import torch.nn as nn
from utils.tensor import dot
class GConv(nn.Module):
"""Simple GCN layer
Similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, adj_mat, bias=True):
super(GConv, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.adj_mat = nn.Parameter(adj_mat, requires_grad=False)
self.weight = nn.Parameter(torch.zeros((in_features, out_features), dtype=torch.float))
# Following https://github.com/Tong-ZHAO/Pixel2Mesh-Pytorch/blob/a0ae88c4a42eef6f8f253417b97df978db842708/model/gcn_layers.py#L45
# This seems to be different from the original implementation of P2M
self.loop_weight = nn.Parameter(torch.zeros((in_features, out_features), dtype=torch.float))
if bias:
self.bias = nn.Parameter(torch.zeros((out_features,), dtype=torch.float))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
nn.init.xavier_uniform_(self.weight.data)
nn.init.xavier_uniform_(self.loop_weight.data)
def forward(self, inputs):
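        # output = A @ (X @ W) + X @ W_loop (+ bias): neighbor aggregation
        # through the fixed adjacency plus a separate self-loop transform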
support = torch.matmul(inputs, self.weight)
support_loop = torch.matmul(inputs, self.loop_weight)
output = dot(self.adj_mat, support, True) + support_loop
if self.bias is not None:
ret = output + self.bias
else:
ret = output
return ret
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
================================================
FILE: models/layers/gpooling.py
================================================
import torch
import torch.nn as nn
import numpy as np
class GUnpooling(nn.Module):
"""Graph Pooling layer, aims to add additional vertices to the graph.
The middle point of each edges are added, and its feature is simply
the average of the two edge vertices.
Three middle points are connected in each triangle.
"""
def __init__(self, unpool_idx):
super(GUnpooling, self).__init__()
self.unpool_idx = unpool_idx
# save dim info
self.in_num = torch.max(unpool_idx).item()
self.out_num = self.in_num + len(unpool_idx)
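        # e.g. for the ellipsoid shipped with P2M: 156 + 462 = 618 and
        # 618 + 1848 = 2466 vertices after the two unpooling stages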
def forward(self, inputs):
new_features = inputs[:, self.unpool_idx].clone()
new_vertices = 0.5 * new_features.sum(2)
output = torch.cat([inputs, new_vertices], 1)
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_num) + ' -> ' \
+ str(self.out_num) + ')'
================================================
FILE: models/layers/gprojection.py
================================================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Threshold
class GProjection(nn.Module):
"""
    Graph Projection layer, which pools 2D features onto the mesh.
    The layer projects each vertex of the mesh onto the 2D image and uses
    bilinear interpolation to fetch the corresponding feature.
"""
def __init__(self, mesh_pos, camera_f, camera_c, bound=0, tensorflow_compatible=False):
super(GProjection, self).__init__()
self.mesh_pos, self.camera_f, self.camera_c = mesh_pos, camera_f, camera_c
self.threshold = None
        self.bound = bound
self.tensorflow_compatible = tensorflow_compatible
if self.bound != 0:
self.threshold = Threshold(bound, bound)
def bound_val(self, x):
"""
        given x, clamp it away from zero: returns max(x, bound) for bound > 0,
        the mirrored clamp for bound < 0, and x unchanged for bound == 0
"""
if self.bound < 0:
return -self.threshold(-x)
elif self.bound > 0:
return self.threshold(x)
return x
@staticmethod
def image_feature_shape(img):
return np.array([img.size(-1), img.size(-2)])
def project_tensorflow(self, x, y, img_size, img_feat):
x = torch.clamp(x, min=0, max=img_size[1] - 1)
y = torch.clamp(y, min=0, max=img_size[0] - 1)
# it's tedious and contains bugs...
# when x1 = x2, the area is 0, therefore it won't be processed
# keep it here to align with tensorflow version
x1, x2 = torch.floor(x).long(), torch.ceil(x).long()
y1, y2 = torch.floor(y).long(), torch.ceil(y).long()
Q11 = img_feat[:, x1, y1].clone()
Q12 = img_feat[:, x1, y2].clone()
Q21 = img_feat[:, x2, y1].clone()
Q22 = img_feat[:, x2, y2].clone()
weights = torch.mul(x2.float() - x, y2.float() - y)
Q11 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q11, 0, 1))
weights = torch.mul(x2.float() - x, y - y1.float())
Q12 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q12, 0, 1))
weights = torch.mul(x - x1.float(), y2.float() - y)
Q21 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q21, 0, 1))
weights = torch.mul(x - x1.float(), y - y1.float())
Q22 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q22, 0, 1))
output = Q11 + Q21 + Q12 + Q22
return output
def forward(self, resolution, img_features, inputs):
half_resolution = (resolution - 1) / 2
camera_c_offset = np.array(self.camera_c) - half_resolution
# map to [-1, 1]
# not sure why they render to negative x
positions = inputs + torch.tensor(self.mesh_pos, device=inputs.device, dtype=torch.float)
w = -self.camera_f[0] * (positions[:, :, 0] / self.bound_val(positions[:, :, 2])) + camera_c_offset[0]
h = self.camera_f[1] * (positions[:, :, 1] / self.bound_val(positions[:, :, 2])) + camera_c_offset[1]
if self.tensorflow_compatible:
# to align with tensorflow
# this is incorrect, I believe
w += half_resolution[0]
h += half_resolution[1]
else:
# directly do clamping
w /= half_resolution[0]
h /= half_resolution[1]
# clamp to [-1, 1]
w = torch.clamp(w, min=-1, max=1)
h = torch.clamp(h, min=-1, max=1)
feats = [inputs]
for img_feature in img_features:
feats.append(self.project(resolution, img_feature, torch.stack([w, h], dim=-1)))
output = torch.cat(feats, 2)
return output
def project(self, img_shape, img_feat, sample_points):
"""
:param img_shape: raw image shape
:param img_feat: [batch_size x channel x h x w]
:param sample_points: [batch_size x num_points x 2], in range [-1, 1]
:return: [batch_size x num_points x feat_dim]
"""
if self.tensorflow_compatible:
feature_shape = self.image_feature_shape(img_feat)
points_w = sample_points[:, :, 0] / (img_shape[0] / feature_shape[0])
points_h = sample_points[:, :, 1] / (img_shape[1] / feature_shape[1])
output = torch.stack([self.project_tensorflow(points_h[i], points_w[i],
feature_shape, img_feat[i]) for i in range(img_feat.size(0))], 0)
else:
output = F.grid_sample(img_feat, sample_points.unsqueeze(1))
output = torch.transpose(output.squeeze(2), 1, 2)
return output
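

if __name__ == "__main__":
    # A minimal sketch of the default grid_sample path, using the ShapeNet
    # camera values from options.py and a toy 4-channel feature map.
    proj = GProjection(mesh_pos=[0., 0., -0.8], camera_f=[248., 248.],
                       camera_c=[111.5, 111.5])
    pts = torch.rand(1, 5, 3) - torch.tensor([0.5, 0.5, 1.5])  # keep z < 0
    out = proj(np.array([224, 224]), [torch.rand(1, 4, 56, 56)], pts)
    print(out.shape)  # torch.Size([1, 5, 7]): 3 coords + 4 projected features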
================================================
FILE: models/losses/classifier.py
================================================
import torch
import torch.nn as nn
class CrossEntropyLoss(nn.Module):
def __init__(self):
super().__init__()
self.cross_entropy = nn.CrossEntropyLoss().cuda()
def forward(self, outputs, targets):
labels = targets["labels"]
loss = self.cross_entropy(outputs, labels)
_, predicted = torch.max(outputs.data, 1)
total = labels.size(0)
correct = (predicted == labels).sum().item()
return loss, {"loss": loss, "acc": correct / total}
================================================
FILE: models/losses/p2m.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from models.layers.chamfer_wrapper import ChamferDist
class P2MLoss(nn.Module):
def __init__(self, options, ellipsoid):
super().__init__()
self.options = options
self.l1_loss = nn.L1Loss(reduction='mean')
self.l2_loss = nn.MSELoss(reduction='mean')
self.chamfer_dist = ChamferDist()
self.laplace_idx = nn.ParameterList([
nn.Parameter(idx, requires_grad=False) for idx in ellipsoid.laplace_idx])
self.edges = nn.ParameterList([
nn.Parameter(edges, requires_grad=False) for edges in ellipsoid.edges])
def edge_regularization(self, pred, edges):
"""
:param pred: batch_size * num_points * 3
:param edges: num_edges * 2
:return:
"""
return self.l2_loss(pred[:, edges[:, 0]], pred[:, edges[:, 1]]) * pred.size(-1)
@staticmethod
def laplace_coord(inputs, lap_idx):
"""
:param inputs: nodes Tensor, size (n_pts, n_features = 3)
:param lap_idx: laplace index matrix Tensor, size (n_pts, 10)
for each vertex, the laplace vector shows: [neighbor_index * 8, self_index, neighbor_count]
:returns
The laplacian coordinates of input with respect to edges as in lap_idx
"""
indices = lap_idx[:, :-2]
invalid_mask = indices < 0
all_valid_indices = indices.clone()
all_valid_indices[invalid_mask] = 0 # do this to avoid negative indices
vertices = inputs[:, all_valid_indices]
vertices[:, invalid_mask] = 0
neighbor_sum = torch.sum(vertices, 2)
neighbor_count = lap_idx[:, -1].float()
laplace = inputs - neighbor_sum / neighbor_count[None, :, None]
return laplace
def laplace_regularization(self, input1, input2, block_idx):
"""
:param input1: vertices tensor before deformation
:param input2: vertices after the deformation
:param block_idx: idx to select laplace index matrix tensor
        :return: laplace loss and, when block_idx > 0, a move loss as in the
                 original TF code
"""
lap1 = self.laplace_coord(input1, self.laplace_idx[block_idx])
lap2 = self.laplace_coord(input2, self.laplace_idx[block_idx])
laplace_loss = self.l2_loss(lap1, lap2) * lap1.size(-1)
move_loss = self.l2_loss(input1, input2) * input1.size(-1) if block_idx > 0 else 0
return laplace_loss, move_loss
def normal_loss(self, gt_normal, indices, pred_points, adj_list):
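        # each predicted edge should lie perpendicular to the ground-truth
        # normal at its nearest GT point (idx2 from the chamfer distance),
        # so the loss is the mean |cosine| between edges and normals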
edges = F.normalize(pred_points[:, adj_list[:, 0]] - pred_points[:, adj_list[:, 1]], dim=2)
nearest_normals = torch.stack([t[i] for t, i in zip(gt_normal, indices.long())])
normals = F.normalize(nearest_normals[:, adj_list[:, 0]], dim=2)
cosine = torch.abs(torch.sum(edges * normals, 2))
return torch.mean(cosine)
def image_loss(self, gt_img, pred_img):
rect_loss = F.binary_cross_entropy(pred_img, gt_img)
return rect_loss
def forward(self, outputs, targets):
"""
:param outputs: outputs from P2MModel
:param targets: targets from input
:return: loss, loss_summary (dict)
"""
chamfer_loss, edge_loss, normal_loss, lap_loss, move_loss = 0., 0., 0., 0., 0.
lap_const = [0.2, 1., 1.]
gt_coord, gt_normal, gt_images = targets["points"], targets["normals"], targets["images"]
pred_coord, pred_coord_before_deform = outputs["pred_coord"], outputs["pred_coord_before_deform"]
image_loss = 0.
if outputs["reconst"] is not None and self.options.weights.reconst != 0:
image_loss = self.image_loss(gt_images, outputs["reconst"])
for i in range(3):
dist1, dist2, idx1, idx2 = self.chamfer_dist(gt_coord, pred_coord[i])
chamfer_loss += self.options.weights.chamfer[i] * (torch.mean(dist1) +
self.options.weights.chamfer_opposite * torch.mean(dist2))
normal_loss += self.normal_loss(gt_normal, idx2, pred_coord[i], self.edges[i])
edge_loss += self.edge_regularization(pred_coord[i], self.edges[i])
lap, move = self.laplace_regularization(pred_coord_before_deform[i],
pred_coord[i], i)
lap_loss += lap_const[i] * lap
move_loss += lap_const[i] * move
loss = chamfer_loss + image_loss * self.options.weights.reconst + \
self.options.weights.laplace * lap_loss + \
self.options.weights.move * move_loss + \
self.options.weights.edge * edge_loss + \
self.options.weights.normal * normal_loss
loss = loss * self.options.weights.constant
return loss, {
"loss": loss,
"loss_chamfer": chamfer_loss,
"loss_edge": edge_loss,
"loss_laplace": lap_loss,
"loss_move": move_loss,
"loss_normal": normal_loss,
}
================================================
FILE: models/p2m.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from models.backbones import get_backbone
from models.layers.gbottleneck import GBottleneck
from models.layers.gconv import GConv
from models.layers.gpooling import GUnpooling
from models.layers.gprojection import GProjection
class P2MModel(nn.Module):
def __init__(self, options, ellipsoid, camera_f, camera_c, mesh_pos):
super(P2MModel, self).__init__()
self.hidden_dim = options.hidden_dim
self.coord_dim = options.coord_dim
self.last_hidden_dim = options.last_hidden_dim
self.init_pts = nn.Parameter(ellipsoid.coord, requires_grad=False)
self.gconv_activation = options.gconv_activation
self.nn_encoder, self.nn_decoder = get_backbone(options)
self.features_dim = self.nn_encoder.features_dim + self.coord_dim
self.gcns = nn.ModuleList([
GBottleneck(6, self.features_dim, self.hidden_dim, self.coord_dim,
ellipsoid.adj_mat[0], activation=self.gconv_activation),
GBottleneck(6, self.features_dim + self.hidden_dim, self.hidden_dim, self.coord_dim,
ellipsoid.adj_mat[1], activation=self.gconv_activation),
GBottleneck(6, self.features_dim + self.hidden_dim, self.hidden_dim, self.last_hidden_dim,
ellipsoid.adj_mat[2], activation=self.gconv_activation)
])
self.unpooling = nn.ModuleList([
GUnpooling(ellipsoid.unpool_idx[0]),
GUnpooling(ellipsoid.unpool_idx[1])
])
self.projection = GProjection(mesh_pos, camera_f, camera_c, bound=options.z_threshold,
tensorflow_compatible=options.align_with_tensorflow)
self.gconv = GConv(in_features=self.last_hidden_dim, out_features=self.coord_dim,
adj_mat=ellipsoid.adj_mat[2])
def forward(self, img):
batch_size = img.size(0)
img_feats = self.nn_encoder(img)
img_shape = self.projection.image_feature_shape(img)
init_pts = self.init_pts.data.unsqueeze(0).expand(batch_size, -1, -1)
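        # init_pts: (batch_size, num_init_vertices, 3) copies of the ellipsoid coordinates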
# GCN Block 1
x = self.projection(img_shape, img_feats, init_pts)
x1, x_hidden = self.gcns[0](x)
# before deformation 2
x1_up = self.unpooling[0](x1)
# GCN Block 2
x = self.projection(img_shape, img_feats, x1)
x = self.unpooling[0](torch.cat([x, x_hidden], 2))
# after deformation 2
x2, x_hidden = self.gcns[1](x)
# before deformation 3
x2_up = self.unpooling[1](x2)
# GCN Block 3
x = self.projection(img_shape, img_feats, x2)
x = self.unpooling[1](torch.cat([x, x_hidden], 2))
x3, _ = self.gcns[2](x)
if self.gconv_activation:
x3 = F.relu(x3)
# after deformation 3
x3 = self.gconv(x3)
if self.nn_decoder is not None:
reconst = self.nn_decoder(img_feats)
else:
reconst = None
return {
"pred_coord": [x1, x2, x3],
"pred_coord_before_deform": [init_pts, x1_up, x2_up],
"reconst": reconst
}
================================================
FILE: options.py
================================================
import os
import pprint
from argparse import ArgumentParser
from datetime import datetime
import numpy as np
import yaml
from easydict import EasyDict as edict
from tensorboardX import SummaryWriter
from logger import create_logger
options = edict()
options.name = 'p2m'
options.version = None
options.num_workers = 1
options.num_gpus = 1
options.pin_memory = True
options.log_dir = "logs"
options.log_level = "info"
options.summary_dir = "summary"
options.checkpoint_dir = "checkpoints"
options.checkpoint = None
options.dataset = edict()
options.dataset.name = "shapenet"
options.dataset.subset_train = "train_small"
options.dataset.subset_eval = "test_small"
options.dataset.camera_f = [248., 248.]
options.dataset.camera_c = [111.5, 111.5]
options.dataset.mesh_pos = [0., 0., -0.8]
options.dataset.normalization = True
options.dataset.num_classes = 13
options.dataset.shapenet = edict()
options.dataset.shapenet.num_points = 3000
options.dataset.shapenet.resize_with_constant_border = False
options.dataset.predict = edict()
options.dataset.predict.folder = "/tmp"
options.model = edict()
options.model.name = "pixel2mesh"
options.model.hidden_dim = 192
options.model.last_hidden_dim = 192
options.model.coord_dim = 3
options.model.backbone = "vgg16"
options.model.gconv_activation = True
# provide a bound for z so that z never reaches 0 in the denominator:
# a positive z_threshold clamps z to be at least z_threshold;
# a negative one clamps z away from zero on the other side
options.model.z_threshold = 0
# align with original tensorflow model
# please follow experiments/tensorflow.yml
options.model.align_with_tensorflow = False
options.loss = edict()
options.loss.weights = edict()
options.loss.weights.normal = 1.6e-4
options.loss.weights.edge = 0.3
options.loss.weights.laplace = 0.5
options.loss.weights.move = 0.1
options.loss.weights.constant = 1.
options.loss.weights.chamfer = [1., 1., 1.]
options.loss.weights.chamfer_opposite = 1.
options.loss.weights.reconst = 0.
options.train = edict()
options.train.num_epochs = 50
options.train.batch_size = 4
options.train.summary_steps = 50
options.train.checkpoint_steps = 10000
options.train.test_epochs = 1
options.train.use_augmentation = True
options.train.shuffle = True
options.test = edict()
options.test.dataset = []
options.test.summary_steps = 50
options.test.batch_size = 4
options.test.shuffle = False
options.test.weighted_mean = False
options.optim = edict()
options.optim.name = "adam"
options.optim.adam_beta1 = 0.9
options.optim.sgd_momentum = 0.9
options.optim.lr = 5.0E-5
options.optim.wd = 1.0E-6
options.optim.lr_step = [30, 45]
options.optim.lr_factor = 0.1
def _update_dict(full_key, val, d):
for vk, vv in val.items():
if vk not in d:
raise ValueError("{}.{} does not exist in options".format(full_key, vk))
if isinstance(vv, list):
d[vk] = np.array(vv)
elif isinstance(vv, dict):
_update_dict(full_key + "." + vk, vv, d[vk])
else:
d[vk] = vv
def _update_options(options_file):
    # recursively merge options files: `based_on` bases are applied first
    # (depth-first), then this file's own values override them
with open(options_file) as f:
options_dict = yaml.safe_load(f)
# do a dfs on `BASED_ON` options files
if "based_on" in options_dict:
for base_options in options_dict["based_on"]:
_update_options(os.path.join(os.path.dirname(options_file), base_options))
options_dict.pop("based_on")
_update_dict("", options_dict, options)
def update_options(options_file):
_update_options(options_file)
def gen_options(options_file):
def to_dict(ed):
ret = dict(ed)
for k, v in ret.items():
if isinstance(v, edict):
ret[k] = to_dict(v)
elif isinstance(v, np.ndarray):
ret[k] = v.tolist()
return ret
cfg = to_dict(options)
with open(options_file, 'w') as f:
yaml.safe_dump(dict(cfg), f, default_flow_style=False)
def slugify(filename):
filename = os.path.relpath(filename, ".")
if filename.startswith("experiments/"):
filename = filename[len("experiments/"):]
return os.path.splitext(filename)[0].lower().replace("/", "_").replace(".", "_")
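# e.g. slugify("experiments/baseline/default.yml") -> "baseline_default"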
def reset_options(options, args, phase='train'):
if hasattr(args, "batch_size") and args.batch_size:
options.train.batch_size = options.test.batch_size = args.batch_size
if hasattr(args, "version") and args.version:
options.version = args.version
if hasattr(args, "num_epochs") and args.num_epochs:
options.train.num_epochs = args.num_epochs
if hasattr(args, "checkpoint") and args.checkpoint:
options.checkpoint = args.checkpoint
if hasattr(args, "folder") and args.folder:
options.dataset.predict.folder = args.folder
if hasattr(args, "gpus") and args.gpus:
options.num_gpus = args.gpus
if hasattr(args, "shuffle") and args.shuffle:
options.train.shuffle = options.test.shuffle = True
options.name = args.name
if options.version is None:
prefix = ""
if args.options:
prefix = slugify(args.options) + "_"
options.version = prefix + datetime.now().strftime('%m%d%H%M%S') # ignore %Y
options.log_dir = os.path.join(options.log_dir, options.name)
print('=> creating {}'.format(options.log_dir))
os.makedirs(options.log_dir, exist_ok=True)
options.checkpoint_dir = os.path.join(options.checkpoint_dir, options.name, options.version)
print('=> creating {}'.format(options.checkpoint_dir))
os.makedirs(options.checkpoint_dir, exist_ok=True)
options.summary_dir = os.path.join(options.summary_dir, options.name, options.version)
print('=> creating {}'.format(options.summary_dir))
os.makedirs(options.summary_dir, exist_ok=True)
logger = create_logger(options, phase=phase)
options_text = pprint.pformat(vars(options))
logger.info(options_text)
print('=> creating summary writer')
writer = SummaryWriter(options.summary_dir)
return logger, writer
if __name__ == "__main__":
parser = ArgumentParser("Read options and freeze")
parser.add_argument("--input", type=str, required=True)
parser.add_argument("--output", type=str, required=True)
args = parser.parse_args()
update_options(args.input)
gen_options(args.output)
================================================
FILE: slurm/eval.sh
================================================
#!/usr/bin/env bash
set -x
if [[ $# -lt 4 ]] ; then
echo 'too few arguments supplied'
exit 1
fi
PARTITION=$1
NAME=$2
OPTIONS=$3
CHECKPOINT=$4
srun -p ${PARTITION} \
--job-name=MeshEval \
--gres=gpu:8 \
--ntasks=1 \
--kill-on-bad-exit=1 \
python entrypoint_eval.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} &
================================================
FILE: slurm/train.sh
================================================
#!/usr/bin/env bash
set -x
if [[ $# -lt 3 ]] ; then
echo 'too few arguments supplied'
exit 1
fi
PARTITION=$1
NAME=$2
OPTIONS=$3
srun -p ${PARTITION} \
--job-name=Mesh \
--gres=gpu:8 \
--ntasks=1 \
--kill-on-bad-exit=1 \
python entrypoint_train.py --name ${NAME} --options ${OPTIONS} &
================================================
FILE: slurm/train_checkpoint.sh
================================================
#!/usr/bin/env bash
set -x
if [[ $# -lt 4 ]] ; then
echo 'too few arguments supplied'
exit 1
fi
PARTITION=$1
NAME=$2
OPTIONS=$3
CHECKPOINT=$4
srun -p ${PARTITION} \
--job-name=Mesh \
--gres=gpu:8 \
--ntasks=1 \
--kill-on-bad-exit=1 \
python entrypoint_train.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} &
================================================
FILE: slurm/train_checkpoint_1gpu.sh
================================================
#!/usr/bin/env bash
set -x
if [[ $# -lt 4 ]] ; then
echo 'too few arguments supplied'
exit 1
fi
PARTITION=$1
NAME=$2
OPTIONS=$3
CHECKPOINT=$4
srun -p ${PARTITION} \
--job-name=Mesh \
--gres=gpu:1 \
--ntasks=1 \
--kill-on-bad-exit=1 \
python entrypoint_train.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} &
================================================
FILE: test.py
================================================
import torch
import torch.nn as nn
from models.layers.chamfer_wrapper import ChamferDist
def test():
torch.manual_seed(42)
chamfer = ChamferDist()
dense = nn.Linear(6, 3)
dense.cuda()
optimizer = torch.optim.Adam(dense.parameters(), 1e-3)
a = torch.rand(4, 5, 6).cuda()
b = torch.rand(4, 8, 3).cuda()
c = torch.rand(4, 5, 6).cuda()
for i in range(30000):
a_out = dense(a)
d1, d2, i1, i2 = chamfer(a_out, b)
loss = d1.mean() + d2.mean()
c_out = dense(a)
d1, d2, i1, i2 = chamfer(c_out, b)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss)
test()
================================================
FILE: utils/average_meter.py
================================================
from collections.abc import Iterable
import torch
import numpy as np
# noinspection PyAttributeOutsideInit
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, multiplier=1.0):
self.multiplier = multiplier
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
if isinstance(val, torch.Tensor):
val = val.cpu().numpy()
        if isinstance(val, Iterable):
            val = np.array(val)
            self.update(np.mean(val), n=val.size)
else:
self.val = self.multiplier * val
self.sum += self.multiplier * val * n
self.count += n
self.avg = self.sum / self.count if self.count != 0 else 0
def __str__(self):
return "%.6f (%.6f)" % (self.val, self.avg)
================================================
FILE: utils/demo_selection/select_demo_images.py
================================================
import json
import os
import random
import shutil
with open("datasets/data/shapenet/meta/shapenet.json") as fp:
labels_map = json.load(fp)
with open("datasets/data/shapenet/meta/test_tf.txt") as fp:
lines = [line.strip() for line in fp.readlines()]
for entry in labels_map.values():
file_list = list(filter(lambda x: (entry["id"] + "/") in x, lines))
chosen = random.choice(file_list)
file_location = os.path.join("datasets/data/shapenet/data_tf",
chosen[len("Data/ShapeNetP2M/"):-4] + ".png")
shutil.copyfile(file_location, "datasets/examples/%s.png" % entry["name"].split(",")[0])
================================================
FILE: utils/mesh.py
================================================
import os
import pickle
import numpy as np
import torch
import trimesh
from scipy.sparse import coo_matrix
import config
def torch_sparse_tensor(indices, value, size):
coo = coo_matrix((value, (indices[:, 0], indices[:, 1])), shape=size)
values = coo.data
indices = np.vstack((coo.row, coo.col))
i = torch.tensor(indices, dtype=torch.long)
v = torch.tensor(values, dtype=torch.float)
shape = coo.shape
return torch.sparse.FloatTensor(i, v, shape)
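# e.g. torch_sparse_tensor(np.array([[0, 1], [1, 0]]), np.array([1., 1.]), (2, 2))
# builds a sparse 2x2 adjacency with ones at (0, 1) and (1, 0)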
class Ellipsoid(object):
def __init__(self, mesh_pos, file=config.ELLIPSOID_PATH):
with open(file, "rb") as fp:
fp_info = pickle.load(fp, encoding='latin1')
# shape: n_pts * 3
self.coord = torch.tensor(fp_info[0]) - torch.tensor(mesh_pos, dtype=torch.float)
# edges & faces & lap_idx
# edge: num_edges * 2
# faces: num_faces * 4
# laplace_idx: num_pts * 10
self.edges, self.laplace_idx = [], []
for i in range(3):
self.edges.append(torch.tensor(fp_info[1 + i][1][0], dtype=torch.long))
self.laplace_idx.append(torch.tensor(fp_info[7][i], dtype=torch.long))
# unpool index
# num_pool_edges * 2
# pool_01: 462 * 2, pool_12: 1848 * 2
self.unpool_idx = [torch.tensor(fp_info[4][i], dtype=torch.long) for i in range(2)]
# loops and adjacent edges
self.adj_mat = []
for i in range(1, 4):
# 0: np.array, 2D, pos
# 1: np.array, 1D, vals
# 2: tuple - shape, n * n
adj_mat = torch_sparse_tensor(*fp_info[i][1])
self.adj_mat.append(adj_mat)
ellipsoid_dir = os.path.dirname(file)
self.faces = []
self.obj_fmt_faces = []
# faces: f * 3, original ellipsoid, and two after deformations
for i in range(1, 4):
face_file = os.path.join(ellipsoid_dir, "face%d.obj" % i)
faces = np.loadtxt(face_file, dtype='|S32')
self.obj_fmt_faces.append(faces)
            self.faces.append(torch.tensor(faces[:, 1:].astype(int) - 1))
================================================
FILE: utils/migrations/delete_unnecessary_keys.py
================================================
from argparse import ArgumentParser
import torch
parser = ArgumentParser()
parser.add_argument("--input", type=str, required=True)
parser.add_argument("--output", type=str, required=True)
args = parser.parse_args()
data = torch.load(args.input)
compressed = dict()
compressed["model"] = data["model"]
torch.save(compressed, args.output)
================================================
FILE: utils/migrations/extract_vgg_weights.py
================================================
import torch
from models.classifier import Classifier
from options import options
options.model.backbone = "vgg16"
model = Classifier(options.model, 1000)
state_dict = torch.load("checkpoints/debug/migration/400400_000080.pt")
model.load_state_dict(state_dict["model"])
torch.save(model.nn_encoder.state_dict(), "checkpoints/debug/migration/vgg16-p2m.pth")
================================================
FILE: utils/migrations/from_p2m_pytorch.py
================================================
import re
import torch
checkpoint = torch.load("checkpoints/debug/20190705192654/000001_000001.pt")
pretrained = torch.load("checkpoints/pretrained/network_4.pth")
weights = checkpoint["model"]
for k in weights.keys():
match = k
match = re.sub("gcns\.(\d)", "GCN_\\1", match)
match = re.sub("conv(\d)\.weight", "conv\\1.weight_2", match)
match = re.sub("conv(\d)\.loop_weight", "conv\\1.weight_1", match)
match = re.sub("gconv\.weight", "GConv.weight_2", match)
match = re.sub("gconv\.loop_weight", "GConv.weight_1", match)
match = re.sub("gconv\.", "GConv.", match)
if match not in pretrained:
print(k, match)
else:
weights[k] = pretrained[match]
torch.save(checkpoint, "checkpoints/debug/migration/network_4.pt")
================================================
FILE: utils/migrations/official_config_pytorch_256.txt
================================================
nn_encoder.conv0_1.weight torch.Size([16, 3, 3, 3])
nn_encoder.conv0_1.bias torch.Size([16])
nn_encoder.conv0_2.weight torch.Size([16, 16, 3, 3])
nn_encoder.conv0_2.bias torch.Size([16])
nn_encoder.conv1_1.weight torch.Size([32, 16, 3, 3])
nn_encoder.conv1_1.bias torch.Size([32])
nn_encoder.conv1_2.weight torch.Size([32, 32, 3, 3])
nn_encoder.conv1_2.bias torch.Size([32])
nn_encoder.conv1_3.weight torch.Size([32, 32, 3, 3])
nn_encoder.conv1_3.bias torch.Size([32])
nn_encoder.conv2_1.weight torch.Size([64, 32, 3, 3])
nn_encoder.conv2_1.bias torch.Size([64])
nn_encoder.conv2_2.weight torch.Size([64, 64, 3, 3])
nn_encoder.conv2_2.bias torch.Size([64])
nn_encoder.conv2_3.weight torch.Size([64, 64, 3, 3])
nn_encoder.conv2_3.bias torch.Size([64])
nn_encoder.conv3_1.weight torch.Size([128, 64, 3, 3])
nn_encoder.conv3_1.bias torch.Size([128])
nn_encoder.conv3_2.weight torch.Size([128, 128, 3, 3])
nn_encoder.conv3_2.bias torch.Size([128])
nn_encoder.conv3_3.weight torch.Size([128, 128, 3, 3])
nn_encoder.conv3_3.bias torch.Size([128])
nn_encoder.conv4_1.weight torch.Size([256, 128, 5, 5])
nn_encoder.conv4_1.bias torch.Size([256])
nn_encoder.conv4_2.weight torch.Size([256, 256, 3, 3])
nn_encoder.conv4_2.bias torch.Size([256])
nn_encoder.conv4_3.weight torch.Size([256, 256, 3, 3])
nn_encoder.conv4_3.bias torch.Size([256])
nn_encoder.conv5_1.weight torch.Size([512, 256, 5, 5])
nn_encoder.conv5_1.bias torch.Size([512])
nn_encoder.conv5_2.weight torch.Size([512, 512, 3, 3])
nn_encoder.conv5_2.bias torch.Size([512])
nn_encoder.conv5_3.weight torch.Size([512, 512, 3, 3])
nn_encoder.conv5_3.bias torch.Size([512])
nn_encoder.conv5_4.weight torch.Size([512, 512, 3, 3])
nn_encoder.conv5_4.bias torch.Size([512])
gcns.0.conv1.loop_weight torch.Size([963, 256])
gcns.0.conv1.weight torch.Size([963, 256])
gcns.0.conv1.bias torch.Size([256])
gcns.0.blocks.0.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.0.conv1.weight torch.Size([256, 256])
gcns.0.blocks.0.conv1.bias torch.Size([256])
gcns.0.blocks.0.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.0.conv2.weight torch.Size([256, 256])
gcns.0.blocks.0.conv2.bias torch.Size([256])
gcns.0.blocks.1.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.1.conv1.weight torch.Size([256, 256])
gcns.0.blocks.1.conv1.bias torch.Size([256])
gcns.0.blocks.1.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.1.conv2.weight torch.Size([256, 256])
gcns.0.blocks.1.conv2.bias torch.Size([256])
gcns.0.blocks.2.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.2.conv1.weight torch.Size([256, 256])
gcns.0.blocks.2.conv1.bias torch.Size([256])
gcns.0.blocks.2.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.2.conv2.weight torch.Size([256, 256])
gcns.0.blocks.2.conv2.bias torch.Size([256])
gcns.0.blocks.3.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.3.conv1.weight torch.Size([256, 256])
gcns.0.blocks.3.conv1.bias torch.Size([256])
gcns.0.blocks.3.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.3.conv2.weight torch.Size([256, 256])
gcns.0.blocks.3.conv2.bias torch.Size([256])
gcns.0.blocks.4.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.4.conv1.weight torch.Size([256, 256])
gcns.0.blocks.4.conv1.bias torch.Size([256])
gcns.0.blocks.4.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.4.conv2.weight torch.Size([256, 256])
gcns.0.blocks.4.conv2.bias torch.Size([256])
gcns.0.blocks.5.conv1.loop_weight torch.Size([256, 256])
gcns.0.blocks.5.conv1.weight torch.Size([256, 256])
gcns.0.blocks.5.conv1.bias torch.Size([256])
gcns.0.blocks.5.conv2.loop_weight torch.Size([256, 256])
gcns.0.blocks.5.conv2.weight torch.Size([256, 256])
gcns.0.blocks.5.conv2.bias torch.Size([256])
gcns.0.conv2.loop_weight torch.Size([256, 3])
gcns.0.conv2.weight torch.Size([256, 3])
gcns.0.conv2.bias torch.Size([3])
gcns.1.conv1.loop_weight torch.Size([1219, 256])
gcns.1.conv1.weight torch.Size([1219, 256])
gcns.1.conv1.bias torch.Size([256])
gcns.1.blocks.0.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.0.conv1.weight torch.Size([256, 256])
gcns.1.blocks.0.conv1.bias torch.Size([256])
gcns.1.blocks.0.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.0.conv2.weight torch.Size([256, 256])
gcns.1.blocks.0.conv2.bias torch.Size([256])
gcns.1.blocks.1.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.1.conv1.weight torch.Size([256, 256])
gcns.1.blocks.1.conv1.bias torch.Size([256])
gcns.1.blocks.1.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.1.conv2.weight torch.Size([256, 256])
gcns.1.blocks.1.conv2.bias torch.Size([256])
gcns.1.blocks.2.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.2.conv1.weight torch.Size([256, 256])
gcns.1.blocks.2.conv1.bias torch.Size([256])
gcns.1.blocks.2.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.2.conv2.weight torch.Size([256, 256])
gcns.1.blocks.2.conv2.bias torch.Size([256])
gcns.1.blocks.3.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.3.conv1.weight torch.Size([256, 256])
gcns.1.blocks.3.conv1.bias torch.Size([256])
gcns.1.blocks.3.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.3.conv2.weight torch.Size([256, 256])
gcns.1.blocks.3.conv2.bias torch.Size([256])
gcns.1.blocks.4.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.4.conv1.weight torch.Size([256, 256])
gcns.1.blocks.4.conv1.bias torch.Size([256])
gcns.1.blocks.4.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.4.conv2.weight torch.Size([256, 256])
gcns.1.blocks.4.conv2.bias torch.Size([256])
gcns.1.blocks.5.conv1.loop_weight torch.Size([256, 256])
gcns.1.blocks.5.conv1.weight torch.Size([256, 256])
gcns.1.blocks.5.conv1.bias torch.Size([256])
gcns.1.blocks.5.conv2.loop_weight torch.Size([256, 256])
gcns.1.blocks.5.conv2.weight torch.Size([256, 256])
gcns.1.blocks.5.conv2.bias torch.Size([256])
gcns.1.conv2.loop_weight torch.Size([256, 3])
gcns.1.conv2.weight torch.Size([256, 3])
gcns.1.conv2.bias torch.Size([3])
gcns.2.conv1.loop_weight torch.Size([1219, 256])
gcns.2.conv1.weight torch.Size([1219, 256])
gcns.2.conv1.bias torch.Size([256])
gcns.2.blocks.0.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.0.conv1.weight torch.Size([256, 256])
gcns.2.blocks.0.conv1.bias torch.Size([256])
gcns.2.blocks.0.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.0.conv2.weight torch.Size([256, 256])
gcns.2.blocks.0.conv2.bias torch.Size([256])
gcns.2.blocks.1.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.1.conv1.weight torch.Size([256, 256])
gcns.2.blocks.1.conv1.bias torch.Size([256])
gcns.2.blocks.1.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.1.conv2.weight torch.Size([256, 256])
gcns.2.blocks.1.conv2.bias torch.Size([256])
gcns.2.blocks.2.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.2.conv1.weight torch.Size([256, 256])
gcns.2.blocks.2.conv1.bias torch.Size([256])
gcns.2.blocks.2.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.2.conv2.weight torch.Size([256, 256])
gcns.2.blocks.2.conv2.bias torch.Size([256])
gcns.2.blocks.3.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.3.conv1.weight torch.Size([256, 256])
gcns.2.blocks.3.conv1.bias torch.Size([256])
gcns.2.blocks.3.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.3.conv2.weight torch.Size([256, 256])
gcns.2.blocks.3.conv2.bias torch.Size([256])
gcns.2.blocks.4.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.4.conv1.weight torch.Size([256, 256])
gcns.2.blocks.4.conv1.bias torch.Size([256])
gcns.2.blocks.4.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.4.conv2.weight torch.Size([256, 256])
gcns.2.blocks.4.conv2.bias torch.Size([256])
gcns.2.blocks.5.conv1.loop_weight torch.Size([256, 256])
gcns.2.blocks.5.conv1.weight torch.Size([256, 256])
gcns.2.blocks.5.conv1.bias torch.Size([256])
gcns.2.blocks.5.conv2.loop_weight torch.Size([256, 256])
gcns.2.blocks.5.conv2.weight torch.Size([256, 256])
gcns.2.blocks.5.conv2.bias torch.Size([256])
gcns.2.conv2.loop_weight torch.Size([256, 256])
gcns.2.conv2.weight torch.Size([256, 256])
gcns.2.conv2.bias torch.Size([256])
gconv.loop_weight torch.Size([256, 3])
gconv.weight torch.Size([256, 3])
gconv.bias torch.Size([3])
================================================
FILE: utils/migrations/official_config_tensorflow_256.txt
================================================
gcn/Conv2D/W:0 (3, 3, 3, 16)
gcn/Conv2D/b:0 (16,)
gcn/Conv2D_1/W:0 (3, 3, 16, 16)
gcn/Conv2D_1/b:0 (16,)
gcn/Conv2D_2/W:0 (3, 3, 16, 32)
gcn/Conv2D_2/b:0 (32,)
gcn/Conv2D_3/W:0 (3, 3, 32, 32)
gcn/Conv2D_3/b:0 (32,)
gcn/Conv2D_4/W:0 (3, 3, 32, 32)
gcn/Conv2D_4/b:0 (32,)
gcn/Conv2D_5/W:0 (3, 3, 32, 64)
gcn/Conv2D_5/b:0 (64,)
gcn/Conv2D_6/W:0 (3, 3, 64, 64)
gcn/Conv2D_6/b:0 (64,)
gcn/Conv2D_7/W:0 (3, 3, 64, 64)
gcn/Conv2D_7/b:0 (64,)
gcn/Conv2D_8/W:0 (3, 3, 64, 128)
gcn/Conv2D_8/b:0 (128,)
gcn/Conv2D_9/W:0 (3, 3, 128, 128)
gcn/Conv2D_9/b:0 (128,)
gcn/Conv2D_10/W:0 (3, 3, 128, 128)
gcn/Conv2D_10/b:0 (128,)
gcn/Conv2D_11/W:0 (5, 5, 128, 256)
gcn/Conv2D_11/b:0 (256,)
gcn/Conv2D_12/W:0 (3, 3, 256, 256)
gcn/Conv2D_12/b:0 (256,)
gcn/Conv2D_13/W:0 (3, 3, 256, 256)
gcn/Conv2D_13/b:0 (256,)
gcn/Conv2D_14/W:0 (5, 5, 256, 512)
gcn/Conv2D_14/b:0 (512,)
gcn/Conv2D_15/W:0 (3, 3, 512, 512)
gcn/Conv2D_15/b:0 (512,)
gcn/Conv2D_16/W:0 (3, 3, 512, 512)
gcn/Conv2D_16/b:0 (512,)
gcn/Conv2D_17/W:0 (3, 3, 512, 512)
gcn/Conv2D_17/b:0 (512,)
gcn/graphconvolution_1_vars/weights_0:0 (963, 256)
gcn/graphconvolution_1_vars/weights_1:0 (963, 256)
gcn/graphconvolution_1_vars/bias:0 (256,)
gcn/graphconvolution_2_vars/weights_0:0 (256, 256)
gcn/graphconvolution_2_vars/weights_1:0 (256, 256)
gcn/graphconvolution_2_vars/bias:0 (256,)
gcn/graphconvolution_3_vars/weights_0:0 (256, 256)
gcn/graphconvolution_3_vars/weights_1:0 (256, 256)
gcn/graphconvolution_3_vars/bias:0 (256,)
gcn/graphconvolution_4_vars/weights_0:0 (256, 256)
gcn/graphconvolution_4_vars/weights_1:0 (256, 256)
gcn/graphconvolution_4_vars/bias:0 (256,)
gcn/graphconvolution_5_vars/weights_0:0 (256, 256)
gcn/graphconvolution_5_vars/weights_1:0 (256, 256)
gcn/graphconvolution_5_vars/bias:0 (256,)
gcn/graphconvolution_6_vars/weights_0:0 (256, 256)
gcn/graphconvolution_6_vars/weights_1:0 (256, 256)
gcn/graphconvolution_6_vars/bias:0 (256,)
gcn/graphconvolution_7_vars/weights_0:0 (256, 256)
gcn/graphconvolution_7_vars/weights_1:0 (256, 256)
gcn/graphconvolution_7_vars/bias:0 (256,)
gcn/graphconvolution_8_vars/weights_0:0 (256, 256)
gcn/graphconvolution_8_vars/weights_1:0 (256, 256)
gcn/graphconvolution_8_vars/bias:0 (256,)
gcn/graphconvolution_9_vars/weights_0:0 (256, 256)
gcn/graphconvolution_9_vars/weights_1:0 (256, 256)
gcn/graphconvolution_9_vars/bias:0 (256,)
gcn/graphconvolution_10_vars/weights_0:0 (256, 256)
gcn/graphconvolution_10_vars/weights_1:0 (256, 256)
gcn/graphconvolution_10_vars/bias:0 (256,)
gcn/graphconvolution_11_vars/weights_0:0 (256, 256)
gcn/graphconvolution_11_vars/weights_1:0 (256, 256)
gcn/graphconvolution_11_vars/bias:0 (256,)
gcn/graphconvolution_12_vars/weights_0:0 (256, 256)
gcn/graphconvolution_12_vars/weights_1:0 (256, 256)
gcn/graphconvolution_12_vars/bias:0 (256,)
gcn/graphconvolution_13_vars/weights_0:0 (256, 256)
gcn/graphconvolution_13_vars/weights_1:0 (256, 256)
gcn/graphconvolution_13_vars/bias:0 (256,)
gcn/graphconvolution_14_vars/weights_0:0 (256, 3)
gcn/graphconvolution_14_vars/weights_1:0 (256, 3)
gcn/graphconvolution_14_vars/bias:0 (3,)
gcn/graphconvolution_15_vars/weights_0:0 (1219, 256)
gcn/graphconvolution_15_vars/weights_1:0 (1219, 256)
gcn/graphconvolution_15_vars/bias:0 (256,)
gcn/graphconvolution_16_vars/weights_0:0 (256, 256)
gcn/graphconvolution_16_vars/weights_1:0 (256, 256)
gcn/graphconvolution_16_vars/bias:0 (256,)
gcn/graphconvolution_17_vars/weights_0:0 (256, 256)
gcn/graphconvolution_17_vars/weights_1:0 (256, 256)
gcn/graphconvolution_17_vars/bias:0 (256,)
gcn/graphconvolution_18_vars/weights_0:0 (256, 256)
gcn/graphconvolution_18_vars/weights_1:0 (256, 256)
gcn/graphconvolution_18_vars/bias:0 (256,)
gcn/graphconvolution_19_vars/weights_0:0 (256, 256)
gcn/graphconvolution_19_vars/weights_1:0 (256, 256)
gcn/graphconvolution_19_vars/bias:0 (256,)
gcn/graphconvolution_20_vars/weights_0:0 (256, 256)
gcn/graphconvolution_20_vars/weights_1:0 (256, 256)
gcn/graphconvolution_20_vars/bias:0 (256,)
gcn/graphconvolution_21_vars/weights_0:0 (256, 256)
gcn/graphconvolution_21_vars/weights_1:0 (256, 256)
gcn/graphconvolution_21_vars/bias:0 (256,)
gcn/graphconvolution_22_vars/weights_0:0 (256, 256)
gcn/graphconvolution_22_vars/weights_1:0 (256, 256)
gcn/graphconvolution_22_vars/bias:0 (256,)
gcn/graphconvolution_23_vars/weights_0:0 (256, 256)
gcn/graphconvolution_23_vars/weights_1:0 (256, 256)
gcn/graphconvolution_23_vars/bias:0 (256,)
gcn/graphconvolution_24_vars/weights_0:0 (256, 256)
gcn/graphconvolution_24_vars/weights_1:0 (256, 256)
gcn/graphconvolution_24_vars/bias:0 (256,)
gcn/graphconvolution_25_vars/weights_0:0 (256, 256)
gcn/graphconvolution_25_vars/weights_1:0 (256, 256)
gcn/graphconvolution_25_vars/bias:0 (256,)
gcn/graphconvolution_26_vars/weights_0:0 (256, 256)
gcn/graphconvolution_26_vars/weights_1:0 (256, 256)
gcn/graphconvolution_26_vars/bias:0 (256,)
gcn/graphconvolution_27_vars/weights_0:0 (256, 256)
gcn/graphconvolution_27_vars/weights_1:0 (256, 256)
gcn/graphconvolution_27_vars/bias:0 (256,)
gcn/graphconvolution_28_vars/weights_0:0 (256, 3)
gcn/graphconvolution_28_vars/weights_1:0 (256, 3)
gcn/graphconvolution_28_vars/bias:0 (3,)
gcn/graphconvolution_29_vars/weights_0:0 (1219, 256)
gcn/graphconvolution_29_vars/weights_1:0 (1219, 256)
gcn/graphconvolution_29_vars/bias:0 (256,)
gcn/graphconvolution_30_vars/weights_0:0 (256, 256)
gcn/graphconvolution_30_vars/weights_1:0 (256, 256)
gcn/graphconvolution_30_vars/bias:0 (256,)
gcn/graphconvolution_31_vars/weights_0:0 (256, 256)
gcn/graphconvolution_31_vars/weights_1:0 (256, 256)
gcn/graphconvolution_31_vars/bias:0 (256,)
gcn/graphconvolution_32_vars/weights_0:0 (256, 256)
gcn/graphconvolution_32_vars/weights_1:0 (256, 256)
gcn/graphconvolution_32_vars/bias:0 (256,)
gcn/graphconvolution_33_vars/weights_0:0 (256, 256)
gcn/graphconvolution_33_vars/weights_1:0 (256, 256)
gcn/graphconvolution_33_vars/bias:0 (256,)
gcn/graphconvolution_34_vars/weights_0:0 (256, 256)
gcn/graphconvolution_34_vars/weights_1:0 (256, 256)
gcn/graphconvolution_34_vars/bias:0 (256,)
gcn/graphconvolution_35_vars/weights_0:0 (256, 256)
gcn/graphconvolution_35_vars/weights_1:0 (256, 256)
gcn/graphconvolution_35_vars/bias:0 (256,)
gcn/graphconvolution_36_vars/weights_0:0 (256, 256)
gcn/graphconvolution_36_vars/weights_1:0 (256, 256)
gcn/graphconvolution_36_vars/bias:0 (256,)
gcn/graphconvolution_37_vars/weights_0:0 (256, 256)
gcn/graphconvolution_37_vars/weights_1:0 (256, 256)
gcn/graphconvolution_37_vars/bias:0 (256,)
gcn/graphconvolution_38_vars/weights_0:0 (256, 256)
gcn/graphconvolution_38_vars/weights_1:0 (256, 256)
gcn/graphconvolution_38_vars/bias:0 (256,)
gcn/graphconvolution_39_vars/weights_0:0 (256, 256)
gcn/graphconvolution_39_vars/weights_1:0 (256, 256)
gcn/graphconvolution_39_vars/bias:0 (256,)
gcn/graphconvolution_40_vars/weights_0:0 (256, 256)
gcn/graphconvolution_40_vars/weights_1:0 (256, 256)
gcn/graphconvolution_40_vars/bias:0 (256,)
gcn/graphconvolution_41_vars/weights_0:0 (256, 256)
gcn/graphconvolution_41_vars/weights_1:0 (256, 256)
gcn/graphconvolution_41_vars/bias:0 (256,)
gcn/graphconvolution_42_vars/weights_0:0 (256, 128)
gcn/graphconvolution_42_vars/weights_1:0 (256, 128)
gcn/graphconvolution_42_vars/bias:0 (128,)
gcn/graphconvolution_43_vars/weights_0:0 (128, 3)
gcn/graphconvolution_43_vars/weights_1:0 (128, 3)
gcn/graphconvolution_43_vars/bias:0 (3,)
================================================
FILE: utils/migrations/official_model_converter.py
================================================
import pickle

import numpy as np
import torch

# Weights dumped from the official TensorFlow checkpoint (see tensorflow_to_pkl.py)
with open("checkpoints/debug/migration/p2m-tensorflow.pkl", "rb") as f:
    official = pickle.load(f)
for k, v in official.items():
    print(k, v.shape)

# A freshly initialized PyTorch checkpoint used as the target template
with open("checkpoints/debug/host_template_256/000001_000001.pt", "rb") as f:
    host = torch.load(f)
for k, v in host["model"].items():
    print(k, v.shape)

# The two config files list parameters in matching order, one name per line
with open("utils/migrations/official_config_pytorch_256.txt", "r") as f:
    pt_names = [line.split()[0] for line in f.readlines()]
with open("utils/migrations/official_config_tensorflow_256.txt", "r") as f:
    tf_names = [line.split()[0] for line in f.readlines()]

for pt, tf in zip(pt_names, tf_names):
    if host["model"][pt].shape != official[tf].shape:
        # Conv kernels: TF (kh, kw, in, out) -> PyTorch (out, in, kh, kw)
        data = np.transpose(official[tf], (3, 2, 0, 1))
    else:
        data = official[tf]
    print(pt, tf, host["model"][pt].data.shape, data.shape)
    host["model"][pt].data = torch.from_numpy(data)

torch.save(host, "checkpoints/debug/migration/network_official.pt")
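
A minimal sketch of the kernel-layout conversion the script above relies on, using a hypothetical (3, 3, 16, 32) kernel (cf. gcn/Conv2D_2/W:0): TensorFlow stores conv kernels as (kh, kw, in, out), while PyTorch expects (out, in, kh, kw), hence the (3, 2, 0, 1) transpose.

import numpy as np

tf_kernel = np.zeros((3, 3, 16, 32))               # TF layout: (kh, kw, in, out)
pt_kernel = np.transpose(tf_kernel, (3, 2, 0, 1))  # PyTorch layout: (out, in, kh, kw)
print(pt_kernel.shape)                             # (32, 16, 3, 3)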
================================================
FILE: utils/migrations/tensorflow_to_pkl.py
================================================
import pickle

import tensorflow as tf
from tensorflow.python.framework import ops

# Custom nearest-neighbor distance op from the official Pixel2Mesh repository
nn_distance_module = tf.load_op_library('tf_ops/libtf_nndistance.so')


def nn_distance(xyz1, xyz2):
    '''
    Computes the distance of nearest neighbors for a pair of point clouds
    input: xyz1: (batch_size, #points_1, 3) the first point cloud
    input: xyz2: (batch_size, #points_2, 3) the second point cloud
    output: dist1: (batch_size, #points_1) distance from first to second
    output: idx1: (batch_size, #points_1) nearest neighbor from first to second
    output: dist2: (batch_size, #points_2) distance from second to first
    output: idx2: (batch_size, #points_2) nearest neighbor from second to first
    '''
    return nn_distance_module.nn_distance(xyz1, xyz2)


@ops.RegisterGradient('NnDistance')
def _nn_distance_grad(op, grad_dist1, grad_idx1, grad_dist2, grad_idx2):
    xyz1 = op.inputs[0]
    xyz2 = op.inputs[1]
    idx1 = op.outputs[1]
    idx2 = op.outputs[3]
    return nn_distance_module.nn_distance_grad(xyz1, xyz2, grad_dist1, idx1, grad_dist2, idx2)


pickle_format = dict()
with tf.Session() as sess:
    # Restore the official checkpoint and dump every variable we can evaluate
    new_saver = tf.train.import_meta_graph('checkpoint/gcn.ckpt.meta')
    new_saver.restore(sess, 'checkpoint/gcn.ckpt')
    all_vars = tf.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    for v in all_vars:
        try:
            pickle_format[v.name] = sess.run(v)
        except Exception:
            # some collected variables may not be evaluable; skip them
            pass

with open("result.pkl", "wb") as f:
    pickle.dump(pickle_format, f)
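
A minimal sketch, assuming the dump above succeeded, of inspecting the resulting pickle; keys keep their TensorFlow variable names (e.g. "gcn/Conv2D/W:0"), matching official_config_tensorflow_256.txt.

import pickle

with open("result.pkl", "rb") as f:
    weights = pickle.load(f)
for name in sorted(weights)[:5]:   # peek at a few entries
    print(name, weights[name].shape)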
================================================
FILE: utils/migrations/validate_dataset_all.py
================================================
import os
import sys

from tqdm import tqdm


def go(file_path, subset):
    shapenet_root = "datasets/data/shapenet"
    # Rewrite the official file list into this repo's meta format, skipping
    # entries whose data files are missing on disk
    with open(file_path, "r") as f, open(os.path.join(shapenet_root, "meta", subset + "_all.txt"), "w") as g:
        for line in tqdm(f.readlines()):
            _, _, label, filename, _, index = line.strip().split("/")
            converted = label + "_" + filename + "_" + index
            data_path = os.path.join(shapenet_root, "data", label + "/" + filename + "_" + index)
            if not os.path.exists(data_path):
                print("fail! " + data_path)
                continue
            print(converted, file=g)


go(sys.argv[1], "train")
go(sys.argv[2], "test")
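
The six-way split above assumes list entries shaped like the official file lists; the path below is a hypothetical illustration, not a real dataset entry.

line = "data/ShapeNetP2M/02691156/1a04e3eab45ca15dd86060f189eb133/rendering/00.dat"
_, _, label, filename, _, index = line.strip().split("/")
print(label + "_" + filename + "_" + index)
# 02691156_1a04e3eab45ca15dd86060f189eb133_00.dat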
================================================
FILE: utils/tensor.py
================================================
"""
Helper functions that have not yet been implemented in pytorch
"""
import torch
def recursive_detach(t):
if isinstance(t, torch.Tensor):
return t.detach()
elif isinstance(t, list):
return [recursive_detach(x) for x in t]
elif isinstance(t, dict):
return {k: recursive_detach(v) for k, v in t.items()}
else:
return t
def batch_mm(matrix, batch):
"""
https://github.com/pytorch/pytorch/issues/14489
"""
# TODO: accelerate this with batch operations
return torch.stack([matrix.mm(b) for b in batch], dim=0)
def dot(x, y, sparse=False):
"""Wrapper for torch.matmul (sparse vs dense)."""
if sparse:
return batch_mm(x, y)
else:
return torch.matmul(x, y)
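
A minimal usage sketch for dot, assuming a sparse adjacency matrix shared across a batch of per-vertex feature matrices (the shapes here are toy values):

import torch
from utils.tensor import dot

adj = torch.eye(4).to_sparse()      # hypothetical 4x4 adjacency
feats = torch.randn(2, 4, 8)        # batch of 2 graphs, 4 vertices, 8 features
out = dot(adj, feats, sparse=True)  # one spmm per batch item
print(out.shape)                    # torch.Size([2, 4, 8])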
================================================
FILE: utils/vis/renderer.py
================================================
import cv2
import neural_renderer as nr
import numpy as np
import torch


def _process_render_result(img, height, width):
    if isinstance(img, torch.Tensor):
        img = img.cpu().numpy()
    if img.ndim == 2:
        # assuming single channel image
        img = np.expand_dims(img, axis=0)
    if img.shape[-1] == 3:
        # assuming [height, width, rgb]
        img = np.moveaxis(img, -1, 0)
    # return 3 * height * width or 1 * height * width, in range [0, 1];
    # crop the square render back to the requested size
    return np.clip(img[:, :height, :width], 0, 1)


def _mix_render_result_with_image(rgb, alpha, image):
    alpha = np.expand_dims(alpha, 0)
    return alpha * rgb + (1 - alpha) * image


class MeshRenderer(object):
    def __init__(self, camera_f, camera_c, mesh_pos):
        self.colors = {'pink': np.array([.9, .7, .7]),
                       'light_blue': np.array([0.65098039, 0.74117647, 0.85882353]),
                       'light_green': np.array([165., 216., 168.]) / 255,
                       'purple': np.array([216., 193., 165.]) / 255,
                       'orange': np.array([216., 165., 213.]) / 255,
                       'light_yellow': np.array([213., 216., 165.]) / 255,
                       }
        self.camera_f, self.camera_c, self.mesh_pos = camera_f, camera_c, mesh_pos
        self.renderer = nr.Renderer(camera_mode='projection',
                                    light_intensity_directional=.8,
                                    light_intensity_ambient=.3,
                                    background_color=[1., 1., 1.],
                                    light_direction=[0., 0., -1.])

    def _render_mesh(self, vertices: np.ndarray, faces: np.ndarray, width, height,
                     camera_k, camera_dist_coeffs, rvec, tvec, color=None):
        # render a square image, then crop
        img_size = max(height, width)
        # This is not thread safe!
        self.renderer.image_size = img_size
        vertices = torch.tensor(vertices, dtype=torch.float32)
        faces = torch.tensor(faces, dtype=torch.int32)
        if color is None:
            color = 'light_blue'
        color = self.colors[color]
        texture_size = 2
        textures = torch.tensor(color, dtype=torch.float32) \
            .repeat(faces.size(0), texture_size, texture_size, texture_size, 1)
        camera_k = torch.tensor(camera_k, dtype=torch.float32)
        rotmat = torch.tensor(cv2.Rodrigues(rvec)[0], dtype=torch.float32)
        tvec = torch.tensor(tvec, dtype=torch.float32)
        camera_dist_coeffs = torch.tensor(camera_dist_coeffs, dtype=torch.float32)
        rgb, _, alpha = self.renderer.render(vertices.unsqueeze(0).cuda(),
                                             faces.unsqueeze(0).cuda(),
                                             textures.unsqueeze(0).cuda(),
                                             K=camera_k.unsqueeze(0).cuda(),
                                             R=rotmat.unsqueeze(0).cuda(),
                                             t=tvec.unsqueeze(0).cuda(),
                                             dist_coeffs=camera_dist_coeffs.unsqueeze(0).cuda(),
                                             orig_size=img_size)
        # use the extra dimension of alpha for broadcasting
        alpha = _process_render_result(alpha[0], height, width)
        rgb = _process_render_result(rgb[0], height, width)
        return rgb, alpha

    def _render_pointcloud(self, vertices: np.ndarray, width, height,
                           camera_k, camera_dist_coeffs, rvec, tvec, color=None):
        if color is None:
            color = 'pink'
        color = self.colors[color]
        # project the point cloud into image space and splat each point
        vertices_2d = cv2.projectPoints(np.expand_dims(vertices, -1),
                                        rvec, tvec, camera_k, camera_dist_coeffs)[0]
        vertices_2d = np.reshape(vertices_2d, (-1, 2))
        alpha = np.zeros((height, width, 3), np.float64)
        whiteboard = np.ones((3, height, width), np.float64)
        if np.isnan(vertices_2d).any():
            return whiteboard, alpha
        for x, y in vertices_2d:
            cv2.circle(alpha, (int(x), int(y)), radius=1, color=(1., 1., 1.), thickness=-1)
        rgb = _process_render_result(alpha * color[None, None, :], height, width)
        alpha = _process_render_result(alpha[:, :, 0], height, width)
        rgb = _mix_render_result_with_image(rgb, alpha[0], whiteboard)
        return rgb, alpha

    def visualize_reconstruction(self, gt_coord, coord, faces, image, mesh_only=False, **kwargs):
        camera_k = np.array([[self.camera_f[0], 0, self.camera_c[0]],
                             [0, self.camera_f[1], self.camera_c[1]],
                             [0, 0, 1]])
        # invert y and z, equivalent to inverting x, but gives positive z
        rvec = np.array([np.pi, 0., 0.], dtype=np.float32)
        tvec = np.zeros(3, dtype=np.float32)
        dist_coeffs = np.zeros(5, dtype=np.float32)
        mesh, _ = self._render_mesh(coord, faces, image.shape[2], image.shape[1],
                                    camera_k, dist_coeffs, rvec, tvec, **kwargs)
        if mesh_only:
            return mesh
        gt_pc, _ = self._render_pointcloud(gt_coord, image.shape[2], image.shape[1],
                                           camera_k, dist_coeffs, rvec, tvec, **kwargs)
        pred_pc, _ = self._render_pointcloud(coord, image.shape[2], image.shape[1],
                                             camera_k, dist_coeffs, rvec, tvec, **kwargs)
        return np.concatenate((image, gt_pc, pred_pc, mesh), 2)

    def p2m_batch_visualize(self, batch_input, batch_output, faces, atmost=3):
        """
        Everything is a tensor for now; move to CPU and convert to numpy.
        """
        batch_size = min(batch_input["images_orig"].size(0), atmost)
        images_stack = []
        mesh_pos = np.array(self.mesh_pos)
        for i in range(batch_size):
            image = batch_input["images_orig"][i].cpu().numpy()
            gt_points = batch_input["points"][i].cpu().numpy() + mesh_pos
            for j in range(3):
                for k in (["pred_coord_before_deform", "pred_coord"] if j == 0 else ["pred_coord"]):
                    coord = batch_output[k][j][i].cpu().numpy() + mesh_pos
                    images_stack.append(self.visualize_reconstruction(gt_points, coord, faces[j].cpu().numpy(), image))
        return torch.from_numpy(np.concatenate(images_stack, 1))
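
A minimal sketch of driving the renderer directly, assuming CUDA and neural_renderer are available; the intrinsics, mesh offset, and single-triangle mesh below are hypothetical placeholders, not the project's configured values.

import numpy as np
from utils.vis.renderer import MeshRenderer

renderer = MeshRenderer(camera_f=[248., 248.], camera_c=[112., 112.],
                        mesh_pos=[0., 0., -0.8])
image = np.ones((3, 224, 224), dtype=np.float32)   # blank RGB canvas in [0, 1]
verts = np.array([[0., 0., -.8], [.1, 0., -.8], [0., .1, -.8]], dtype=np.float32)
faces = np.array([[0, 1, 2]], dtype=np.int32)
panel = renderer.visualize_reconstruction(verts, verts, faces, image)
print(panel.shape)                                 # (3, 224, 896): image | gt pc | pred pc | mesh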