Repository: noahcao/Pixel2Mesh Branch: master Commit: 92a6f7bdbee5 Files: 94 Total size: 136.4 KB Directory structure: gitextract_re_dj0ft/ ├── .gitignore ├── .gitmodules ├── README.md ├── config.py ├── datasets/ │ ├── base_dataset.py │ ├── imagenet.py │ ├── preprocess/ │ │ └── shapenet/ │ │ └── .gitignore │ └── shapenet.py ├── entrypoint_eval.py ├── entrypoint_predict.py ├── entrypoint_train.py ├── experiments/ │ ├── backbone/ │ │ ├── vgg16.yml │ │ ├── vgg16_1e-3.yml │ │ └── vgg16_1e-4.yml │ ├── baseline/ │ │ ├── chamfer_only.yml │ │ ├── default.yml │ │ ├── default_zthresh.yml │ │ ├── large_laplace.yml │ │ ├── lr_1e-3_weighted_chamfer.yml │ │ ├── lr_1e-3_weighted_chamfer_oppo.yml │ │ ├── lr_1e-3_zthresh.yml │ │ ├── lr_1e-3_zthresh_resnet.yml │ │ ├── lr_1e-4.yml │ │ ├── lr_1e-4_dataset_all.yml │ │ ├── lr_1e-4_dataset_tf_same_weights_step_adjusted.yml │ │ ├── lr_1e-4_dataset_tf_same_weights_step_adjusted_more_epochs.yml │ │ ├── lr_1e-4_k250_d256.yml │ │ ├── lr_1e-4_plane_only.yml │ │ ├── lr_1e-4_resnet_dataset_all.yml │ │ ├── lr_1e-4_resnet_dataset_all_larger_sample.yml │ │ ├── lr_1e-4_resnet_dataset_all_sample_9k.yml │ │ ├── lr_1e-4_resnet_dataset_tf_larger_sample.yml │ │ ├── lr_1e-4_resnet_dataset_tf_same_weights_step_adjusted.yml │ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k.yml │ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml │ │ ├── lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs_same_weights.yml │ │ ├── lr_1e-4_resnet_k250_d256.yml │ │ ├── lr_1e-4_wd_1e-8.yml │ │ ├── lr_1e-4_weighted_chamfer.yml │ │ ├── lr_1e-4_weighted_chamfer_oppo.yml │ │ ├── lr_1e-4_zthresh.yml │ │ ├── lr_1e-4_zthresh_resnet.yml │ │ ├── lr_1e-5.yml │ │ ├── lr_1e-5_dataset_tf_same_weights_step_adjusted.yml │ │ ├── lr_2.5e-5.yml │ │ ├── lr_3e-5_dataset_tf_same_weights_step_adjusted.yml │ │ ├── lr_5e-4_zthresh_resnet.yml │ │ ├── lr_5e-5_dataset_all_more_epochs.yml │ │ ├── normal_free.yml │ │ ├── relu_free.yml │ │ └── resnet.yml │ └── default/ │ ├── resnet.yml │ └── tensorflow.yml ├── external/ │ └── chamfer/ │ ├── chamfer.cu │ ├── chamfer_cuda.cpp │ ├── setup.py │ └── test.py ├── functions/ │ ├── base.py │ ├── evaluator.py │ ├── predictor.py │ ├── saver.py │ └── trainer.py ├── logger.py ├── models/ │ ├── backbones/ │ │ ├── __init__.py │ │ ├── resnet.py │ │ └── vgg16.py │ ├── classifier.py │ ├── layers/ │ │ ├── chamfer_wrapper.py │ │ ├── gbottleneck.py │ │ ├── gconv.py │ │ ├── gpooling.py │ │ └── gprojection.py │ ├── losses/ │ │ ├── classifier.py │ │ └── p2m.py │ └── p2m.py ├── options.py ├── slurm/ │ ├── eval.sh │ ├── train.sh │ ├── train_checkpoint.sh │ └── train_checkpoint_1gpu.sh ├── test.py └── utils/ ├── average_meter.py ├── demo_selection/ │ └── select_demo_images.py ├── mesh.py ├── migrations/ │ ├── delete_unnecessary_keys.py │ ├── extract_vgg_weights.py │ ├── from_p2m_pytorch.py │ ├── official_config_pytorch_256.txt │ ├── official_config_tensorflow_256.txt │ ├── official_model_converter.py │ ├── tensorflow_to_pkl.py │ └── validate_dataset_all.py ├── tensor.py └── vis/ └── renderer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST debug # PyInstaller # Usually these 
files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ # yaml files yamls/ # logs logs/ # vim swap files *.swp # data datasets/data datasets/extras datasets/examples/*.obj summary checkpoints # IDEA .idea

================================================
FILE: .gitmodules
================================================
[submodule "external/neural_renderer"]
    path = external/neural_renderer
    url = https://github.com/daniilidis-group/neural_renderer

================================================
FILE: README.md
================================================
# Pixel2Mesh

This is an implementation of Pixel2Mesh in PyTorch. In addition, we:

- Provide retrained Pixel2Mesh checkpoints. The pretrained TensorFlow model provided in the [official implementation](https://github.com/nywang16/Pixel2Mesh) is also converted into a PyTorch checkpoint file for convenience.
- Provide a modified version of Pixel2Mesh whose backbone is ResNet instead of VGG.
- Clarify some details of the previous implementation and provide a flexible training framework.

**If you have any urgent question or issue, please contact jinkuncao@gmail.com.**

## Get Started

### Environment

The current version only supports training and inference on GPU. It works well with the following dependencies:

- Ubuntu 16.04 / 18.04
- Python 3.7
- PyTorch 1.1
- CUDA 9.0 (10.0 should also work)
- OpenCV 4.1
- Scipy 1.3
- Scikit-Image 0.15

Some minor dependencies are also needed, for which the latest versions provided by conda/pip work well:

> easydict, pyyaml, tensorboardx, trimesh, shapely

Two more steps to prepare the codebase:

1. `git submodule update --init` to get [Neural Renderer](https://github.com/daniilidis-group/neural_renderer) ready.
2. `python setup.py install` in [external/chamfer](external/chamfer) and `external/neural_renderer` to compile the modules.

### Datasets

We use [ShapeNet](https://www.shapenet.org/) for model training and evaluation. The official TensorFlow implementation provides a subset of ShapeNet for it, which you can download [here](https://drive.google.com/drive/folders/131dH36qXCabym1JjSmEpSQZg4dmZVQid). Extract it and link it to the `data_tf` directory as shown below. Before that, the meta files [here](https://drive.google.com/file/d/16d9druvCpsjKWsxHmsTD5HSOWiCWtDzo/view?usp=sharing) will help you establish the folder tree, demonstrated below.

~~*P.S. In case more data is needed, another larger data package of ShapeNet is also [available](https://drive.google.com/file/d/1Z8gt4HdPujBNFABYrthhau9VZW10WWYe/view). You can extract it and place it in the `data` directory. But this takes a long time and needs about 300GB of storage.*~~

P.P.S.
For the larger data package, we provide temporary access on [OneDrive](https://1drv.ms/u/s!AtMVLfbdnqr4nGZjQ8GuPHlEUSg9?e=0dIEbK).

```
datasets/data
├── ellipsoid
│   ├── face1.obj
│   ├── face2.obj
│   ├── face3.obj
│   └── info_ellipsoid.dat
├── pretrained
│   ... (.pth files)
└── shapenet
    ├── data (larger data package, optional)
    │   ├── 02691156
    │   │   └── 3a123ae34379ea6871a70be9f12ce8b0_02.dat
    │   ├── 02828884
    │   └── ...
    ├── data_tf (standard data used in official implementation)
    │   ├── 02691156 (put the folders directly in data_tf)
    │   │   └── 10115655850468db78d106ce0a280f87
    │   ├── 02828884
    │   └── ...
    └── meta
        ...
```

The difference between the two versions of the dataset is worth some explanation:

- `data_tf` has images of 137x137 resolution with four channels (RGB + alpha), 175,132 samples for training and 43,783 for evaluation.
- `data` has RGB images of 224x224 resolution with the background set to all white. It contains 1,050,240 samples altogether for training and evaluation.

*P.S. We trained models with both datasets and evaluated them on both benchmarks. To save time and align our results with the official paper/implementation, we use `data_tf` by default.*

### Usage

#### Configuration

You can modify the configuration in a `yml` file for training/evaluation. It overrides the default settings in `options.py`. We provide some examples in the `experiments` directory.

#### Training

```
python entrypoint_train.py --name xxx --options path/to/yaml
```

*P.S. To train on slurm clusters, we also provide reference settings. Refer to the [slurm](slurm) folder for details.*

#### Evaluation

```shell
python entrypoint_eval.py --name xxx --options path/to/yml --checkpoint path/to/checkpoint
```

#### Inference

You can run inference on your own images with a simple command:

```
python entrypoint_predict.py --options /path/to/yml --checkpoint /path/to/checkpoint --folder /path/to/images
```

*P.S. We only support training/evaluation/inference on GPU by default.*

## Results

We tested the performance of several models. The [official tensorflow implementation](https://github.com/nywang16/Pixel2Mesh) reports much higher performance than claimed in the [original paper](https://arxiv.org/abs/1804.01654). Our results, listed below, are close to those reported in [MeshRCNN](https://arxiv.org/abs/1906.02739). The original paper evaluates results with a simple mean, without accounting for the fact that different categories contain different numbers of samples, while some later papers use a weighted mean. We report results under both metrics for completeness.
| Checkpoint | Eval Protocol | CD | F1τ | F12τ |
|---|---|---|---|---|
| Official Pretrained (tensorflow) | Mean | 0.482 | 65.22 | 78.80 |
| | Weighted-mean | 0.439 | 66.56 | 80.17 |
| Migrated Checkpoint | Mean | 0.498 | 64.21 | 78.03 |
| | Weighted-mean | 0.451 | 65.67 | 79.51 |
| ResNet | Mean | 0.443 | 65.36 | 79.24 |
| | Weighted-mean | 0.411 | 66.13 | 80.13 |
*P.S. Due to time limits, the ResNet checkpoint has not been trained extensively.*

### Pretrained checkpoints

- **VGG backbone:** The checkpoint converted from the official pretrained model (based on VGG) can be downloaded [here](https://drive.google.com/file/d/1Gk3M4KQekEenG9qQm60OFsxNar0sG8bN/view?usp=sharing). (Scripts to migrate TensorFlow checkpoints into `.pth` are available in `utils/migrations`.)
- **ResNet backbone:** As we provide ResNet as an alternative backbone, we also provide a corresponding checkpoint [here](https://drive.google.com/file/d/1pZm_IIWDUDje6gRZHW-GDhx5FCDM2Qg_/view?usp=sharing).

## Details of Improvement

We explain some improvements of this implementation over the official version here.

- **Larger batch size:** We support larger batch sizes on multiple GPUs for training. Since Chamfer distances cannot be calculated when samples in a batch have different ground-truth point cloud sizes, "resizing" the point clouds is necessary. Instead of resampling points, we simply upsample/downsample from the dataset (a minimal sketch of this appears at the end of this README).
- **Better backbone:** We enable replacing VGG with ResNet50 as the model backbone. Training progress is more stable and final performance is higher.
- **More stable training:** We normalize the deformed sphere so that it is deformed around location $(0,0,0)$; we use a threshold activation on the $z$-axis during projection, so that $z$ is always positive or negative and never $0$. These do not seem to improve final performance, but they make the training loss more stable.

## Demo

Generated mesh samples from our ResNet model are provided in [datasets/examples](datasets/examples). The three mesh models in each line are deformed from a single ellipsoid mesh with different numbers of vertices (156 vs 268 vs 2466), as configured in the original paper.

![](datasets/examples/airplane.gif)
![](datasets/examples/lamp.gif)
![](datasets/examples/table.gif)
![](datasets/examples/display.gif)

## Acknowledgements

Our work is based on the official version of [Pixel2Mesh](https://github.com/nywang16/Pixel2Mesh); some parts of the code are borrowed from [a previous PyTorch implementation of Pixel2Mesh](https://github.com/Tong-ZHAO/Pixel2Mesh-Pytorch). The packed files for the two versions of the dataset are also provided by these two projects. Most of the code work was done by [Yuge Zhang](https://github.com/ultmaster).
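### Appendix: point cloud resizing sketch

As mentioned under "Details of Improvement", batched Chamfer distance requires every ground-truth point cloud in a batch to have the same size. Below is a minimal sketch of the resampling trick, mirroring the logic of `get_shapenet_collate` in `datasets/shapenet.py` (included in full later in this extract); only the wrapper function name is ours.

```python
import numpy as np

def resize_pointcloud(pts, normals, num_points):
    # Force a ground-truth point cloud to a fixed size so that all samples in
    # a batch can be stacked and fed to the Chamfer kernel together.
    # np.resize repeats the permuted indices when num_points > len(pts)
    # (upsampling) and truncates them otherwise (downsampling), exactly as in
    # get_shapenet_collate.
    choices = np.resize(np.random.permutation(pts.shape[0]), num_points)
    return pts[choices], normals[choices]
```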
================================================ FILE: config.py ================================================ import os # dataset root DATASET_ROOT = "datasets/data" SHAPENET_ROOT = os.path.join(DATASET_ROOT, "shapenet") IMAGENET_ROOT = os.path.join(DATASET_ROOT, "imagenet") # ellipsoid path ELLIPSOID_PATH = os.path.join(DATASET_ROOT, "ellipsoid/info_ellipsoid.dat") # pretrained weights path PRETRAINED_WEIGHTS_PATH = { "vgg16": os.path.join(DATASET_ROOT, "pretrained/vgg16-397923af.pth"), "resnet50": os.path.join(DATASET_ROOT, "pretrained/resnet50-19c8e357.pth"), "vgg16p2m": os.path.join(DATASET_ROOT, "pretrained/vgg16-p2m.pth"), } # Mean and standard deviation for normalizing input image IMG_NORM_MEAN = [0.485, 0.456, 0.406] IMG_NORM_STD = [0.229, 0.224, 0.225] IMG_SIZE = 224 ================================================ FILE: datasets/base_dataset.py ================================================ from torch.utils.data.dataset import Dataset from torchvision.transforms import Normalize import config class BaseDataset(Dataset): def __init__(self): self.normalize_img = Normalize(mean=config.IMG_NORM_MEAN, std=config.IMG_NORM_STD) ================================================ FILE: datasets/imagenet.py ================================================ import os import numpy as np from torch.utils.data import Dataset from torchvision import transforms from PIL import ImageFile, Image ImageFile.LOAD_TRUNCATED_IMAGES = True class ImageNet(Dataset): def __init__(self, root_dir, split="train"): self.image_dir = os.path.join(root_dir, split) self.images = [] self.labels = [] with open(os.path.join(root_dir, "meta", split + ".txt"), "r") as f: for line in f.readlines(): image, label = line.strip().split() self.images.append(image) self.labels.append(int(label)) self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if split == "train": self.transform = transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), self.normalize ]) else: self.transform = transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), self.normalize ]) def __getitem__(self, index): image = Image.open(os.path.join(self.image_dir, self.images[index])) image = image.convert('RGB') image = self.transform(image) return { "images": image, "labels": self.labels[index], "filename": self.images[index], } def __len__(self): return len(self.images) ================================================ FILE: datasets/preprocess/shapenet/.gitignore ================================================ data ================================================ FILE: datasets/shapenet.py ================================================ import json import os import pickle import numpy as np import torch from PIL import Image from skimage import io, transform from torch.utils.data.dataloader import default_collate import config from datasets.base_dataset import BaseDataset class ShapeNet(BaseDataset): """ Dataset wrapping images and target meshes for ShapeNet dataset. 
""" def __init__(self, file_root, file_list_name, mesh_pos, normalization, shapenet_options): super().__init__() self.file_root = file_root with open(os.path.join(self.file_root, "meta", "shapenet.json"), "r") as fp: self.labels_map = sorted(list(json.load(fp).keys())) self.labels_map = {k: i for i, k in enumerate(self.labels_map)} # Read file list with open(os.path.join(self.file_root, "meta", file_list_name + ".txt"), "r") as fp: self.file_names = fp.read().split("\n")[:-1] self.tensorflow = "_tf" in file_list_name # tensorflow version of data self.normalization = normalization self.mesh_pos = mesh_pos self.resize_with_constant_border = shapenet_options.resize_with_constant_border def __getitem__(self, index): if self.tensorflow: filename = self.file_names[index][17:] label = filename.split("/", maxsplit=1)[0] pkl_path = os.path.join(self.file_root, "data_tf", filename) img_path = pkl_path[:-4] + ".png" with open(pkl_path) as f: data = pickle.load(open(pkl_path, 'rb'), encoding="latin1") pts, normals = data[:, :3], data[:, 3:] img = io.imread(img_path) img[np.where(img[:, :, 3] == 0)] = 255 if self.resize_with_constant_border: img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE), mode='constant', anti_aliasing=False) # to match behavior of old versions else: img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE)) img = img[:, :, :3].astype(np.float32) else: label, filename = self.file_names[index].split("_", maxsplit=1) with open(os.path.join(self.file_root, "data", label, filename), "rb") as f: data = pickle.load(f, encoding="latin1") img, pts, normals = data[0].astype(np.float32) / 255.0, data[1][:, :3], data[1][:, 3:] pts -= np.array(self.mesh_pos) assert pts.shape[0] == normals.shape[0] length = pts.shape[0] img = torch.from_numpy(np.transpose(img, (2, 0, 1))) img_normalized = self.normalize_img(img) if self.normalization else img return { "images": img_normalized, "images_orig": img, "points": pts, "normals": normals, "labels": self.labels_map[label], "filename": filename, "length": length } def __len__(self): return len(self.file_names) class ShapeNetImageFolder(BaseDataset): def __init__(self, folder, normalization, shapenet_options): super().__init__() self.normalization = normalization self.resize_with_constant_border = shapenet_options.resize_with_constant_border self.file_list = [] for fl in os.listdir(folder): file_path = os.path.join(folder, fl) # check image before hand try: if file_path.endswith(".gif"): raise ValueError("gif's are results. 
Not acceptable") Image.open(file_path) self.file_list.append(file_path) except (IOError, ValueError): print("=> Ignoring %s because it's not a valid image" % file_path) def __getitem__(self, item): img_path = self.file_list[item] img = io.imread(img_path) if img.shape[2] > 3: # has alpha channel img[np.where(img[:, :, 3] == 0)] = 255 if self.resize_with_constant_border: img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE), mode='constant', anti_aliasing=False) else: img = transform.resize(img, (config.IMG_SIZE, config.IMG_SIZE)) img = img[:, :, :3].astype(np.float32) img = torch.from_numpy(np.transpose(img, (2, 0, 1))) img_normalized = self.normalize_img(img) if self.normalization else img return { "images": img_normalized, "images_orig": img, "filepath": self.file_list[item] } def __len__(self): return len(self.file_list) def get_shapenet_collate(num_points): """ :param num_points: This option will not be activated when batch size = 1 :return: shapenet_collate function """ def shapenet_collate(batch): if len(batch) > 1: all_equal = True for t in batch: if t["length"] != batch[0]["length"]: all_equal = False break points_orig, normals_orig = [], [] if not all_equal: for t in batch: pts, normal = t["points"], t["normals"] length = pts.shape[0] choices = np.resize(np.random.permutation(length), num_points) t["points"], t["normals"] = pts[choices], normal[choices] points_orig.append(torch.from_numpy(pts)) normals_orig.append(torch.from_numpy(normal)) ret = default_collate(batch) ret["points_orig"] = points_orig ret["normals_orig"] = normals_orig return ret ret = default_collate(batch) ret["points_orig"] = ret["points"] ret["normals_orig"] = ret["normals"] return ret return shapenet_collate ================================================ FILE: entrypoint_eval.py ================================================ import argparse import sys from functions.evaluator import Evaluator from options import update_options, options, reset_options def parse_args(): parser = argparse.ArgumentParser(description='Pixel2Mesh Evaluation Entrypoint') parser.add_argument('--options', help='experiment options file name', required=False, type=str) args, rest = parser.parse_known_args() if args.options is None: print("Running without options file...", file=sys.stderr) else: update_options(args.options) parser.add_argument('--batch-size', help='batch size', type=int) parser.add_argument('--shuffle', help='shuffle samples', default=False, action='store_true') parser.add_argument('--checkpoint', help='trained checkpoint file', type=str, required=True) parser.add_argument('--version', help='version of task (timestamp by default)', type=str) parser.add_argument('--name', help='subfolder name of this experiment', required=True, type=str) parser.add_argument('--gpus', help='number of GPUs to use', type=int) args = parser.parse_args() return args def main(): args = parse_args() logger, writer = reset_options(options, args, phase='eval') evaluator = Evaluator(options, logger, writer) evaluator.evaluate() if __name__ == "__main__": main() ================================================ FILE: entrypoint_predict.py ================================================ import argparse import sys from functions.predictor import Predictor from options import update_options, options, reset_options def parse_args(): parser = argparse.ArgumentParser(description='Pixel2Mesh Prediction Entrypoint') parser.add_argument('--options', help='experiment options file name', required=False, type=str) args, rest = 
parser.parse_known_args() if args.options is None: print("Running without options file...", file=sys.stderr) else: update_options(args.options) parser.add_argument('--batch-size', help='batch size', type=int) parser.add_argument('--checkpoint', help='trained model file', type=str, required=True) parser.add_argument('--name', required=True, type=str) parser.add_argument('--folder', required=True, type=str) options.dataset.name += '_demo' args = parser.parse_args() return args def main(): args = parse_args() logger, writer = reset_options(options, args, phase='predict') predictor = Predictor(options, logger, writer) predictor.predict() if __name__ == "__main__": main() ================================================ FILE: entrypoint_train.py ================================================ import argparse import sys from functions.trainer import Trainer from options import update_options, options, reset_options def parse_args(): parser = argparse.ArgumentParser(description='Pixel2Mesh Training Entrypoint') parser.add_argument('--options', help='experiment options file name', required=False, type=str) args, rest = parser.parse_known_args() if args.options is None: print("Running without options file...", file=sys.stderr) else: update_options(args.options) # training parser.add_argument('--batch-size', help='batch size', type=int) parser.add_argument('--checkpoint', help='checkpoint file', type=str) parser.add_argument('--num-epochs', help='number of epochs', type=int) parser.add_argument('--version', help='version of task (timestamp by default)', type=str) parser.add_argument('--name', required=True, type=str) args = parser.parse_args() return args def main(): args = parse_args() logger, writer = reset_options(options, args) trainer = Trainer(options, logger, writer) trainer.train() if __name__ == "__main__": main() ================================================ FILE: experiments/backbone/vgg16.yml ================================================ dataset: name: imagenet num_classes: 1000 train: num_epochs: 80 batch_size: 32 model: name: classifier backbone: vgg16 optim: name: sgd lr: 1.0e-2 wd: 5.0e-4 lr_step: - 20 - 40 - 60 test: batch_size: 32 num_workers: 16 num_gpus: 8 ================================================ FILE: experiments/backbone/vgg16_1e-3.yml ================================================ based_on: - vgg16.yml optim: lr: 1.0e-4 ================================================ FILE: experiments/backbone/vgg16_1e-4.yml ================================================ based_on: - vgg16.yml optim: lr: 1.0e-3 ================================================ FILE: experiments/baseline/chamfer_only.yml ================================================ based_on: - default.yml loss: weights: normal: 0. laplace: 0. edge: 0. 
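The config above trains with the Chamfer terms only by zeroing out the other loss weights. As a rough guide to how these `loss.weights` options are consumed, here is a hedged sketch of combining the terms into a single objective; the actual implementation lives in `models/losses/p2m.py` (not included in this extract), and `chamfer_per_stage` / `other_terms` are hypothetical placeholders for already-computed scalar tensors.

```python
def total_loss(weights, chamfer_per_stage, other_terms):
    # Hedged sketch, not the project's P2MLoss: the Chamfer loss of each
    # deformation stage gets its own weight (weights["chamfer"] is a list,
    # e.g. [0.05, 0.4, 2.] in lr_1e-4_weighted_chamfer.yml), while the
    # remaining terms (normal, laplace, edge, move, ...) use scalar weights.
    # A zero weight, as in chamfer_only.yml above, switches a term off.
    loss = sum(w * c for w, c in zip(weights["chamfer"], chamfer_per_stage))
    loss += sum(weights[name] * value for name, value in other_terms.items())
    return loss
```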
================================================ FILE: experiments/baseline/default.yml ================================================ num_gpus: 8 num_workers: 16 train: batch_size: 24 test: batch_size: 24 ================================================ FILE: experiments/baseline/default_zthresh.yml ================================================ based_on: - default.yml model: z_threshold: -0.05 ================================================ FILE: experiments/baseline/large_laplace.yml ================================================ based_on: - default.yml loss: weights: laplace: 45.0 move: 3.0 ================================================ FILE: experiments/baseline/lr_1e-3_weighted_chamfer.yml ================================================ based_on: - lr_1e-4_weighted_chamfer.yml optim: lr: 1.0E-3 ================================================ FILE: experiments/baseline/lr_1e-3_weighted_chamfer_oppo.yml ================================================ based_on: - lr_1e-4_weighted_chamfer_oppo.yml optim: lr: 1.0E-3 ================================================ FILE: experiments/baseline/lr_1e-3_zthresh.yml ================================================ based_on: - default.yml optim: lr: 1.0E-3 model: z_threshold: -0.05 ================================================ FILE: experiments/baseline/lr_1e-3_zthresh_resnet.yml ================================================ based_on: - lr_1e-3_zthresh.yml model: backbone: resnet50 train: batch_size: 8 test: batch_size: 8 ================================================ FILE: experiments/baseline/lr_1e-4.yml ================================================ based_on: - default.yml optim: lr: 1.0E-4 ================================================ FILE: experiments/baseline/lr_1e-4_dataset_all.yml ================================================ based_on: - lr_1e-4.yml dataset: subset_train: train_all subset_eval: test_all optim: lr_factor: 0.2 lr_step: - 25 - 45 train: num_epochs: 60 ================================================ FILE: experiments/baseline/lr_1e-4_dataset_tf_same_weights_step_adjusted.yml ================================================ based_on: - lr_1e-4_resnet_dataset_tf_sample_9k.yml model: backbone: vgg16 train: batch_size: 24 test: batch_size: 24 loss: weights: chamfer_opposite: 0.55 laplace: 0.5 edge: 0.1 move: 0.033 ================================================ FILE: experiments/baseline/lr_1e-4_dataset_tf_same_weights_step_adjusted_more_epochs.yml ================================================ based_on: - lr_1e-4_dataset_tf_same_weights_step_adjusted.yml train: num_epochs: 110 optim: lr_step: - 40 - 80 - 100 ================================================ FILE: experiments/baseline/lr_1e-4_k250_d256.yml ================================================ based_on: - lr_1e-4_dataset_all.yml model: hidden_dim: 256 last_hidden_dim: 128 dataset: camera_f: [250., 250.] 
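Most of the experiment YAMLs in this directory are thin overrides chained through `based_on` (e.g. `lr_1e-4_k250_d256.yml` above inherits from `lr_1e-4_dataset_all.yml`, which in turn inherits from `lr_1e-4.yml` and `default.yml`). The actual merging logic lives in `options.py`, which is not shown in this extract, so the following is only a hedged sketch of how such recursive merging can work; `load_yaml_with_base` and `deep_update` are illustrative names, not the project's API.

```python
import os
import yaml  # pyyaml, listed among the README dependencies

def deep_update(dst, src):
    # Nested dict merge: dict values recurse, everything else in src wins.
    for k, v in src.items():
        if isinstance(v, dict) and isinstance(dst.get(k), dict):
            deep_update(dst[k], v)
        else:
            dst[k] = v
    return dst

def load_yaml_with_base(path):
    # Read a config, recursively resolve each file listed under `based_on`
    # (relative to the current file), then overlay the child's own keys.
    with open(path) as f:
        child = yaml.safe_load(f)
    merged = {}
    for base in child.pop("based_on", []):
        deep_update(merged, load_yaml_with_base(os.path.join(os.path.dirname(path), base)))
    return deep_update(merged, child)
```

The merged dict would then override the defaults in `options.py`, as described in the README's Configuration section.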
================================================ FILE: experiments/baseline/lr_1e-4_plane_only.yml ================================================ based_on: - lr_1e-4.yml train: num_epochs: 100 optim: lr_step: - 60 - 90 dataset: subset_train: train_plane subset_eval: test_plane ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_all.yml ================================================ based_on: - lr_1e-4.yml model: backbone: resnet50 train: batch_size: 8 test: batch_size: 8 dataset: subset_train: train_all subset_eval: test_all ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_all_larger_sample.yml ================================================ based_on: - lr_1e-4.yml model: backbone: resnet50 train: batch_size: 8 num_epochs: 70 test: batch_size: 8 dataset: subset_train: train_all subset_eval: test_all shapenet: num_points: 5000 optim: lr_factor: 0.3 lr_step: - 25 - 45 - 60 ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_all_sample_9k.yml ================================================ based_on: - lr_1e-4_resnet_dataset_all_larger_sample.yml dataset: shapenet: num_points: 9000 ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_larger_sample.yml ================================================ based_on: - lr_1e-4_resnet_dataset_all_larger_sample.yml dataset: subset_train: train_tf subset_eval: test_tf ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_same_weights_step_adjusted.yml ================================================ based_on: - lr_1e-4_resnet_dataset_tf_sample_9k.yml loss: weights: chamfer_opposite: 0.55 laplace: 0.5 edge: 0.1 move: 0.033 optim: lr_step: - 30 - 70 - 90 train: num_epochs: 110 ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k.yml ================================================ based_on: - lr_1e-4_resnet_dataset_all_sample_9k.yml dataset: subset_train: train_tf subset_eval: test_tf ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml ================================================ based_on: - lr_1e-4_resnet_dataset_tf_sample_9k.yml train: num_epochs: 110 optim: lr_step: - 40 - 70 - 90 ================================================ FILE: experiments/baseline/lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs_same_weights.yml ================================================ based_on: - lr_1e-4_resnet_dataset_tf_sample_9k_more_epochs.yml loss: weights: chamfer_opposite: 0.55 laplace: 0.5 edge: 0.1 move: 0.033 ================================================ FILE: experiments/baseline/lr_1e-4_resnet_k250_d256.yml ================================================ based_on: - lr_1e-4_k250_d256.yml model: backbone: resnet50 train: batch_size: 8 test: batch_size: 8 ================================================ FILE: experiments/baseline/lr_1e-4_wd_1e-8.yml ================================================ based_on: - lr_1e-4.yml optim: wd: 1.0e-8 ================================================ FILE: experiments/baseline/lr_1e-4_weighted_chamfer.yml ================================================ based_on: - lr_1e-4.yml loss: weights: chamfer: [0.05, 0.4, 2.] 
chamfer_opposite: 0.55 ================================================ FILE: experiments/baseline/lr_1e-4_weighted_chamfer_oppo.yml ================================================ based_on: - lr_1e-4.yml loss: weights: chamfer_opposite: 0.55 ================================================ FILE: experiments/baseline/lr_1e-4_zthresh.yml ================================================ based_on: - lr_1e-4.yml model: z_threshold: -0.05 ================================================ FILE: experiments/baseline/lr_1e-4_zthresh_resnet.yml ================================================ based_on: - lr_1e-4_zthresh.yml model: backbone: resnet50 train: batch_size: 8 test: batch_size: 8 ================================================ FILE: experiments/baseline/lr_1e-5.yml ================================================ based_on: - default.yml optim: lr: 1.0E-5 ================================================ FILE: experiments/baseline/lr_1e-5_dataset_tf_same_weights_step_adjusted.yml ================================================ based_on: - lr_1e-4_dataset_tf_same_weights_step_adjusted.yml optim: lr: 1.0e-5 ================================================ FILE: experiments/baseline/lr_2.5e-5.yml ================================================ based_on: - default.yml optim: lr: 2.5E-5 ================================================ FILE: experiments/baseline/lr_3e-5_dataset_tf_same_weights_step_adjusted.yml ================================================ based_on: - lr_1e-4_dataset_tf_same_weights_step_adjusted.yml optim: lr: 3.0e-5 ================================================ FILE: experiments/baseline/lr_5e-4_zthresh_resnet.yml ================================================ based_on: - lr_1e-4_zthresh_resnet.yml optim: lr: 5.0e-4 ================================================ FILE: experiments/baseline/lr_5e-5_dataset_all_more_epochs.yml ================================================ based_on: - lr_1e-4_dataset_all.yml optim: lr: 5.0e-5 lr_factor: 0.2 lr_step: - 40 - 70 - 90 train: num_epochs: 100 ================================================ FILE: experiments/baseline/normal_free.yml ================================================ based_on: - default.yml loss: weights: normal: 0. 
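Several configs above set `model.z_threshold: -0.05` (`default_zthresh.yml`, `lr_1e-4_zthresh.yml`, and their variants). Per the README, this is a threshold activation on the z-axis during projection, keeping z bounded away from zero so the perspective division never blows up. The projection itself lives in `models/layers/gprojection.py` (not included in this extract), so the snippet below is only a minimal sketch of the idea under that reading, not the project's code.

```python
import torch

def threshold_z(z, z_threshold=-0.05):
    # Hedged sketch of the z-axis threshold activation described in the
    # README: clamp z away from 0 so that the perspective division x/z, y/z
    # in the projection layer stays numerically stable. With a negative
    # threshold, as in the *_zthresh configs, every z ends up <= -0.05 and
    # can never approach zero; a positive threshold would bound z from below.
    if z_threshold < 0:
        return torch.clamp(z, max=z_threshold)
    return torch.clamp(z, min=z_threshold)
```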
================================================ FILE: experiments/baseline/relu_free.yml ================================================ based_on: - default.yml model: gconv_activation: false ================================================ FILE: experiments/baseline/resnet.yml ================================================ based_on: - default.yml model: backbone: resnet50 train: batch_size: 8 test: batch_size: 8 ================================================ FILE: experiments/default/resnet.yml ================================================ checkpoint: null checkpoint_dir: checkpoints dataset: camera_c: - 111.5 - 111.5 camera_f: - 248.0 - 248.0 mesh_pos: - 0.0 - 0.0 - -0.8 name: shapenet normalization: true num_classes: 13 predict: folder: /tmp shapenet: num_points: 9000 resize_with_constant_border: false subset_eval: test_tf subset_train: train_tf log_dir: logs log_level: info loss: weights: chamfer: - 1.0 - 1.0 - 1.0 chamfer_opposite: 0.55 constant: 1.0 edge: 0.1 laplace: 0.5 move: 0.033 normal: 0.00016 reconst: 0.0 model: align_with_tensorflow: false backbone: resnet50 coord_dim: 3 gconv_activation: true hidden_dim: 192 last_hidden_dim: 192 name: pixel2mesh z_threshold: 0 name: p2m num_gpus: 8 num_workers: 16 optim: adam_beta1: 0.9 lr: 0.0001 lr_factor: 0.3 lr_step: - 30 - 70 - 90 name: adam sgd_momentum: 0.9 wd: 1.0e-06 pin_memory: true summary_dir: summary test: batch_size: 8 dataset: [] shuffle: false summary_steps: 50 weighted_mean: false train: batch_size: 8 checkpoint_steps: 10000 num_epochs: 110 shuffle: true summary_steps: 50 test_epochs: 1 use_augmentation: true version: null ================================================ FILE: experiments/default/tensorflow.yml ================================================ checkpoint: null checkpoint_dir: checkpoints dataset: camera_c: - 112.0 - 112.0 camera_f: - 250.0 - 250.0 mesh_pos: - 0.0 - 0.0 - 0.0 name: shapenet normalization: false num_classes: 13 predict: folder: /tmp shapenet: num_points: 9000 resize_with_constant_border: true subset_eval: test_tf subset_train: train_tf log_dir: logs log_level: info loss: weights: chamfer: - 1.0 - 1.0 - 1.0 chamfer_opposite: 0.55 constant: 1.0 edge: 0.1 laplace: 0.5 move: 0.033 normal: 0.00016 reconst: 0.0 model: align_with_tensorflow: true backbone: vgg16 coord_dim: 3 gconv_activation: true hidden_dim: 256 last_hidden_dim: 128 name: pixel2mesh z_threshold: 0 name: p2m num_gpus: 1 num_workers: 16 optim: adam_beta1: 0.9 lr: 1.0e-06 lr_factor: 0.1 lr_step: - 30 - 45 name: adam sgd_momentum: 0.9 wd: 1.0e-06 pin_memory: true summary_dir: summary test: batch_size: 24 dataset: [] shuffle: true summary_steps: 5 weighted_mean: false train: batch_size: 1 checkpoint_steps: 10000 num_epochs: 2 shuffle: true summary_steps: 1 test_epochs: 1 use_augmentation: true version: null ================================================ FILE: external/chamfer/chamfer.cu ================================================ #include #include #include #include #include __global__ void NmDistanceKernel(int b, int n, const float *xyz, int m, const float *xyz2, float *result, int *result_i) { const int batch = 512; __shared__ float buf[batch * 3]; for (int i = blockIdx.x; i < b; i += gridDim.x) { for (int k2 = 0; k2 < m; k2 += batch) { int end_k = min(m, k2 + batch) - k2; for (int j = threadIdx.x; j < end_k * 3; j += blockDim.x) { buf[j] = xyz2[(i * m + k2) * 3 + j]; } __syncthreads(); for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) { float x1 = xyz[(i * n + j) * 3 + 0]; float y1 = 
xyz[(i * n + j) * 3 + 1]; float z1 = xyz[(i * n + j) * 3 + 2]; int best_i = 0; float best = 0; int end_ka = end_k - (end_k & 3); if (end_ka == batch) { for (int k = 0; k < batch; k += 4) { { float x2 = buf[k * 3 + 0] - x1; float y2 = buf[k * 3 + 1] - y1; float z2 = buf[k * 3 + 2] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (k == 0 || d < best) { best = d; best_i = k + k2; } } { float x2 = buf[k * 3 + 3] - x1; float y2 = buf[k * 3 + 4] - y1; float z2 = buf[k * 3 + 5] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 1; } } { float x2 = buf[k * 3 + 6] - x1; float y2 = buf[k * 3 + 7] - y1; float z2 = buf[k * 3 + 8] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 2; } } { float x2 = buf[k * 3 + 9] - x1; float y2 = buf[k * 3 + 10] - y1; float z2 = buf[k * 3 + 11] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 3; } } } } else { for (int k = 0; k < end_ka; k += 4) { { float x2 = buf[k * 3 + 0] - x1; float y2 = buf[k * 3 + 1] - y1; float z2 = buf[k * 3 + 2] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (k == 0 || d < best) { best = d; best_i = k + k2; } } { float x2 = buf[k * 3 + 3] - x1; float y2 = buf[k * 3 + 4] - y1; float z2 = buf[k * 3 + 5] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 1; } } { float x2 = buf[k * 3 + 6] - x1; float y2 = buf[k * 3 + 7] - y1; float z2 = buf[k * 3 + 8] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 2; } } { float x2 = buf[k * 3 + 9] - x1; float y2 = buf[k * 3 + 10] - y1; float z2 = buf[k * 3 + 11] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (d < best) { best = d; best_i = k + k2 + 3; } } } } for (int k = end_ka; k < end_k; k++) { float x2 = buf[k * 3 + 0] - x1; float y2 = buf[k * 3 + 1] - y1; float z2 = buf[k * 3 + 2] - z1; float d = x2 * x2 + y2 * y2 + z2 * z2; if (k == 0 || d < best) { best = d; best_i = k + k2; } } if (k2 == 0 || result[(i * n + j)] > best) { result[(i * n + j)] = best; result_i[(i * n + j)] = best_i; } } __syncthreads(); } } } int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { const auto batch_size = xyz1.size(0); const auto n = xyz1.size(1); //num_points point cloud A const auto m = xyz2.size(1); //num_points point cloud B NmDistanceKernel <<< dim3(32, 16, 1), 512 >>> (batch_size, n, xyz1.data(), m, xyz2.data(), dist1.data(), idx1.data()); NmDistanceKernel <<< dim3(32, 16, 1), 512 >>> (batch_size, m, xyz2.data(), n, xyz1.data(), dist2.data(), idx2.data()); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err)); return 0; } return 1; } __global__ void NmDistanceGradKernel(int b, int n, const float *xyz1, int m, const float *xyz2, const float *grad_dist1, const int *idx1, float *grad_xyz1, float *grad_xyz2) { for (int i = blockIdx.x; i < b; i += gridDim.x) { for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) { float x1 = xyz1[(i * n + j) * 3 + 0]; float y1 = xyz1[(i * n + j) * 3 + 1]; float z1 = xyz1[(i * n + j) * 3 + 2]; int j2 = idx1[i * n + j]; float x2 = xyz2[(i * m + j2) * 3 + 0]; float y2 = xyz2[(i * m + j2) * 3 + 1]; float z2 = xyz2[(i * m + j2) * 3 + 2]; float g = grad_dist1[i * n + j] * 2; atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 0]), g * (x1 - x2)); atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 1]), g * (y1 - y2)); 
atomicAdd(&(grad_xyz1[(i * n + j) * 3 + 2]), g * (z1 - z2)); atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 0]), -(g * (x1 - x2))); atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 1]), -(g * (y1 - y2))); atomicAdd(&(grad_xyz2[(i * m + j2) * 3 + 2]), -(g * (z1 - z2))); } } } int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { const auto batch_size = xyz1.size(0); const auto n = xyz1.size(1); // num_points point cloud A const auto m = xyz2.size(1); // num_points point cloud B NmDistanceGradKernel <<< dim3(1, 16, 1), 256 >>> (batch_size, n, xyz1.data(), m, xyz2.data(), graddist1.data(), idx1.data(), gradxyz1.data(), gradxyz2.data()); NmDistanceGradKernel <<< dim3(1, 16, 1), 256 >>> (batch_size, m, xyz2.data(), n, xyz1.data(), graddist2.data(), idx2.data(), gradxyz2.data(), gradxyz1.data()); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); return 0; } return 1; } ================================================ FILE: external/chamfer/chamfer_cuda.cpp ================================================ #include #include int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); } int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); } ================================================ FILE: external/chamfer/setup.py ================================================ from setuptools import setup from torch.utils.cpp_extension import BuildExtension, CUDAExtension setup( name='chamfer', ext_modules=[ CUDAExtension('chamfer', [ 'chamfer_cuda.cpp', 'chamfer.cu', ]), ], cmdclass={ 'build_ext': BuildExtension }) ================================================ FILE: external/chamfer/test.py ================================================ import sys import os for file in os.listdir("build"): if file.startswith("lib"): sys.path.insert(0, os.path.join("build", file)) # torch must be imported before we import chamfer import torch import chamfer batch_size = 8 n, m = 30, 20 xyz1 = torch.rand((batch_size, n, 3)).cuda() xyz2 = torch.rand((batch_size, m, 3)).cuda() dist1 = torch.zeros(batch_size, n).cuda() dist2 = torch.zeros(batch_size, m).cuda() idx1 = torch.zeros((batch_size, n), dtype=torch.int).cuda() idx2 = torch.zeros((batch_size, m), dtype=torch.int).cuda() chamfer.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) print(dist1) print(dist2) print(idx1) print(idx2) ================================================ FILE: functions/base.py ================================================ import os import time from datetime import timedelta from logging import Logger import torch import torch.nn from 
tensorboardX import SummaryWriter from torch.utils.data.dataloader import default_collate import config from datasets.imagenet import ImageNet from datasets.shapenet import ShapeNet, get_shapenet_collate, ShapeNetImageFolder from functions.saver import CheckpointSaver class CheckpointRunner(object): def __init__(self, options, logger: Logger, summary_writer: SummaryWriter, dataset=None, training=True, shared_model=None): self.options = options self.logger = logger # GPUs if not torch.cuda.is_available() and self.options.num_gpus > 0: raise ValueError("CUDA not found yet number of GPUs is set to be greater than 0") if os.environ.get("CUDA_VISIBLE_DEVICES"): logger.info("CUDA visible devices is activated here, number of GPU setting is not working") self.gpus = list(map(int, os.environ["CUDA_VISIBLE_DEVICES"].split(","))) self.options.num_gpus = len(self.gpus) enumerate_gpus = list(range(self.options.num_gpus)) logger.info("CUDA is asking for " + str(self.gpus) + ", PyTorch to doing a mapping, changing it to " + str(enumerate_gpus)) self.gpus = enumerate_gpus else: self.gpus = list(range(self.options.num_gpus)) logger.info("Using GPUs: " + str(self.gpus)) # initialize summary writer self.summary_writer = summary_writer # initialize dataset if dataset is None: dataset = options.dataset # useful during training self.dataset = self.load_dataset(dataset, training) self.dataset_collate_fn = self.load_collate_fn(dataset, training) # by default, epoch_count = step_count = 0 self.epoch_count = self.step_count = 0 self.time_start = time.time() # override this function to define your model, optimizers etc. # in case you want to use a model that is defined in a trainer or other place in the code, # shared_model should help. in this case, checkpoint is not used self.logger.info("Running model initialization...") self.init_fn(shared_model=shared_model) if shared_model is None: # checkpoint is loaded if any self.saver = CheckpointSaver(self.logger, checkpoint_dir=str(self.options.checkpoint_dir), checkpoint_file=self.options.checkpoint) self.init_with_checkpoint() def load_dataset(self, dataset, training): self.logger.info("Loading datasets: %s" % dataset.name) if dataset.name == "shapenet": return ShapeNet(config.SHAPENET_ROOT, dataset.subset_train if training else dataset.subset_eval, dataset.mesh_pos, dataset.normalization, dataset.shapenet) elif dataset.name == "shapenet_demo": return ShapeNetImageFolder(dataset.predict.folder, dataset.normalization, dataset.shapenet) elif dataset.name == "imagenet": return ImageNet(config.IMAGENET_ROOT, "train" if training else "val") raise NotImplementedError("Unsupported dataset") def load_collate_fn(self, dataset, training): if dataset.name == "shapenet": return get_shapenet_collate(dataset.shapenet.num_points) else: return default_collate def init_fn(self, shared_model=None, **kwargs): raise NotImplementedError('You need to provide an _init_fn method') # Pack models and optimizers in a dict - necessary for checkpointing def models_dict(self): return None def optimizers_dict(self): # NOTE: optimizers and models cannot have conflicting names return None def init_with_checkpoint(self): checkpoint = self.saver.load_checkpoint() if checkpoint is None: self.logger.info("Checkpoint not loaded") return for model_name, model in self.models_dict().items(): if model_name in checkpoint: if isinstance(model, torch.nn.DataParallel): model.module.load_state_dict(checkpoint[model_name], strict=False) else: model.load_state_dict(checkpoint[model_name], strict=False) if 
self.optimizers_dict() is not None: for optimizer_name, optimizer in self.optimizers_dict().items(): if optimizer_name in checkpoint: optimizer.load_state_dict(checkpoint[optimizer_name]) else: self.logger.warning("Optimizers not found in the runner, skipping...") if "epoch" in checkpoint: self.epoch_count = checkpoint["epoch"] if "total_step_count" in checkpoint: self.step_count = checkpoint["total_step_count"] def dump_checkpoint(self): checkpoint = { "epoch": self.epoch_count, "total_step_count": self.step_count } for model_name, model in self.models_dict().items(): if isinstance(model, torch.nn.DataParallel): checkpoint[model_name] = model.module.state_dict() else: checkpoint[model_name] = model.state_dict() for k, v in list(checkpoint[model_name].items()): if isinstance(v, torch.Tensor) and v.is_sparse: checkpoint[model_name].pop(k) if self.optimizers_dict() is not None: for optimizer_name, optimizer in self.optimizers_dict().items(): checkpoint[optimizer_name] = optimizer.state_dict() self.saver.save_checkpoint(checkpoint, "%06d_%06d" % (self.step_count, self.epoch_count)) @property def time_elapsed(self): return timedelta(seconds=time.time() - self.time_start) ================================================ FILE: functions/evaluator.py ================================================ from logging import Logger import numpy as np import torch import torch.nn as nn from torch.utils.data import DataLoader from functions.base import CheckpointRunner from models.classifier import Classifier from models.layers.chamfer_wrapper import ChamferDist from models.p2m import P2MModel from utils.average_meter import AverageMeter from utils.mesh import Ellipsoid from utils.vis.renderer import MeshRenderer class Evaluator(CheckpointRunner): def __init__(self, options, logger: Logger, writer, shared_model=None): super().__init__(options, logger, writer, training=False, shared_model=shared_model) # noinspection PyAttributeOutsideInit def init_fn(self, shared_model=None, **kwargs): if self.options.model.name == "pixel2mesh": # Renderer for visualization self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) # Initialize distance module self.chamfer = ChamferDist() # create ellipsoid self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos) # use weighted mean evaluation metrics or not self.weighted_mean = self.options.test.weighted_mean else: self.renderer = None self.num_classes = self.options.dataset.num_classes if shared_model is not None: self.model = shared_model else: if self.options.model.name == "pixel2mesh": # create model self.model = P2MModel(self.options.model, self.ellipsoid, self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) elif self.options.model.name == "classifier": self.model = Classifier(self.options.model, self.options.dataset.num_classes) else: raise NotImplementedError("Your model is not found") self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda() # Evaluate step count, useful in summary self.evaluate_step_count = 0 self.total_step_count = 0 def models_dict(self): return {'model': self.model} def evaluate_f1(self, dis_to_pred, dis_to_gt, pred_length, gt_length, thresh): recall = np.sum(dis_to_gt < thresh) / gt_length prec = np.sum(dis_to_pred < thresh) / pred_length return 2 * prec * recall / (prec + recall + 1e-8) def evaluate_chamfer_and_f1(self, pred_vertices, gt_points, labels): # calculate accurate chamfer distance; ground truth points with 
different lengths; # therefore cannot be batched batch_size = pred_vertices.size(0) pred_length = pred_vertices.size(1) for i in range(batch_size): gt_length = gt_points[i].size(0) label = labels[i].cpu().item() d1, d2, i1, i2 = self.chamfer(pred_vertices[i].unsqueeze(0), gt_points[i].unsqueeze(0)) d1, d2 = d1.cpu().numpy(), d2.cpu().numpy() # convert to millimeter self.chamfer_distance[label].update(np.mean(d1) + np.mean(d2)) self.f1_tau[label].update(self.evaluate_f1(d1, d2, pred_length, gt_length, 1E-4)) self.f1_2tau[label].update(self.evaluate_f1(d1, d2, pred_length, gt_length, 2E-4)) def evaluate_accuracy(self, output, target): """Computes the accuracy over the k top predictions for the specified values of k""" top_k = [1, 5] maxk = max(top_k) batch_size = target.size(0) _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) for k in top_k: correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) acc = correct_k.mul_(1.0 / batch_size) if k == 1: self.acc_1.update(acc) elif k == 5: self.acc_5.update(acc) def evaluate_step(self, input_batch): self.model.eval() # Run inference with torch.no_grad(): # Get ground truth images = input_batch['images'] out = self.model(images) if self.options.model.name == "pixel2mesh": pred_vertices = out["pred_coord"][-1] gt_points = input_batch["points_orig"] if isinstance(gt_points, list): gt_points = [pts.cuda() for pts in gt_points] self.evaluate_chamfer_and_f1(pred_vertices, gt_points, input_batch["labels"]) elif self.options.model.name == "classifier": self.evaluate_accuracy(out, input_batch["labels"]) return out # noinspection PyAttributeOutsideInit def evaluate(self): self.logger.info("Running evaluations...") # clear evaluate_step_count, but keep total count uncleared self.evaluate_step_count = 0 test_data_loader = DataLoader(self.dataset, batch_size=self.options.test.batch_size * self.options.num_gpus, num_workers=self.options.num_workers, pin_memory=self.options.pin_memory, shuffle=self.options.test.shuffle, collate_fn=self.dataset_collate_fn) if self.options.model.name == "pixel2mesh": self.chamfer_distance = [AverageMeter() for _ in range(self.num_classes)] self.f1_tau = [AverageMeter() for _ in range(self.num_classes)] self.f1_2tau = [AverageMeter() for _ in range(self.num_classes)] elif self.options.model.name == "classifier": self.acc_1 = AverageMeter() self.acc_5 = AverageMeter() # Iterate over all batches in an epoch for step, batch in enumerate(test_data_loader): # Send input to GPU batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()} # Run evaluation step out = self.evaluate_step(batch) # Tensorboard logging every summary_steps steps if self.evaluate_step_count % self.options.test.summary_steps == 0: self.evaluate_summaries(batch, out) # add later to log at step 0 self.evaluate_step_count += 1 self.total_step_count += 1 for key, val in self.get_result_summary().items(): scalar = val if isinstance(val, AverageMeter): scalar = val.avg self.logger.info("Test [%06d] %s: %.6f" % (self.total_step_count, key, scalar)) self.summary_writer.add_scalar("eval_" + key, scalar, self.total_step_count + 1) def average_of_average_meters(self, average_meters): s = sum([meter.sum for meter in average_meters]) c = sum([meter.count for meter in average_meters]) weighted_avg = s / c if c > 0 else 0. 
avg = sum([meter.avg for meter in average_meters]) / len(average_meters) ret = AverageMeter() if self.weighted_mean: ret.val, ret.avg = avg, weighted_avg else: ret.val, ret.avg = weighted_avg, avg return ret def get_result_summary(self): if self.options.model.name == "pixel2mesh": return { "cd": self.average_of_average_meters(self.chamfer_distance), "f1_tau": self.average_of_average_meters(self.f1_tau), "f1_2tau": self.average_of_average_meters(self.f1_2tau), } elif self.options.model.name == "classifier": return { "acc_1": self.acc_1, "acc_5": self.acc_5, } def evaluate_summaries(self, input_batch, out_summary): self.logger.info("Test Step %06d/%06d (%06d) " % (self.evaluate_step_count, len(self.dataset) // ( self.options.num_gpus * self.options.test.batch_size), self.total_step_count,) \ + ", ".join([key + " " + (str(val) if isinstance(val, AverageMeter) else "%.6f" % val) for key, val in self.get_result_summary().items()])) self.summary_writer.add_histogram("eval_labels", input_batch["labels"].cpu().numpy(), self.total_step_count) if self.renderer is not None: # Do visualization for the first 2 images of the batch render_mesh = self.renderer.p2m_batch_visualize(input_batch, out_summary, self.ellipsoid.faces) self.summary_writer.add_image("eval_render_mesh", render_mesh, self.total_step_count) ================================================ FILE: functions/predictor.py ================================================ import os import random from logging import Logger import imageio import numpy as np import torch from torch.utils.data import DataLoader from tqdm import tqdm from functions.base import CheckpointRunner from models.p2m import P2MModel from utils.mesh import Ellipsoid from utils.vis.renderer import MeshRenderer class Predictor(CheckpointRunner): def __init__(self, options, logger: Logger, writer, shared_model=None): super().__init__(options, logger, writer, training=False, shared_model=shared_model) # noinspection PyAttributeOutsideInit def init_fn(self, shared_model=None, **kwargs): self.gpu_inference = self.options.num_gpus > 0 if not self.gpu_inference: raise NotImplementedError("CPU inference is currently buggy. This would take some extra effort and " "might be fixed in the future.") # self.logger.warning("Render part would be disabled since you are using CPU. " # "Neural renderer requires GPU to run. Please use other software " # "or packages to view the generated .obj files.") if self.options.model.name == "pixel2mesh": # create ellipsoid self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos) # create model self.model = P2MModel(self.options.model, self.ellipsoid, self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) if self.gpu_inference: self.model.cuda() # create renderer self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) else: raise NotImplementedError("Currently the predictor only supports pixel2mesh") def models_dict(self): return {'model': self.model} def predict_step(self, input_batch): self.model.eval() # Run inference with torch.no_grad(): images = input_batch['images'] out = self.model(images) self.save_inference_results(input_batch, out) def predict(self): self.logger.info("Running predictions...") predict_data_loader = DataLoader(self.dataset, batch_size=self.options.test.batch_size, pin_memory=self.options.pin_memory, collate_fn=self.dataset_collate_fn) for step, batch in enumerate(predict_data_loader): self.logger.info("Predicting [%05d/%05d]" % (step * self.options.test.batch_size, len(self.dataset))) if self.gpu_inference: # Send input to GPU batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()} self.predict_step(batch) def save_inference_results(self, inputs, outputs): if self.options.model.name == "pixel2mesh": batch_size = inputs["images"].size(0) for i in range(batch_size): basename, ext = os.path.splitext(inputs["filepath"][i]) mesh_center = np.mean(outputs["pred_coord_before_deform"][0][i].cpu().numpy(), 0) verts = [outputs["pred_coord"][k][i].cpu().numpy() for k in range(3)] for k, vert in enumerate(verts): meshname = basename + ".%d.obj" % (k + 1) vert_v = np.hstack((np.full([vert.shape[0], 1], "v"), vert)) mesh = np.vstack((vert_v, self.ellipsoid.obj_fmt_faces[k])) np.savetxt(meshname, mesh, fmt='%s', delimiter=" ") if self.gpu_inference: # generate gif here color_repo = ['light_blue', 'purple', 'orange', 'light_yellow'] rot_degree = 10 rot_radius = rot_degree / 180 * np.pi rot_matrix = np.array([ [np.cos(rot_radius), 0, -np.sin(rot_radius)], [0., 1., 0.], [np.sin(rot_radius), 0, np.cos(rot_radius)] ]) writer = imageio.get_writer(basename + ".gif", mode='I') color = random.choice(color_repo) for _ in tqdm(range(360 // rot_degree), desc="Rendering sample %d" % i): image = inputs["images_orig"][i].cpu().numpy() ret = image for k, vert in enumerate(verts): vert = rot_matrix.dot((vert - mesh_center).T).T + mesh_center rend_result = self.renderer.visualize_reconstruction(None, vert + \ np.array( self.options.dataset.mesh_pos), self.ellipsoid.faces[k], image, mesh_only=True, color=color) ret = np.concatenate((ret, rend_result), axis=2) verts[k] = vert ret = np.transpose(ret, (1, 2, 0)) writer.append_data((255 * ret).astype(np.uint8)) writer.close() ================================================ FILE: functions/saver.py ================================================ import os import torch import torch.nn class CheckpointSaver(object): """Class that handles saving and loading checkpoints during training.""" def __init__(self, logger, checkpoint_dir=None, checkpoint_file=None): self.logger = logger if checkpoint_file is not None: if not os.path.exists(checkpoint_file): raise ValueError("Checkpoint file [%s] does not exist!" % checkpoint_file) self.save_dir = os.path.dirname(os.path.abspath(checkpoint_file)) self.checkpoint_file = os.path.abspath(checkpoint_file) return if checkpoint_dir is None: raise ValueError("Checkpoint directory must not be None when no checkpoint file is provided!") self.save_dir = os.path.abspath(checkpoint_dir) self.checkpoint_file = self.get_latest_checkpoint() def load_checkpoint(self): if self.checkpoint_file is None: self.logger.info("Checkpoint file not found, skipping...") return None self.logger.info("Loading checkpoint file: %s" % self.checkpoint_file) try: return torch.load(self.checkpoint_file) except UnicodeDecodeError: # to be compatible with old encoding methods return torch.load(self.checkpoint_file, encoding="bytes") def save_checkpoint(self, obj, name): self.checkpoint_file = os.path.join(self.save_dir, "%s.pt" % name) self.logger.info("Dumping to checkpoint file: %s" % self.checkpoint_file) torch.save(obj, self.checkpoint_file) def get_latest_checkpoint(self): # this will automatically find the checkpoint with latest modified time checkpoint_list = [] for dirpath, dirnames, filenames in os.walk(self.save_dir): for filename in filenames: if filename.endswith('.pt'): file_path = os.path.abspath(os.path.join(dirpath, filename)) modified_time = os.path.getmtime(file_path) checkpoint_list.append((file_path, modified_time)) checkpoint_list = sorted(checkpoint_list, key=lambda x: x[1]) return None if not checkpoint_list else checkpoint_list[-1][0] ================================================ FILE: functions/trainer.py ================================================ import time from datetime import timedelta import torch import torch.nn as nn from torch.utils.data import DataLoader from functions.base import CheckpointRunner from functions.evaluator import Evaluator from models.classifier import Classifier from models.losses.classifier import CrossEntropyLoss from models.losses.p2m import P2MLoss from models.p2m import P2MModel from utils.average_meter import AverageMeter from utils.mesh import Ellipsoid from utils.tensor import recursive_detach from utils.vis.renderer import MeshRenderer class Trainer(CheckpointRunner): # noinspection PyAttributeOutsideInit def init_fn(self, shared_model=None, **kwargs): if self.options.model.name == "pixel2mesh": # Visualization renderer self.renderer = MeshRenderer(self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) # create ellipsoid self.ellipsoid = Ellipsoid(self.options.dataset.mesh_pos) else: self.renderer = None if shared_model is not None: self.model = shared_model else: if self.options.model.name == "pixel2mesh": # create model self.model = P2MModel(self.options.model, self.ellipsoid, self.options.dataset.camera_f, self.options.dataset.camera_c, self.options.dataset.mesh_pos) elif self.options.model.name == "classifier": self.model = Classifier(self.options.model, self.options.dataset.num_classes) else: raise NotImplementedError("Your model is not found") self.model = torch.nn.DataParallel(self.model, device_ids=self.gpus).cuda() # Set up the optimizer for the model if self.options.optim.name == "adam": self.optimizer = torch.optim.Adam( params=list(self.model.parameters()), lr=self.options.optim.lr, betas=(self.options.optim.adam_beta1, 0.999), weight_decay=self.options.optim.wd ) elif self.options.optim.name == "sgd": self.optimizer = torch.optim.SGD( params=list(self.model.parameters()), lr=self.options.optim.lr, momentum=self.options.optim.sgd_momentum,
weight_decay=self.options.optim.wd ) else: raise NotImplementedError("Your optimizer is not found") self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( self.optimizer, self.options.optim.lr_step, self.options.optim.lr_factor ) # Create loss functions if self.options.model.name == "pixel2mesh": self.criterion = P2MLoss(self.options.loss, self.ellipsoid).cuda() elif self.options.model.name == "classifier": self.criterion = CrossEntropyLoss() else: raise NotImplementedError("Your loss is not found") # Create AverageMeters for losses self.losses = AverageMeter() # Evaluators self.evaluators = [Evaluator(self.options, self.logger, self.summary_writer, shared_model=self.model)] def models_dict(self): return {'model': self.model} def optimizers_dict(self): return {'optimizer': self.optimizer, 'lr_scheduler': self.lr_scheduler} def train_step(self, input_batch): self.model.train() # Grab data from the batch images = input_batch["images"] # predict with model out = self.model(images) # compute loss loss, loss_summary = self.criterion(out, input_batch) self.losses.update(loss.detach().cpu().item()) # Do backprop self.optimizer.zero_grad() loss.backward() self.optimizer.step() # Pack output arguments to be used for visualization return recursive_detach(out), recursive_detach(loss_summary) def train(self): # Run training for num_epochs epochs for epoch in range(self.epoch_count, self.options.train.num_epochs): self.epoch_count += 1 # Create a new data loader for every epoch train_data_loader = DataLoader(self.dataset, batch_size=self.options.train.batch_size * self.options.num_gpus, num_workers=self.options.num_workers, pin_memory=self.options.pin_memory, shuffle=self.options.train.shuffle, collate_fn=self.dataset_collate_fn) # Reset loss self.losses.reset() # Iterate over all batches in an epoch for step, batch in enumerate(train_data_loader): # Send input to GPU batch = {k: v.cuda() if isinstance(v, torch.Tensor) else v for k, v in batch.items()} # Run training step out = self.train_step(batch) self.step_count += 1 # Tensorboard logging every summary_steps steps if self.step_count % self.options.train.summary_steps == 0: self.train_summaries(batch, *out) # Save checkpoint every checkpoint_steps steps if self.step_count % self.options.train.checkpoint_steps == 0: self.dump_checkpoint() # save checkpoint after each epoch self.dump_checkpoint() # Run validation every test_epochs if self.epoch_count % self.options.train.test_epochs == 0: self.test() # lr scheduler step self.lr_scheduler.step() def train_summaries(self, input_batch, out_summary, loss_summary): if self.renderer is not None: # Do visualization for the first 2 images of the batch render_mesh = self.renderer.p2m_batch_visualize(input_batch, out_summary, self.ellipsoid.faces) self.summary_writer.add_image("render_mesh", render_mesh, self.step_count) self.summary_writer.add_histogram("length_distribution", input_batch["length"].cpu().numpy(), self.step_count) # Debug info for filenames self.logger.debug(input_batch["filename"]) # Save results in Tensorboard for k, v in loss_summary.items(): self.summary_writer.add_scalar(k, v, self.step_count) # Save results to log self.logger.info("Epoch %03d, Step %06d/%06d, Time elapsed %s, Loss %.9f (%.9f)" % ( self.epoch_count, self.step_count, self.options.train.num_epochs * len(self.dataset) // ( self.options.train.batch_size * self.options.num_gpus), self.time_elapsed, self.losses.val, self.losses.avg)) def test(self): for evaluator in self.evaluators: evaluator.evaluate() 
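# For reference (not a file in this repository): the optimizer/scheduler wiring in Trainer above
# follows the standard PyTorch pattern. A minimal, self-contained sketch using the default
# hyperparameters from options.py (Adam, lr=5e-5, wd=1e-6, lr_step=[30, 45], lr_factor=0.1)
# and a dummy stand-in model:
import torch
model = torch.nn.Linear(3, 3)  # hypothetical stand-in for P2MModel
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.999), weight_decay=1e-6)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 45], gamma=0.1)
for epoch in range(50):  # options.train.num_epochs defaults to 50
    # ... one epoch of train_step() calls, checkpointing and validation goes here ...
    lr_scheduler.step()  # stepped once per epoch, after validation, as in Trainer.train()
# learning rate: 5e-5 for epochs 0-29, 5e-6 for epochs 30-44, 5e-7 afterwards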
================================================ FILE: logger.py ================================================ import logging import os def create_logger(cfg, phase='train'): log_file = '{}_{}.log'.format(cfg.version, phase) final_log_file = os.path.join(cfg.log_dir, log_file) head = '%(asctime)-15s %(message)s' logging.basicConfig(filename=str(final_log_file), format=head) logger = logging.getLogger() if cfg.log_level == "info": logger.setLevel(logging.INFO) elif cfg.log_level == "debug": logger.setLevel(logging.DEBUG) else: raise NotImplementedError("Log level has to be one of info and debug") console = logging.StreamHandler() logging.getLogger('').addHandler(console) return logger ================================================ FILE: models/backbones/__init__.py ================================================ from models.backbones.resnet import resnet50 from models.backbones.vgg16 import VGG16TensorflowAlign, VGG16P2M, VGG16Recons def get_backbone(options): if options.backbone.startswith("vgg16"): if options.align_with_tensorflow: nn_encoder = VGG16TensorflowAlign() else: nn_encoder = VGG16P2M(pretrained="pretrained" in options.backbone) nn_decoder = VGG16Recons() elif options.backbone == "resnet50": nn_encoder = resnet50() nn_decoder = None else: raise NotImplementedError("No implemented backbone called '%s' found" % options.backbone) return nn_encoder, nn_decoder ================================================ FILE: models/backbones/resnet.py ================================================ import torch from torchvision.models import ResNet from torchvision.models.resnet import Bottleneck import config class P2MResNet(ResNet): def __init__(self, *args, **kwargs): self.output_dim = 0 super().__init__(*args, **kwargs) def _make_layer(self, block, planes, blocks, stride=1, dilate=False): res = super()._make_layer(block, planes, blocks, stride=stride, dilate=dilate) self.output_dim += self.inplanes return res def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) features = [] x = self.layer1(x) features.append(x) x = self.layer2(x) features.append(x) x = self.layer3(x) features.append(x) x = self.layer4(x) features.append(x) return features @property def features_dim(self): return self.output_dim def resnet50(): model = P2MResNet(Bottleneck, [3, 4, 6, 3]) state_dict = torch.load(config.PRETRAINED_WEIGHTS_PATH["resnet50"]) model.load_state_dict(state_dict) return model ================================================ FILE: models/backbones/vgg16.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F import config class VGG16TensorflowAlign(nn.Module): def __init__(self, n_classes_input=3): super(VGG16TensorflowAlign, self).__init__() self.features_dim = 960 # this is to align with tensorflow padding (with stride) # https://bugxch.github.io/tf%E4%B8%AD%E7%9A%84padding%E6%96%B9%E5%BC%8FSAME%E5%92%8CVALID%E6%9C%89%E4%BB%80%E4%B9%88%E5%8C%BA%E5%88%AB/ self.same_padding = nn.ZeroPad2d(1) self.tf_padding = nn.ZeroPad2d((0, 1, 0, 1)) self.tf_padding_2 = nn.ZeroPad2d((1, 2, 1, 2)) self.conv0_1 = nn.Conv2d(n_classes_input, 16, 3, stride=1, padding=0) self.conv0_2 = nn.Conv2d(16, 16, 3, stride=1, padding=0) self.conv1_1 = nn.Conv2d(16, 32, 3, stride=2, padding=0) # 224 -> 112 self.conv1_2 = nn.Conv2d(32, 32, 3, stride=1, padding=0) self.conv1_3 = nn.Conv2d(32, 32, 3, stride=1, padding=0) self.conv2_1 = nn.Conv2d(32, 64, 3, stride=2, padding=0) # 112 -> 56 self.conv2_2 = nn.Conv2d(64, 64, 3, 
stride=1, padding=0) self.conv2_3 = nn.Conv2d(64, 64, 3, stride=1, padding=0) self.conv3_1 = nn.Conv2d(64, 128, 3, stride=2, padding=0) # 56 -> 28 self.conv3_2 = nn.Conv2d(128, 128, 3, stride=1, padding=0) self.conv3_3 = nn.Conv2d(128, 128, 3, stride=1, padding=0) self.conv4_1 = nn.Conv2d(128, 256, 5, stride=2, padding=0) # 28 -> 14 self.conv4_2 = nn.Conv2d(256, 256, 3, stride=1, padding=0) self.conv4_3 = nn.Conv2d(256, 256, 3, stride=1, padding=0) self.conv5_1 = nn.Conv2d(256, 512, 5, stride=2, padding=0) # 14 -> 7 self.conv5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=0) self.conv5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=0) self.conv5_4 = nn.Conv2d(512, 512, 3, stride=1, padding=0) def forward(self, img): img = F.relu(self.conv0_1(self.same_padding(img))) img = F.relu(self.conv0_2(self.same_padding(img))) img = F.relu(self.conv1_1(self.tf_padding(img))) img = F.relu(self.conv1_2(self.same_padding(img))) img = F.relu(self.conv1_3(self.same_padding(img))) img = F.relu(self.conv2_1(self.tf_padding(img))) img = F.relu(self.conv2_2(self.same_padding(img))) img = F.relu(self.conv2_3(self.same_padding(img))) img2 = img img = F.relu(self.conv3_1(self.tf_padding(img))) img = F.relu(self.conv3_2(self.same_padding(img))) img = F.relu(self.conv3_3(self.same_padding(img))) img3 = img img = F.relu(self.conv4_1(self.tf_padding_2(img))) img = F.relu(self.conv4_2(self.same_padding(img))) img = F.relu(self.conv4_3(self.same_padding(img))) img4 = img img = F.relu(self.conv5_1(self.tf_padding_2(img))) img = F.relu(self.conv5_2(self.same_padding(img))) img = F.relu(self.conv5_3(self.same_padding(img))) img = F.relu(self.conv5_4(self.same_padding(img))) img5 = img return [img2, img3, img4, img5] class VGG16P2M(nn.Module): def __init__(self, n_classes_input=3, pretrained=False): super(VGG16P2M, self).__init__() self.features_dim = 960 self.conv0_1 = nn.Conv2d(n_classes_input, 16, 3, stride=1, padding=1) self.conv0_2 = nn.Conv2d(16, 16, 3, stride=1, padding=1) self.conv1_1 = nn.Conv2d(16, 32, 3, stride=2, padding=1) # 224 -> 112 self.conv1_2 = nn.Conv2d(32, 32, 3, stride=1, padding=1) self.conv1_3 = nn.Conv2d(32, 32, 3, stride=1, padding=1) self.conv2_1 = nn.Conv2d(32, 64, 3, stride=2, padding=1) # 112 -> 56 self.conv2_2 = nn.Conv2d(64, 64, 3, stride=1, padding=1) self.conv2_3 = nn.Conv2d(64, 64, 3, stride=1, padding=1) self.conv3_1 = nn.Conv2d(64, 128, 3, stride=2, padding=1) # 56 -> 28 self.conv3_2 = nn.Conv2d(128, 128, 3, stride=1, padding=1) self.conv3_3 = nn.Conv2d(128, 128, 3, stride=1, padding=1) self.conv4_1 = nn.Conv2d(128, 256, 5, stride=2, padding=2) # 28 -> 14 self.conv4_2 = nn.Conv2d(256, 256, 3, stride=1, padding=1) self.conv4_3 = nn.Conv2d(256, 256, 3, stride=1, padding=1) self.conv5_1 = nn.Conv2d(256, 512, 5, stride=2, padding=2) # 14 -> 7 self.conv5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1) self.conv5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1) self.conv5_4 = nn.Conv2d(512, 512, 3, stride=1, padding=1) if "vgg16p2m" in config.PRETRAINED_WEIGHTS_PATH and pretrained: state_dict = torch.load(config.PRETRAINED_WEIGHTS_PATH["vgg16p2m"]) self.load_state_dict(state_dict) else: self._initialize_weights() def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) 
nn.init.constant_(m.bias, 0) def forward(self, img): img = F.relu(self.conv0_1(img)) img = F.relu(self.conv0_2(img)) # img0 = torch.squeeze(img) # 224 img = F.relu(self.conv1_1(img)) img = F.relu(self.conv1_2(img)) img = F.relu(self.conv1_3(img)) # img1 = torch.squeeze(img) # 112 img = F.relu(self.conv2_1(img)) img = F.relu(self.conv2_2(img)) img = F.relu(self.conv2_3(img)) img2 = img img = F.relu(self.conv3_1(img)) img = F.relu(self.conv3_2(img)) img = F.relu(self.conv3_3(img)) img3 = img img = F.relu(self.conv4_1(img)) img = F.relu(self.conv4_2(img)) img = F.relu(self.conv4_3(img)) img4 = img img = F.relu(self.conv5_1(img)) img = F.relu(self.conv5_2(img)) img = F.relu(self.conv5_3(img)) img = F.relu(self.conv5_4(img)) img5 = img return [img2, img3, img4, img5] class VGG16Recons(nn.Module): def __init__(self, input_dim=512, image_channel=3): super(VGG16Recons, self).__init__() self.conv_1 = nn.ConvTranspose2d(input_dim, 256, kernel_size=2, stride=2, padding=0) # 7 -> 14 self.conv_2 = nn.ConvTranspose2d(512, 128, kernel_size=4, stride=2, padding=1) # 14 -> 28 self.conv_3 = nn.ConvTranspose2d(256, 64, kernel_size=4, stride=2, padding=1) # 28 -> 56 self.conv_4 = nn.ConvTranspose2d(128, 32, kernel_size=6, stride=2, padding=2) # 56 -> 112 self.conv_5 = nn.ConvTranspose2d(32, image_channel, kernel_size=6, stride=2, padding=2) # 112 -> 224 def forward(self, img_feats): x = F.relu(self.conv_1(img_feats[-1])) x = torch.cat((x, img_feats[-2]), dim=1) x = F.relu(self.conv_2(x)) x = torch.cat((x, img_feats[-3]), dim=1) x = F.relu(self.conv_3(x)) x = torch.cat((x, img_feats[-4]), dim=1) x = F.relu(self.conv_4(x)) x = F.relu(self.conv_5(x)) return torch.sigmoid(x) ================================================ FILE: models/classifier.py ================================================ import torch.nn as nn from models.backbones import get_backbone class Classifier(nn.Module): def __init__(self, options, num_classes): super(Classifier, self).__init__() self.nn_encoder, self.nn_decoder = get_backbone(options) if "vgg" in options.backbone: self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) self.classifier = nn.Sequential( nn.Linear(list(self.nn_encoder.children())[-1].out_channels * 7 * 7, 4096), nn.ReLU(True), nn.Dropout(), nn.Linear(4096, 4096), nn.ReLU(True), nn.Dropout(), nn.Linear(4096, num_classes), ) elif "resnet" in options.backbone: self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Linear(self.nn_encoder.inplanes, num_classes) else: raise NotImplementedError def _initialize_weights(self): for m in self.modules(): if isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.constant_(m.bias, 0) def forward(self, img): x = self.nn_encoder(img)[-1] # last layer x = self.avgpool(x) x = x.view(x.size(0), -1) x = self.classifier(x) return x ================================================ FILE: models/layers/chamfer_wrapper.py ================================================ import chamfer import torch import torch.nn as nn from torch.autograd import Function # Chamfer's distance module @thibaultgroueix # GPU tensors only class ChamferFunction(Function): @staticmethod def forward(ctx, xyz1, xyz2): batchsize, n, _ = xyz1.size() _, m, _ = xyz2.size() dist1 = torch.zeros(batchsize, n) dist2 = torch.zeros(batchsize, m) idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) dist1 = dist1.cuda() dist2 = dist2.cuda() idx1 = idx1.cuda() idx2 = idx2.cuda() chamfer.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) ctx.save_for_backward(xyz1, 
xyz2, idx1, idx2) return dist1, dist2, idx1, idx2 @staticmethod def backward(ctx, graddist1, graddist2, _idx1, _idx2): xyz1, xyz2, idx1, idx2 = ctx.saved_tensors graddist1 = graddist1.contiguous() graddist2 = graddist2.contiguous() gradxyz1 = torch.zeros(xyz1.size()) gradxyz2 = torch.zeros(xyz2.size()) gradxyz1 = gradxyz1.cuda() gradxyz2 = gradxyz2.cuda() chamfer.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) return gradxyz1, gradxyz2 class ChamferDist(nn.Module): def __init__(self): super(ChamferDist, self).__init__() def forward(self, input1, input2): return ChamferFunction.apply(input1, input2) ================================================ FILE: models/layers/gbottleneck.py ================================================ import torch.nn as nn import torch.nn.functional as F from models.layers.gconv import GConv class GResBlock(nn.Module): def __init__(self, in_dim, hidden_dim, adj_mat, activation=None): super(GResBlock, self).__init__() self.conv1 = GConv(in_features=in_dim, out_features=hidden_dim, adj_mat=adj_mat) self.conv2 = GConv(in_features=hidden_dim, out_features=in_dim, adj_mat=adj_mat) self.activation = F.relu if activation else None def forward(self, inputs): x = self.conv1(inputs) if self.activation: x = self.activation(x) x = self.conv2(x) if self.activation: x = self.activation(x) return (inputs + x) * 0.5 class GBottleneck(nn.Module): def __init__(self, block_num, in_dim, hidden_dim, out_dim, adj_mat, activation=None): super(GBottleneck, self).__init__() resblock_layers = [GResBlock(in_dim=hidden_dim, hidden_dim=hidden_dim, adj_mat=adj_mat, activation=activation) for _ in range(block_num)] self.blocks = nn.Sequential(*resblock_layers) self.conv1 = GConv(in_features=in_dim, out_features=hidden_dim, adj_mat=adj_mat) self.conv2 = GConv(in_features=hidden_dim, out_features=out_dim, adj_mat=adj_mat) self.activation = F.relu if activation else None def forward(self, inputs): x = self.conv1(inputs) if self.activation: x = self.activation(x) x_hidden = self.blocks(x) x_out = self.conv2(x_hidden) return x_out, x_hidden ================================================ FILE: models/layers/gconv.py ================================================ import math import torch import torch.nn as nn from utils.tensor import dot class GConv(nn.Module): """Simple GCN layer Similar to https://arxiv.org/abs/1609.02907 """ def __init__(self, in_features, out_features, adj_mat, bias=True): super(GConv, self).__init__() self.in_features = in_features self.out_features = out_features self.adj_mat = nn.Parameter(adj_mat, requires_grad=False) self.weight = nn.Parameter(torch.zeros((in_features, out_features), dtype=torch.float)) # Following https://github.com/Tong-ZHAO/Pixel2Mesh-Pytorch/blob/a0ae88c4a42eef6f8f253417b97df978db842708/model/gcn_layers.py#L45 # This seems to be different from the original implementation of P2M self.loop_weight = nn.Parameter(torch.zeros((in_features, out_features), dtype=torch.float)) if bias: self.bias = nn.Parameter(torch.zeros((out_features,), dtype=torch.float)) else: self.register_parameter('bias', None) self.reset_parameters() def reset_parameters(self): nn.init.xavier_uniform_(self.weight.data) nn.init.xavier_uniform_(self.loop_weight.data) def forward(self, inputs): support = torch.matmul(inputs, self.weight) support_loop = torch.matmul(inputs, self.loop_weight) output = dot(self.adj_mat, support, True) + support_loop if self.bias is not None: ret = output + self.bias else: ret = output return ret def __repr__(self): return 
self.__class__.__name__ + ' (' \ + str(self.in_features) + ' -> ' \ + str(self.out_features) + ')' ================================================ FILE: models/layers/gpooling.py ================================================ import torch import torch.nn as nn import numpy as np class GUnpooling(nn.Module): """Graph Unpooling layer, which adds additional vertices to the graph. The middle point of each edge is added, and its feature is simply the average of the features of the two edge endpoints. The three middle points within each triangle are connected. """ def __init__(self, unpool_idx): super(GUnpooling, self).__init__() self.unpool_idx = unpool_idx # save dim info self.in_num = torch.max(unpool_idx).item() self.out_num = self.in_num + len(unpool_idx) def forward(self, inputs): new_features = inputs[:, self.unpool_idx].clone() new_vertices = 0.5 * new_features.sum(2) output = torch.cat([inputs, new_vertices], 1) return output def __repr__(self): return self.__class__.__name__ + ' (' \ + str(self.in_num) + ' -> ' \ + str(self.out_num) + ')' ================================================ FILE: models/layers/gprojection.py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch.nn import Threshold class GProjection(nn.Module): """ Graph Projection layer, which pools 2D features onto the mesh. The layer projects each vertex of the mesh onto the 2D image and uses bilinear interpolation to get the corresponding feature. """ def __init__(self, mesh_pos, camera_f, camera_c, bound=0, tensorflow_compatible=False): super(GProjection, self).__init__() self.mesh_pos, self.camera_f, self.camera_c = mesh_pos, camera_f, camera_c self.threshold = None self.bound = bound self.tensorflow_compatible = tensorflow_compatible if self.bound != 0: self.threshold = Threshold(bound, bound) def bound_val(self, x): """ given x, return the thresholded value of x when a non-zero bound is set; otherwise return x unchanged """ if self.bound < 0: return -self.threshold(-x) elif self.bound > 0: return self.threshold(x) return x @staticmethod def image_feature_shape(img): return np.array([img.size(-1), img.size(-2)]) def project_tensorflow(self, x, y, img_size, img_feat): x = torch.clamp(x, min=0, max=img_size[1] - 1) y = torch.clamp(y, min=0, max=img_size[0] - 1) # it's tedious and contains bugs...
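# (Summary of the interpolation below: the four neighboring feature columns Q11, Q12, Q21, Q22
# at the floor/ceil corners (x1, y1)..(x2, y2) are combined with the standard bilinear weights
# (x2-x)(y2-y), (x2-x)(y-y1), (x-x1)(y2-y), and (x-x1)(y-y1) respectively.)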
# when x1 = x2, the area is 0, therefore it won't be processed # keep it here to align with tensorflow version x1, x2 = torch.floor(x).long(), torch.ceil(x).long() y1, y2 = torch.floor(y).long(), torch.ceil(y).long() Q11 = img_feat[:, x1, y1].clone() Q12 = img_feat[:, x1, y2].clone() Q21 = img_feat[:, x2, y1].clone() Q22 = img_feat[:, x2, y2].clone() weights = torch.mul(x2.float() - x, y2.float() - y) Q11 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q11, 0, 1)) weights = torch.mul(x2.float() - x, y - y1.float()) Q12 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q12, 0, 1)) weights = torch.mul(x - x1.float(), y2.float() - y) Q21 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q21, 0, 1)) weights = torch.mul(x - x1.float(), y - y1.float()) Q22 = torch.mul(weights.unsqueeze(-1), torch.transpose(Q22, 0, 1)) output = Q11 + Q21 + Q12 + Q22 return output def forward(self, resolution, img_features, inputs): half_resolution = (resolution - 1) / 2 camera_c_offset = np.array(self.camera_c) - half_resolution # map to [-1, 1] # not sure why they render to negative x positions = inputs + torch.tensor(self.mesh_pos, device=inputs.device, dtype=torch.float) w = -self.camera_f[0] * (positions[:, :, 0] / self.bound_val(positions[:, :, 2])) + camera_c_offset[0] h = self.camera_f[1] * (positions[:, :, 1] / self.bound_val(positions[:, :, 2])) + camera_c_offset[1] if self.tensorflow_compatible: # to align with tensorflow # this is incorrect, I believe w += half_resolution[0] h += half_resolution[1] else: # directly do clamping w /= half_resolution[0] h /= half_resolution[1] # clamp to [-1, 1] w = torch.clamp(w, min=-1, max=1) h = torch.clamp(h, min=-1, max=1) feats = [inputs] for img_feature in img_features: feats.append(self.project(resolution, img_feature, torch.stack([w, h], dim=-1))) output = torch.cat(feats, 2) return output def project(self, img_shape, img_feat, sample_points): """ :param img_shape: raw image shape :param img_feat: [batch_size x channel x h x w] :param sample_points: [batch_size x num_points x 2], in range [-1, 1] :return: [batch_size x num_points x feat_dim] """ if self.tensorflow_compatible: feature_shape = self.image_feature_shape(img_feat) points_w = sample_points[:, :, 0] / (img_shape[0] / feature_shape[0]) points_h = sample_points[:, :, 1] / (img_shape[1] / feature_shape[1]) output = torch.stack([self.project_tensorflow(points_h[i], points_w[i], feature_shape, img_feat[i]) for i in range(img_feat.size(0))], 0) else: output = F.grid_sample(img_feat, sample_points.unsqueeze(1)) output = torch.transpose(output.squeeze(2), 1, 2) return output ================================================ FILE: models/losses/classifier.py ================================================ import torch import torch.nn as nn class CrossEntropyLoss(nn.Module): def __init__(self): super().__init__() self.cross_entropy = nn.CrossEntropyLoss().cuda() def forward(self, outputs, targets): labels = targets["labels"] loss = self.cross_entropy(outputs, labels) _, predicted = torch.max(outputs.data, 1) total = labels.size(0) correct = (predicted == labels).sum().item() return loss, {"loss": loss, "acc": correct / total} ================================================ FILE: models/losses/p2m.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from models.layers.chamfer_wrapper import ChamferDist class P2MLoss(nn.Module): def __init__(self, options, ellipsoid): super().__init__() self.options = options self.l1_loss = 
nn.L1Loss(reduction='mean') self.l2_loss = nn.MSELoss(reduction='mean') self.chamfer_dist = ChamferDist() self.laplace_idx = nn.ParameterList([ nn.Parameter(idx, requires_grad=False) for idx in ellipsoid.laplace_idx]) self.edges = nn.ParameterList([ nn.Parameter(edges, requires_grad=False) for edges in ellipsoid.edges]) def edge_regularization(self, pred, edges): """ :param pred: batch_size * num_points * 3 :param edges: num_edges * 2 :return: """ return self.l2_loss(pred[:, edges[:, 0]], pred[:, edges[:, 1]]) * pred.size(-1) @staticmethod def laplace_coord(inputs, lap_idx): """ :param inputs: nodes Tensor, size (n_pts, n_features = 3) :param lap_idx: laplace index matrix Tensor, size (n_pts, 10) for each vertex, the laplace vector shows: [neighbor_index * 8, self_index, neighbor_count] :returns The laplacian coordinates of input with respect to edges as in lap_idx """ indices = lap_idx[:, :-2] invalid_mask = indices < 0 all_valid_indices = indices.clone() all_valid_indices[invalid_mask] = 0 # do this to avoid negative indices vertices = inputs[:, all_valid_indices] vertices[:, invalid_mask] = 0 neighbor_sum = torch.sum(vertices, 2) neighbor_count = lap_idx[:, -1].float() laplace = inputs - neighbor_sum / neighbor_count[None, :, None] return laplace def laplace_regularization(self, input1, input2, block_idx): """ :param input1: vertices tensor before deformation :param input2: vertices after the deformation :param block_idx: idx to select laplace index matrix tensor :return: if different than 1 then adds a move loss as in the original TF code """ lap1 = self.laplace_coord(input1, self.laplace_idx[block_idx]) lap2 = self.laplace_coord(input2, self.laplace_idx[block_idx]) laplace_loss = self.l2_loss(lap1, lap2) * lap1.size(-1) move_loss = self.l2_loss(input1, input2) * input1.size(-1) if block_idx > 0 else 0 return laplace_loss, move_loss def normal_loss(self, gt_normal, indices, pred_points, adj_list): edges = F.normalize(pred_points[:, adj_list[:, 0]] - pred_points[:, adj_list[:, 1]], dim=2) nearest_normals = torch.stack([t[i] for t, i in zip(gt_normal, indices.long())]) normals = F.normalize(nearest_normals[:, adj_list[:, 0]], dim=2) cosine = torch.abs(torch.sum(edges * normals, 2)) return torch.mean(cosine) def image_loss(self, gt_img, pred_img): rect_loss = F.binary_cross_entropy(pred_img, gt_img) return rect_loss def forward(self, outputs, targets): """ :param outputs: outputs from P2MModel :param targets: targets from input :return: loss, loss_summary (dict) """ chamfer_loss, edge_loss, normal_loss, lap_loss, move_loss = 0., 0., 0., 0., 0. lap_const = [0.2, 1., 1.] gt_coord, gt_normal, gt_images = targets["points"], targets["normals"], targets["images"] pred_coord, pred_coord_before_deform = outputs["pred_coord"], outputs["pred_coord_before_deform"] image_loss = 0. 
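# To summarize the objective assembled below: for each of the three deformation stages i,
#   chamfer_loss += weights.chamfer[i] * (mean(dist1) + weights.chamfer_opposite * mean(dist2))
#   lap_loss += lap_const[i] * laplace_i    and    move_loss += lap_const[i] * move_i
# and the total is
#   loss = (chamfer_loss + weights.reconst * image_loss + weights.laplace * lap_loss
#           + weights.move * move_loss + weights.edge * edge_loss
#           + weights.normal * normal_loss) * weights.constant
# with all weights read from self.options.weights (see options.py).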
if outputs["reconst"] is not None and self.options.weights.reconst != 0: image_loss = self.image_loss(gt_images, outputs["reconst"]) for i in range(3): dist1, dist2, idx1, idx2 = self.chamfer_dist(gt_coord, pred_coord[i]) chamfer_loss += self.options.weights.chamfer[i] * (torch.mean(dist1) + self.options.weights.chamfer_opposite * torch.mean(dist2)) normal_loss += self.normal_loss(gt_normal, idx2, pred_coord[i], self.edges[i]) edge_loss += self.edge_regularization(pred_coord[i], self.edges[i]) lap, move = self.laplace_regularization(pred_coord_before_deform[i], pred_coord[i], i) lap_loss += lap_const[i] * lap move_loss += lap_const[i] * move loss = chamfer_loss + image_loss * self.options.weights.reconst + \ self.options.weights.laplace * lap_loss + \ self.options.weights.move * move_loss + \ self.options.weights.edge * edge_loss + \ self.options.weights.normal * normal_loss loss = loss * self.options.weights.constant return loss, { "loss": loss, "loss_chamfer": chamfer_loss, "loss_edge": edge_loss, "loss_laplace": lap_loss, "loss_move": move_loss, "loss_normal": normal_loss, } ================================================ FILE: models/p2m.py ================================================ import torch import torch.nn as nn import torch.nn.functional as F from models.backbones import get_backbone from models.layers.gbottleneck import GBottleneck from models.layers.gconv import GConv from models.layers.gpooling import GUnpooling from models.layers.gprojection import GProjection class P2MModel(nn.Module): def __init__(self, options, ellipsoid, camera_f, camera_c, mesh_pos): super(P2MModel, self).__init__() self.hidden_dim = options.hidden_dim self.coord_dim = options.coord_dim self.last_hidden_dim = options.last_hidden_dim self.init_pts = nn.Parameter(ellipsoid.coord, requires_grad=False) self.gconv_activation = options.gconv_activation self.nn_encoder, self.nn_decoder = get_backbone(options) self.features_dim = self.nn_encoder.features_dim + self.coord_dim self.gcns = nn.ModuleList([ GBottleneck(6, self.features_dim, self.hidden_dim, self.coord_dim, ellipsoid.adj_mat[0], activation=self.gconv_activation), GBottleneck(6, self.features_dim + self.hidden_dim, self.hidden_dim, self.coord_dim, ellipsoid.adj_mat[1], activation=self.gconv_activation), GBottleneck(6, self.features_dim + self.hidden_dim, self.hidden_dim, self.last_hidden_dim, ellipsoid.adj_mat[2], activation=self.gconv_activation) ]) self.unpooling = nn.ModuleList([ GUnpooling(ellipsoid.unpool_idx[0]), GUnpooling(ellipsoid.unpool_idx[1]) ]) # if options.align_with_tensorflow: # self.projection = GProjection # else: # self.projection = GProjection self.projection = GProjection(mesh_pos, camera_f, camera_c, bound=options.z_threshold, tensorflow_compatible=options.align_with_tensorflow) self.gconv = GConv(in_features=self.last_hidden_dim, out_features=self.coord_dim, adj_mat=ellipsoid.adj_mat[2]) def forward(self, img): batch_size = img.size(0) img_feats = self.nn_encoder(img) img_shape = self.projection.image_feature_shape(img) init_pts = self.init_pts.data.unsqueeze(0).expand(batch_size, -1, -1) # GCN Block 1 x = self.projection(img_shape, img_feats, init_pts) x1, x_hidden = self.gcns[0](x) # before deformation 2 x1_up = self.unpooling[0](x1) # GCN Block 2 x = self.projection(img_shape, img_feats, x1) x = self.unpooling[0](torch.cat([x, x_hidden], 2)) # after deformation 2 x2, x_hidden = self.gcns[1](x) # before deformation 3 x2_up = self.unpooling[1](x2) # GCN Block 3 x = self.projection(img_shape, img_feats, x2) x = 
self.unpooling[1](torch.cat([x, x_hidden], 2)) x3, _ = self.gcns[2](x) if self.gconv_activation: x3 = F.relu(x3) # after deformation 3 x3 = self.gconv(x3) if self.nn_decoder is not None: reconst = self.nn_decoder(img_feats) else: reconst = None return { "pred_coord": [x1, x2, x3], "pred_coord_before_deform": [init_pts, x1_up, x2_up], "reconst": reconst } ================================================ FILE: options.py ================================================ import os import pprint from argparse import ArgumentParser from datetime import datetime import numpy as np import yaml from easydict import EasyDict as edict from tensorboardX import SummaryWriter from logger import create_logger options = edict() options.name = 'p2m' options.version = None options.num_workers = 1 options.num_gpus = 1 options.pin_memory = True options.log_dir = "logs" options.log_level = "info" options.summary_dir = "summary" options.checkpoint_dir = "checkpoints" options.checkpoint = None options.dataset = edict() options.dataset.name = "shapenet" options.dataset.subset_train = "train_small" options.dataset.subset_eval = "test_small" options.dataset.camera_f = [248., 248.] options.dataset.camera_c = [111.5, 111.5] options.dataset.mesh_pos = [0., 0., -0.8] options.dataset.normalization = True options.dataset.num_classes = 13 options.dataset.shapenet = edict() options.dataset.shapenet.num_points = 3000 options.dataset.shapenet.resize_with_constant_border = False options.dataset.predict = edict() options.dataset.predict.folder = "/tmp" options.model = edict() options.model.name = "pixel2mesh" options.model.hidden_dim = 192 options.model.last_hidden_dim = 192 options.model.coord_dim = 3 options.model.backbone = "vgg16" options.model.gconv_activation = True # provide a boundary for z, so that z will never be equal to 0, on denominator # if z is greater than 0, it will never be less than z; # if z is less than 0, it will never be greater than z. options.model.z_threshold = 0 # align with original tensorflow model # please follow experiments/tensorflow.yml options.model.align_with_tensorflow = False options.loss = edict() options.loss.weights = edict() options.loss.weights.normal = 1.6e-4 options.loss.weights.edge = 0.3 options.loss.weights.laplace = 0.5 options.loss.weights.move = 0.1 options.loss.weights.constant = 1. options.loss.weights.chamfer = [1., 1., 1.] options.loss.weights.chamfer_opposite = 1. options.loss.weights.reconst = 0. options.train = edict() options.train.num_epochs = 50 options.train.batch_size = 4 options.train.summary_steps = 50 options.train.checkpoint_steps = 10000 options.train.test_epochs = 1 options.train.use_augmentation = True options.train.shuffle = True options.test = edict() options.test.dataset = [] options.test.summary_steps = 50 options.test.batch_size = 4 options.test.shuffle = False options.test.weighted_mean = False options.optim = edict() options.optim.name = "adam" options.optim.adam_beta1 = 0.9 options.optim.sgd_momentum = 0.9 options.optim.lr = 5.0E-5 options.optim.wd = 1.0E-6 options.optim.lr_step = [30, 45] options.optim.lr_factor = 0.1 def _update_dict(full_key, val, d): for vk, vv in val.items(): if vk not in d: raise ValueError("{}.{} does not exist in options".format(full_key, vk)) if isinstance(vv, list): d[vk] = np.array(vv) elif isinstance(vv, dict): _update_dict(full_key + "." 
+ vk, vv, d[vk]) else: d[vk] = vv def _update_options(options_file): # do scan twice # in the first round, MODEL.NAME is located so that we can initialize MODEL.EXTRA # in the second round, we update everything with open(options_file) as f: options_dict = yaml.safe_load(f) # do a dfs on `BASED_ON` options files if "based_on" in options_dict: for base_options in options_dict["based_on"]: _update_options(os.path.join(os.path.dirname(options_file), base_options)) options_dict.pop("based_on") _update_dict("", options_dict, options) def update_options(options_file): _update_options(options_file) def gen_options(options_file): def to_dict(ed): ret = dict(ed) for k, v in ret.items(): if isinstance(v, edict): ret[k] = to_dict(v) elif isinstance(v, np.ndarray): ret[k] = v.tolist() return ret cfg = to_dict(options) with open(options_file, 'w') as f: yaml.safe_dump(dict(cfg), f, default_flow_style=False) def slugify(filename): filename = os.path.relpath(filename, ".") if filename.startswith("experiments/"): filename = filename[len("experiments/"):] return os.path.splitext(filename)[0].lower().replace("/", "_").replace(".", "_") def reset_options(options, args, phase='train'): if hasattr(args, "batch_size") and args.batch_size: options.train.batch_size = options.test.batch_size = args.batch_size if hasattr(args, "version") and args.version: options.version = args.version if hasattr(args, "num_epochs") and args.num_epochs: options.train.num_epochs = args.num_epochs if hasattr(args, "checkpoint") and args.checkpoint: options.checkpoint = args.checkpoint if hasattr(args, "folder") and args.folder: options.dataset.predict.folder = args.folder if hasattr(args, "gpus") and args.gpus: options.num_gpus = args.gpus if hasattr(args, "shuffle") and args.shuffle: options.train.shuffle = options.test.shuffle = True options.name = args.name if options.version is None: prefix = "" if args.options: prefix = slugify(args.options) + "_" options.version = prefix + datetime.now().strftime('%m%d%H%M%S') # ignore %Y options.log_dir = os.path.join(options.log_dir, options.name) print('=> creating {}'.format(options.log_dir)) os.makedirs(options.log_dir, exist_ok=True) options.checkpoint_dir = os.path.join(options.checkpoint_dir, options.name, options.version) print('=> creating {}'.format(options.checkpoint_dir)) os.makedirs(options.checkpoint_dir, exist_ok=True) options.summary_dir = os.path.join(options.summary_dir, options.name, options.version) print('=> creating {}'.format(options.summary_dir)) os.makedirs(options.summary_dir, exist_ok=True) logger = create_logger(options, phase=phase) options_text = pprint.pformat(vars(options)) logger.info(options_text) print('=> creating summary writer') writer = SummaryWriter(options.summary_dir) return logger, writer if __name__ == "__main__": parser = ArgumentParser("Read options and freeze") parser.add_argument("--input", type=str, required=True) parser.add_argument("--output", type=str, required=True) args = parser.parse_args() update_options(args.input) gen_options(args.output) ================================================ FILE: slurm/eval.sh ================================================ #!/usr/bin/env bash set -x if [[ $# -lt 4 ]] ; then echo 'too few arguments supplied' exit 1 fi PARTITION=$1 NAME=$2 OPTIONS=$3 CHECKPOINT=$4 srun -p ${PARTITION} \ --job-name=MeshEval \ --gres=gpu:8 \ --ntasks=1 \ --kill-on-bad-exit=1 \ python entrypoint_eval.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} & ================================================ FILE: 
slurm/train.sh ================================================ #!/usr/bin/env bash set -x if [[ $# -lt 3 ]] ; then echo 'too few arguments supplied' exit 1 fi PARTITION=$1 NAME=$2 OPTIONS=$3 srun -p ${PARTITION} \ --job-name=Mesh \ --gres=gpu:8 \ --ntasks=1 \ --kill-on-bad-exit=1 \ python entrypoint_train.py --name ${NAME} --options ${OPTIONS} & ================================================ FILE: slurm/train_checkpoint.sh ================================================ #!/usr/bin/env bash set -x if [[ $# -lt 4 ]] ; then echo 'too few arguments supplied' exit 1 fi PARTITION=$1 NAME=$2 OPTIONS=$3 CHECKPOINT=$4 srun -p ${PARTITION} \ --job-name=Mesh \ --gres=gpu:8 \ --ntasks=1 \ --kill-on-bad-exit=1 \ python entrypoint_train.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} & ================================================ FILE: slurm/train_checkpoint_1gpu.sh ================================================ #!/usr/bin/env bash set -x if [[ $# -lt 4 ]] ; then echo 'too few arguments supplied' exit 1 fi PARTITION=$1 NAME=$2 OPTIONS=$3 CHECKPOINT=$4 srun -p ${PARTITION} \ --job-name=Mesh \ --gres=gpu:1 \ --ntasks=1 \ --kill-on-bad-exit=1 \ python entrypoint_train.py --name ${NAME} --options ${OPTIONS} --checkpoint ${CHECKPOINT} & ================================================ FILE: test.py ================================================ import torch import torch.nn as nn from models.layers.chamfer_wrapper import ChamferDist def test(): torch.manual_seed(42) chamfer = ChamferDist() dense = nn.Linear(6, 3) dense.cuda() optimizer = torch.optim.Adam(dense.parameters(), 1e-3) a = torch.rand(4, 5, 6).cuda() b = torch.rand(4, 8, 3).cuda() c = torch.rand(4, 5, 6).cuda() for i in range(30000): a_out = dense(a) d1, d2, i1, i2 = chamfer(a_out, b) loss = d1.mean() + d2.mean() c_out = dense(a) d1, d2, i1, i2 = chamfer(c_out, b) optimizer.zero_grad() loss.backward() optimizer.step() print(loss) test() ================================================ FILE: utils/average_meter.py ================================================ from collections import Iterable import torch import numpy as np # noinspection PyAttributeOutsideInit class AverageMeter(object): """Computes and stores the average and current value""" def __init__(self, multiplier=1.0): self.multiplier = multiplier self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): if isinstance(val, torch.Tensor): val = val.cpu().numpy() if isinstance(val, Iterable): val = np.array(val) self.update(np.mean(np.array(val)), n=val.size) else: self.val = self.multiplier * val self.sum += self.multiplier * val * n self.count += n self.avg = self.sum / self.count if self.count != 0 else 0 def __str__(self): return "%.6f (%.6f)" % (self.val, self.avg) ================================================ FILE: utils/demo_selection/select_demo_images.py ================================================ import json import os import random import shutil with open("datasets/data/shapenet/meta/shapenet.json") as fp: labels_map = json.load(fp) with open("datasets/data/shapenet/meta/test_tf.txt") as fp: lines = [line.strip() for line in fp.readlines()] for entry in labels_map.values(): file_list = list(filter(lambda x: (entry["id"] + "/") in x, lines)) chosen = random.choice(file_list) file_location = os.path.join("datasets/data/shapenet/data_tf", chosen[len("Data/ShapeNetP2M/"):-4] + ".png") shutil.copyfile(file_location, "datasets/examples/%s.png" % entry["name"].split(",")[0]) 
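# A minimal usage sketch for the AverageMeter in utils/average_meter.py above (not a file in
# this repository; the values are hypothetical):
import torch
from utils.average_meter import AverageMeter
meter = AverageMeter()
meter.update(0.5)        # val=0.5, avg=0.5
meter.update(1.0, n=3)   # counts as 3 samples: avg = (0.5 + 3 * 1.0) / 4 = 0.875
meter.update(torch.tensor([2.0, 4.0]))  # tensors/iterables are averaged and counted element-wise
print(meter)             # __str__ renders "val (avg)": 3.000000 (1.583333)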
================================================ FILE: utils/mesh.py ================================================ import os import pickle import numpy as np import torch import trimesh from scipy.sparse import coo_matrix import config def torch_sparse_tensor(indices, value, size): coo = coo_matrix((value, (indices[:, 0], indices[:, 1])), shape=size) values = coo.data indices = np.vstack((coo.row, coo.col)) i = torch.tensor(indices, dtype=torch.long) v = torch.tensor(values, dtype=torch.float) shape = coo.shape return torch.sparse.FloatTensor(i, v, shape) class Ellipsoid(object): def __init__(self, mesh_pos, file=config.ELLIPSOID_PATH): with open(file, "rb") as fp: fp_info = pickle.load(fp, encoding='latin1') # shape: n_pts * 3 self.coord = torch.tensor(fp_info[0]) - torch.tensor(mesh_pos, dtype=torch.float) # edges & faces & lap_idx # edge: num_edges * 2 # faces: num_faces * 4 # laplace_idx: num_pts * 10 self.edges, self.laplace_idx = [], [] for i in range(3): self.edges.append(torch.tensor(fp_info[1 + i][1][0], dtype=torch.long)) self.laplace_idx.append(torch.tensor(fp_info[7][i], dtype=torch.long)) # unpool index # num_pool_edges * 2 # pool_01: 462 * 2, pool_12: 1848 * 2 self.unpool_idx = [torch.tensor(fp_info[4][i], dtype=torch.long) for i in range(2)] # loops and adjacent edges self.adj_mat = [] for i in range(1, 4): # 0: np.array, 2D, pos # 1: np.array, 1D, vals # 2: tuple - shape, n * n adj_mat = torch_sparse_tensor(*fp_info[i][1]) self.adj_mat.append(adj_mat) ellipsoid_dir = os.path.dirname(file) self.faces = [] self.obj_fmt_faces = [] # faces: f * 3, original ellipsoid, and two after deformations for i in range(1, 4): face_file = os.path.join(ellipsoid_dir, "face%d.obj" % i) faces = np.loadtxt(face_file, dtype='|S32') self.obj_fmt_faces.append(faces) self.faces.append(torch.tensor(faces[:, 1:].astype(np.int) - 1)) ================================================ FILE: utils/migrations/delete_unnecessary_keys.py ================================================ from argparse import ArgumentParser import torch parser = ArgumentParser() parser.add_argument("--input", type=str, required=True) parser.add_argument("--output", type=str, required=True) args = parser.parse_args() data = torch.load(args.input) compressed = dict() compressed["model"] = data["model"] torch.save(compressed, args.output) ================================================ FILE: utils/migrations/extract_vgg_weights.py ================================================ import torch from models.classifier import Classifier from options import options options.model.backbone = "vgg16" model = Classifier(options.model, 1000) state_dict = torch.load("checkpoints/debug/migration/400400_000080.pt") model.load_state_dict(state_dict["model"]) torch.save(model.nn_encoder.state_dict(), "checkpoints/debug/migration/vgg16-p2m.pth") ================================================ FILE: utils/migrations/from_p2m_pytorch.py ================================================ import re import torch checkpoint = torch.load("checkpoints/debug/20190705192654/000001_000001.pt") pretrained = torch.load("checkpoints/pretrained/network_4.pth") weights = checkpoint["model"] for k in weights.keys(): match = k match = re.sub("gcns\.(\d)", "GCN_\\1", match) match = re.sub("conv(\d)\.weight", "conv\\1.weight_2", match) match = re.sub("conv(\d)\.loop_weight", "conv\\1.weight_1", match) match = re.sub("gconv\.weight", "GConv.weight_2", match) match = re.sub("gconv\.loop_weight", "GConv.weight_1", match) match = re.sub("gconv\.", "GConv.", match) if 
match not in pretrained: print(k, match) else: weights[k] = pretrained[match] torch.save(checkpoint, "checkpoints/debug/migration/network_4.pt") ================================================ FILE: utils/migrations/official_config_pytorch_256.txt ================================================ nn_encoder.conv0_1.weight torch.Size([16, 3, 3, 3]) nn_encoder.conv0_1.bias torch.Size([16]) nn_encoder.conv0_2.weight torch.Size([16, 16, 3, 3]) nn_encoder.conv0_2.bias torch.Size([16]) nn_encoder.conv1_1.weight torch.Size([32, 16, 3, 3]) nn_encoder.conv1_1.bias torch.Size([32]) nn_encoder.conv1_2.weight torch.Size([32, 32, 3, 3]) nn_encoder.conv1_2.bias torch.Size([32]) nn_encoder.conv1_3.weight torch.Size([32, 32, 3, 3]) nn_encoder.conv1_3.bias torch.Size([32]) nn_encoder.conv2_1.weight torch.Size([64, 32, 3, 3]) nn_encoder.conv2_1.bias torch.Size([64]) nn_encoder.conv2_2.weight torch.Size([64, 64, 3, 3]) nn_encoder.conv2_2.bias torch.Size([64]) nn_encoder.conv2_3.weight torch.Size([64, 64, 3, 3]) nn_encoder.conv2_3.bias torch.Size([64]) nn_encoder.conv3_1.weight torch.Size([128, 64, 3, 3]) nn_encoder.conv3_1.bias torch.Size([128]) nn_encoder.conv3_2.weight torch.Size([128, 128, 3, 3]) nn_encoder.conv3_2.bias torch.Size([128]) nn_encoder.conv3_3.weight torch.Size([128, 128, 3, 3]) nn_encoder.conv3_3.bias torch.Size([128]) nn_encoder.conv4_1.weight torch.Size([256, 128, 5, 5]) nn_encoder.conv4_1.bias torch.Size([256]) nn_encoder.conv4_2.weight torch.Size([256, 256, 3, 3]) nn_encoder.conv4_2.bias torch.Size([256]) nn_encoder.conv4_3.weight torch.Size([256, 256, 3, 3]) nn_encoder.conv4_3.bias torch.Size([256]) nn_encoder.conv5_1.weight torch.Size([512, 256, 5, 5]) nn_encoder.conv5_1.bias torch.Size([512]) nn_encoder.conv5_2.weight torch.Size([512, 512, 3, 3]) nn_encoder.conv5_2.bias torch.Size([512]) nn_encoder.conv5_3.weight torch.Size([512, 512, 3, 3]) nn_encoder.conv5_3.bias torch.Size([512]) nn_encoder.conv5_4.weight torch.Size([512, 512, 3, 3]) nn_encoder.conv5_4.bias torch.Size([512]) gcns.0.conv1.loop_weight torch.Size([963, 256]) gcns.0.conv1.weight torch.Size([963, 256]) gcns.0.conv1.bias torch.Size([256]) gcns.0.blocks.0.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.0.conv1.weight torch.Size([256, 256]) gcns.0.blocks.0.conv1.bias torch.Size([256]) gcns.0.blocks.0.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.0.conv2.weight torch.Size([256, 256]) gcns.0.blocks.0.conv2.bias torch.Size([256]) gcns.0.blocks.1.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.1.conv1.weight torch.Size([256, 256]) gcns.0.blocks.1.conv1.bias torch.Size([256]) gcns.0.blocks.1.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.1.conv2.weight torch.Size([256, 256]) gcns.0.blocks.1.conv2.bias torch.Size([256]) gcns.0.blocks.2.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.2.conv1.weight torch.Size([256, 256]) gcns.0.blocks.2.conv1.bias torch.Size([256]) gcns.0.blocks.2.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.2.conv2.weight torch.Size([256, 256]) gcns.0.blocks.2.conv2.bias torch.Size([256]) gcns.0.blocks.3.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.3.conv1.weight torch.Size([256, 256]) gcns.0.blocks.3.conv1.bias torch.Size([256]) gcns.0.blocks.3.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.3.conv2.weight torch.Size([256, 256]) gcns.0.blocks.3.conv2.bias torch.Size([256]) gcns.0.blocks.4.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.4.conv1.weight torch.Size([256, 256]) gcns.0.blocks.4.conv1.bias torch.Size([256]) 
gcns.0.blocks.4.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.4.conv2.weight torch.Size([256, 256]) gcns.0.blocks.4.conv2.bias torch.Size([256]) gcns.0.blocks.5.conv1.loop_weight torch.Size([256, 256]) gcns.0.blocks.5.conv1.weight torch.Size([256, 256]) gcns.0.blocks.5.conv1.bias torch.Size([256]) gcns.0.blocks.5.conv2.loop_weight torch.Size([256, 256]) gcns.0.blocks.5.conv2.weight torch.Size([256, 256]) gcns.0.blocks.5.conv2.bias torch.Size([256]) gcns.0.conv2.loop_weight torch.Size([256, 3]) gcns.0.conv2.weight torch.Size([256, 3]) gcns.0.conv2.bias torch.Size([3]) gcns.1.conv1.loop_weight torch.Size([1219, 256]) gcns.1.conv1.weight torch.Size([1219, 256]) gcns.1.conv1.bias torch.Size([256]) gcns.1.blocks.0.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.0.conv1.weight torch.Size([256, 256]) gcns.1.blocks.0.conv1.bias torch.Size([256]) gcns.1.blocks.0.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.0.conv2.weight torch.Size([256, 256]) gcns.1.blocks.0.conv2.bias torch.Size([256]) gcns.1.blocks.1.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.1.conv1.weight torch.Size([256, 256]) gcns.1.blocks.1.conv1.bias torch.Size([256]) gcns.1.blocks.1.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.1.conv2.weight torch.Size([256, 256]) gcns.1.blocks.1.conv2.bias torch.Size([256]) gcns.1.blocks.2.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.2.conv1.weight torch.Size([256, 256]) gcns.1.blocks.2.conv1.bias torch.Size([256]) gcns.1.blocks.2.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.2.conv2.weight torch.Size([256, 256]) gcns.1.blocks.2.conv2.bias torch.Size([256]) gcns.1.blocks.3.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.3.conv1.weight torch.Size([256, 256]) gcns.1.blocks.3.conv1.bias torch.Size([256]) gcns.1.blocks.3.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.3.conv2.weight torch.Size([256, 256]) gcns.1.blocks.3.conv2.bias torch.Size([256]) gcns.1.blocks.4.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.4.conv1.weight torch.Size([256, 256]) gcns.1.blocks.4.conv1.bias torch.Size([256]) gcns.1.blocks.4.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.4.conv2.weight torch.Size([256, 256]) gcns.1.blocks.4.conv2.bias torch.Size([256]) gcns.1.blocks.5.conv1.loop_weight torch.Size([256, 256]) gcns.1.blocks.5.conv1.weight torch.Size([256, 256]) gcns.1.blocks.5.conv1.bias torch.Size([256]) gcns.1.blocks.5.conv2.loop_weight torch.Size([256, 256]) gcns.1.blocks.5.conv2.weight torch.Size([256, 256]) gcns.1.blocks.5.conv2.bias torch.Size([256]) gcns.1.conv2.loop_weight torch.Size([256, 3]) gcns.1.conv2.weight torch.Size([256, 3]) gcns.1.conv2.bias torch.Size([3]) gcns.2.conv1.loop_weight torch.Size([1219, 256]) gcns.2.conv1.weight torch.Size([1219, 256]) gcns.2.conv1.bias torch.Size([256]) gcns.2.blocks.0.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.0.conv1.weight torch.Size([256, 256]) gcns.2.blocks.0.conv1.bias torch.Size([256]) gcns.2.blocks.0.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.0.conv2.weight torch.Size([256, 256]) gcns.2.blocks.0.conv2.bias torch.Size([256]) gcns.2.blocks.1.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.1.conv1.weight torch.Size([256, 256]) gcns.2.blocks.1.conv1.bias torch.Size([256]) gcns.2.blocks.1.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.1.conv2.weight torch.Size([256, 256]) gcns.2.blocks.1.conv2.bias torch.Size([256]) gcns.2.blocks.2.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.2.conv1.weight torch.Size([256, 256]) 
gcns.2.blocks.2.conv1.bias torch.Size([256]) gcns.2.blocks.2.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.2.conv2.weight torch.Size([256, 256]) gcns.2.blocks.2.conv2.bias torch.Size([256]) gcns.2.blocks.3.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.3.conv1.weight torch.Size([256, 256]) gcns.2.blocks.3.conv1.bias torch.Size([256]) gcns.2.blocks.3.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.3.conv2.weight torch.Size([256, 256]) gcns.2.blocks.3.conv2.bias torch.Size([256]) gcns.2.blocks.4.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.4.conv1.weight torch.Size([256, 256]) gcns.2.blocks.4.conv1.bias torch.Size([256]) gcns.2.blocks.4.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.4.conv2.weight torch.Size([256, 256]) gcns.2.blocks.4.conv2.bias torch.Size([256]) gcns.2.blocks.5.conv1.loop_weight torch.Size([256, 256]) gcns.2.blocks.5.conv1.weight torch.Size([256, 256]) gcns.2.blocks.5.conv1.bias torch.Size([256]) gcns.2.blocks.5.conv2.loop_weight torch.Size([256, 256]) gcns.2.blocks.5.conv2.weight torch.Size([256, 256]) gcns.2.blocks.5.conv2.bias torch.Size([256]) gcns.2.conv2.loop_weight torch.Size([256, 256]) gcns.2.conv2.weight torch.Size([256, 256]) gcns.2.conv2.bias torch.Size([256]) gconv.loop_weight torch.Size([256, 3]) gconv.weight torch.Size([256, 3]) gconv.bias torch.Size([3]) ================================================ FILE: utils/migrations/official_config_tensorflow_256.txt ================================================ gcn/Conv2D/W:0 (3, 3, 3, 16) gcn/Conv2D/b:0 (16,) gcn/Conv2D_1/W:0 (3, 3, 16, 16) gcn/Conv2D_1/b:0 (16,) gcn/Conv2D_2/W:0 (3, 3, 16, 32) gcn/Conv2D_2/b:0 (32,) gcn/Conv2D_3/W:0 (3, 3, 32, 32) gcn/Conv2D_3/b:0 (32,) gcn/Conv2D_4/W:0 (3, 3, 32, 32) gcn/Conv2D_4/b:0 (32,) gcn/Conv2D_5/W:0 (3, 3, 32, 64) gcn/Conv2D_5/b:0 (64,) gcn/Conv2D_6/W:0 (3, 3, 64, 64) gcn/Conv2D_6/b:0 (64,) gcn/Conv2D_7/W:0 (3, 3, 64, 64) gcn/Conv2D_7/b:0 (64,) gcn/Conv2D_8/W:0 (3, 3, 64, 128) gcn/Conv2D_8/b:0 (128,) gcn/Conv2D_9/W:0 (3, 3, 128, 128) gcn/Conv2D_9/b:0 (128,) gcn/Conv2D_10/W:0 (3, 3, 128, 128) gcn/Conv2D_10/b:0 (128,) gcn/Conv2D_11/W:0 (5, 5, 128, 256) gcn/Conv2D_11/b:0 (256,) gcn/Conv2D_12/W:0 (3, 3, 256, 256) gcn/Conv2D_12/b:0 (256,) gcn/Conv2D_13/W:0 (3, 3, 256, 256) gcn/Conv2D_13/b:0 (256,) gcn/Conv2D_14/W:0 (5, 5, 256, 512) gcn/Conv2D_14/b:0 (512,) gcn/Conv2D_15/W:0 (3, 3, 512, 512) gcn/Conv2D_15/b:0 (512,) gcn/Conv2D_16/W:0 (3, 3, 512, 512) gcn/Conv2D_16/b:0 (512,) gcn/Conv2D_17/W:0 (3, 3, 512, 512) gcn/Conv2D_17/b:0 (512,) gcn/graphconvolution_1_vars/weights_0:0 (963, 256) gcn/graphconvolution_1_vars/weights_1:0 (963, 256) gcn/graphconvolution_1_vars/bias:0 (256,) gcn/graphconvolution_2_vars/weights_0:0 (256, 256) gcn/graphconvolution_2_vars/weights_1:0 (256, 256) gcn/graphconvolution_2_vars/bias:0 (256,) gcn/graphconvolution_3_vars/weights_0:0 (256, 256) gcn/graphconvolution_3_vars/weights_1:0 (256, 256) gcn/graphconvolution_3_vars/bias:0 (256,) gcn/graphconvolution_4_vars/weights_0:0 (256, 256) gcn/graphconvolution_4_vars/weights_1:0 (256, 256) gcn/graphconvolution_4_vars/bias:0 (256,) gcn/graphconvolution_5_vars/weights_0:0 (256, 256) gcn/graphconvolution_5_vars/weights_1:0 (256, 256) gcn/graphconvolution_5_vars/bias:0 (256,) gcn/graphconvolution_6_vars/weights_0:0 (256, 256) gcn/graphconvolution_6_vars/weights_1:0 (256, 256) gcn/graphconvolution_6_vars/bias:0 (256,) gcn/graphconvolution_7_vars/weights_0:0 (256, 256) gcn/graphconvolution_7_vars/weights_1:0 (256, 256) gcn/graphconvolution_7_vars/bias:0 (256,) 
gcn/graphconvolution_8_vars/weights_0:0 (256, 256)
gcn/graphconvolution_8_vars/weights_1:0 (256, 256)
gcn/graphconvolution_8_vars/bias:0 (256,)
gcn/graphconvolution_9_vars/weights_0:0 (256, 256)
gcn/graphconvolution_9_vars/weights_1:0 (256, 256)
gcn/graphconvolution_9_vars/bias:0 (256,)
gcn/graphconvolution_10_vars/weights_0:0 (256, 256)
gcn/graphconvolution_10_vars/weights_1:0 (256, 256)
gcn/graphconvolution_10_vars/bias:0 (256,)
gcn/graphconvolution_11_vars/weights_0:0 (256, 256)
gcn/graphconvolution_11_vars/weights_1:0 (256, 256)
gcn/graphconvolution_11_vars/bias:0 (256,)
gcn/graphconvolution_12_vars/weights_0:0 (256, 256)
gcn/graphconvolution_12_vars/weights_1:0 (256, 256)
gcn/graphconvolution_12_vars/bias:0 (256,)
gcn/graphconvolution_13_vars/weights_0:0 (256, 256)
gcn/graphconvolution_13_vars/weights_1:0 (256, 256)
gcn/graphconvolution_13_vars/bias:0 (256,)
gcn/graphconvolution_14_vars/weights_0:0 (256, 3)
gcn/graphconvolution_14_vars/weights_1:0 (256, 3)
gcn/graphconvolution_14_vars/bias:0 (3,)
gcn/graphconvolution_15_vars/weights_0:0 (1219, 256)
gcn/graphconvolution_15_vars/weights_1:0 (1219, 256)
gcn/graphconvolution_15_vars/bias:0 (256,)
gcn/graphconvolution_16_vars/weights_0:0 (256, 256)
gcn/graphconvolution_16_vars/weights_1:0 (256, 256)
gcn/graphconvolution_16_vars/bias:0 (256,)
gcn/graphconvolution_17_vars/weights_0:0 (256, 256)
gcn/graphconvolution_17_vars/weights_1:0 (256, 256)
gcn/graphconvolution_17_vars/bias:0 (256,)
gcn/graphconvolution_18_vars/weights_0:0 (256, 256)
gcn/graphconvolution_18_vars/weights_1:0 (256, 256)
gcn/graphconvolution_18_vars/bias:0 (256,)
gcn/graphconvolution_19_vars/weights_0:0 (256, 256)
gcn/graphconvolution_19_vars/weights_1:0 (256, 256)
gcn/graphconvolution_19_vars/bias:0 (256,)
gcn/graphconvolution_20_vars/weights_0:0 (256, 256)
gcn/graphconvolution_20_vars/weights_1:0 (256, 256)
gcn/graphconvolution_20_vars/bias:0 (256,)
gcn/graphconvolution_21_vars/weights_0:0 (256, 256)
gcn/graphconvolution_21_vars/weights_1:0 (256, 256)
gcn/graphconvolution_21_vars/bias:0 (256,)
gcn/graphconvolution_22_vars/weights_0:0 (256, 256)
gcn/graphconvolution_22_vars/weights_1:0 (256, 256)
gcn/graphconvolution_22_vars/bias:0 (256,)
gcn/graphconvolution_23_vars/weights_0:0 (256, 256)
gcn/graphconvolution_23_vars/weights_1:0 (256, 256)
gcn/graphconvolution_23_vars/bias:0 (256,)
gcn/graphconvolution_24_vars/weights_0:0 (256, 256)
gcn/graphconvolution_24_vars/weights_1:0 (256, 256)
gcn/graphconvolution_24_vars/bias:0 (256,)
gcn/graphconvolution_25_vars/weights_0:0 (256, 256)
gcn/graphconvolution_25_vars/weights_1:0 (256, 256)
gcn/graphconvolution_25_vars/bias:0 (256,)
gcn/graphconvolution_26_vars/weights_0:0 (256, 256)
gcn/graphconvolution_26_vars/weights_1:0 (256, 256)
gcn/graphconvolution_26_vars/bias:0 (256,)
gcn/graphconvolution_27_vars/weights_0:0 (256, 256)
gcn/graphconvolution_27_vars/weights_1:0 (256, 256)
gcn/graphconvolution_27_vars/bias:0 (256,)
gcn/graphconvolution_28_vars/weights_0:0 (256, 3)
gcn/graphconvolution_28_vars/weights_1:0 (256, 3)
gcn/graphconvolution_28_vars/bias:0 (3,)
gcn/graphconvolution_29_vars/weights_0:0 (1219, 256)
gcn/graphconvolution_29_vars/weights_1:0 (1219, 256)
gcn/graphconvolution_29_vars/bias:0 (256,)
gcn/graphconvolution_30_vars/weights_0:0 (256, 256)
gcn/graphconvolution_30_vars/weights_1:0 (256, 256)
gcn/graphconvolution_30_vars/bias:0 (256,)
gcn/graphconvolution_31_vars/weights_0:0 (256, 256)
gcn/graphconvolution_31_vars/weights_1:0 (256, 256)
gcn/graphconvolution_31_vars/bias:0 (256,)
gcn/graphconvolution_32_vars/weights_0:0 (256, 256)
gcn/graphconvolution_32_vars/weights_1:0 (256, 256)
gcn/graphconvolution_32_vars/bias:0 (256,)
gcn/graphconvolution_33_vars/weights_0:0 (256, 256)
gcn/graphconvolution_33_vars/weights_1:0 (256, 256)
gcn/graphconvolution_33_vars/bias:0 (256,)
gcn/graphconvolution_34_vars/weights_0:0 (256, 256)
gcn/graphconvolution_34_vars/weights_1:0 (256, 256)
gcn/graphconvolution_34_vars/bias:0 (256,)
gcn/graphconvolution_35_vars/weights_0:0 (256, 256)
gcn/graphconvolution_35_vars/weights_1:0 (256, 256)
gcn/graphconvolution_35_vars/bias:0 (256,)
gcn/graphconvolution_36_vars/weights_0:0 (256, 256)
gcn/graphconvolution_36_vars/weights_1:0 (256, 256)
gcn/graphconvolution_36_vars/bias:0 (256,)
gcn/graphconvolution_37_vars/weights_0:0 (256, 256)
gcn/graphconvolution_37_vars/weights_1:0 (256, 256)
gcn/graphconvolution_37_vars/bias:0 (256,)
gcn/graphconvolution_38_vars/weights_0:0 (256, 256)
gcn/graphconvolution_38_vars/weights_1:0 (256, 256)
gcn/graphconvolution_38_vars/bias:0 (256,)
gcn/graphconvolution_39_vars/weights_0:0 (256, 256)
gcn/graphconvolution_39_vars/weights_1:0 (256, 256)
gcn/graphconvolution_39_vars/bias:0 (256,)
gcn/graphconvolution_40_vars/weights_0:0 (256, 256)
gcn/graphconvolution_40_vars/weights_1:0 (256, 256)
gcn/graphconvolution_40_vars/bias:0 (256,)
gcn/graphconvolution_41_vars/weights_0:0 (256, 256)
gcn/graphconvolution_41_vars/weights_1:0 (256, 256)
gcn/graphconvolution_41_vars/bias:0 (256,)
gcn/graphconvolution_42_vars/weights_0:0 (256, 128)
gcn/graphconvolution_42_vars/weights_1:0 (256, 128)
gcn/graphconvolution_42_vars/bias:0 (128,)
gcn/graphconvolution_43_vars/weights_0:0 (128, 3)
gcn/graphconvolution_43_vars/weights_1:0 (128, 3)
gcn/graphconvolution_43_vars/bias:0 (3,)


================================================
FILE: utils/migrations/official_model_converter.py
================================================
import pickle

import numpy as np
import torch

# load the TensorFlow weights that tensorflow_to_pkl.py dumped into a pickle
with open("checkpoints/debug/migration/p2m-tensorflow.pkl", "rb") as f:
    official = pickle.load(f)
for k, v in official.items():
    print(k, v.shape)

# load a PyTorch checkpoint to serve as the "host" that receives the weights
with open("checkpoints/debug/host_template_256/000001_000001.pt", "rb") as f:
    host = torch.load(f)
for k, v in host["model"].items():
    print(k, v.shape)

# the two txt configs list parameters in matching order, one name per line
with open("utils/migrations/official_config_pytorch_256.txt", "r") as f:
    pt_names = [line.split()[0] for line in f.readlines()]
with open("utils/migrations/official_config_tensorflow_256.txt", "r") as f:
    tf_names = [line.split()[0] for line in f.readlines()]

for pt, tf in zip(pt_names, tf_names):
    if host["model"][pt].shape != official[tf].shape:
        # conv kernels: TensorFlow stores (kh, kw, c_in, c_out), PyTorch wants (c_out, c_in, kh, kw)
        data = np.transpose(official[tf], (3, 2, 0, 1))
    else:
        data = official[tf]
    print(pt, tf, host["model"][pt].data.shape, data.shape)
    host["model"][pt].data = torch.from_numpy(data)

torch.save(host, "checkpoints/debug/migration/network_official.pt")
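The shape-mismatch branch above is the only place the converter changes layout: TensorFlow convolution kernels are re-laid-out for PyTorch. A minimal standalone sanity check of that permutation (hypothetical snippet, not part of the repository; the kernel shape is one from the txt configs above):

import numpy as np

tf_kernel = np.random.rand(3, 3, 16, 32)           # TensorFlow layout: (kh, kw, c_in, c_out)
pt_kernel = np.transpose(tf_kernel, (3, 2, 0, 1))  # the converter's permutation
assert pt_kernel.shape == (32, 16, 3, 3)           # PyTorch layout: (c_out, c_in, kh, kw)
# every individual (kh, kw) filter must survive the permutation unchanged
assert np.allclose(pt_kernel[5, 7], tf_kernel[:, :, 7, 5])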
================================================
FILE: utils/migrations/tensorflow_to_pkl.py
================================================
import pickle

import tensorflow as tf
from tensorflow.python.framework import ops

nn_distance_module = tf.load_op_library('tf_ops/libtf_nndistance.so')


def nn_distance(xyz1, xyz2):
    '''
    Computes the distance of nearest neighbors for a pair of point clouds
    input:  xyz1:  (batch_size,#points_1,3)  the first point cloud
    input:  xyz2:  (batch_size,#points_2,3)  the second point cloud
    output: dist1: (batch_size,#point_1)     distance from first to second
    output: idx1:  (batch_size,#point_1)     nearest neighbor from first to second
    output: dist2: (batch_size,#point_2)     distance from second to first
    output: idx2:  (batch_size,#point_2)     nearest neighbor from second to first
    '''
    return nn_distance_module.nn_distance(xyz1, xyz2)


@ops.RegisterGradient('NnDistance')
def _nn_distance_grad(op, grad_dist1, grad_idx1, grad_dist2, grad_idx2):
    xyz1 = op.inputs[0]
    xyz2 = op.inputs[1]
    idx1 = op.outputs[1]
    idx2 = op.outputs[3]
    return nn_distance_module.nn_distance_grad(xyz1, xyz2, grad_dist1, idx1, grad_dist2, idx2)


pickle_format = dict()
with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('checkpoint/gcn.ckpt.meta')
    new_saver.restore(sess, 'checkpoint/gcn.ckpt')
    all_vars = tf.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
    for v in all_vars:
        try:
            pickle_format[v.name] = sess.run(v)
        except Exception:
            # some global variables cannot be evaluated; skip them
            pass

with open("result.pkl", "wb") as f:
    pickle.dump(pickle_format, f)


================================================
FILE: utils/migrations/validate_dataset_all.py
================================================
import os
import sys

from tqdm import tqdm


def go(file_path, subset):
    shapenet_root = "datasets/data/shapenet"
    with open(file_path, "r") as f, \
            open(os.path.join(shapenet_root, "meta", subset + "_all.txt"), "w") as g:
        for line in tqdm(f.readlines()):
            _, _, label, filename, _, index = line.strip().split("/")
            converted = label + "_" + filename + "_" + index
            data_path = os.path.join(shapenet_root, "data", label + "/" + filename + "_" + index)
            if not os.path.exists(data_path):
                print("fail! " + data_path)
                continue
            print(converted, file=g)


go(sys.argv[1], "train")
go(sys.argv[2], "test")


================================================
FILE: utils/tensor.py
================================================
"""
Helper functions that have not yet been implemented in pytorch
"""
import torch


def recursive_detach(t):
    if isinstance(t, torch.Tensor):
        return t.detach()
    elif isinstance(t, list):
        return [recursive_detach(x) for x in t]
    elif isinstance(t, dict):
        return {k: recursive_detach(v) for k, v in t.items()}
    else:
        return t


def batch_mm(matrix, batch):
    """
    https://github.com/pytorch/pytorch/issues/14489
    """
    # TODO: accelerate this with batch operations
    return torch.stack([matrix.mm(b) for b in batch], dim=0)


def dot(x, y, sparse=False):
    """Wrapper for torch.matmul (sparse vs dense)."""
    if sparse:
        return batch_mm(x, y)
    else:
        return torch.matmul(x, y)
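batch_mm exists because sparse matmul does not broadcast over a batch dimension in the PyTorch release this repository targets, so the fixed sparse matrix is multiplied against each batch element in a loop. A minimal usage sketch of the dot wrapper (hypothetical shapes; assumes the repository root is on PYTHONPATH):

import torch
from utils.tensor import dot

adj = torch.eye(4).to_sparse()      # stand-in for a fixed (N x N) sparse mesh adjacency
feats = torch.randn(2, 4, 8)        # batch of per-vertex features, (B x N x F)
out = dot(adj, feats, sparse=True)  # one sparse mm per sample, stacked back into (B x N x F)
assert out.shape == (2, 4, 8)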
================================================
FILE: utils/vis/renderer.py
================================================
import cv2
import neural_renderer as nr
import numpy as np
import torch


def _process_render_result(img, height, width):
    if isinstance(img, torch.Tensor):
        img = img.cpu().numpy()
    if img.ndim == 2:
        # assuming single channel image
        img = np.expand_dims(img, axis=0)
    if img.shape[-1] == 3:
        # assuming [height, width, rgb]
        img = np.moveaxis(img, -1, 0)
    # crop height and width of the channel-first image;
    # returns [3, height, width] or [1, height, width], in range [0, 1]
    return np.clip(img[:, :height, :width], 0, 1)


def _mix_render_result_with_image(rgb, alpha, image):
    alpha = np.expand_dims(alpha, 0)
    return alpha * rgb + (1 - alpha) * image


class MeshRenderer(object):
    def __init__(self, camera_f, camera_c, mesh_pos):
        self.colors = {'pink': np.array([.9, .7, .7]),
                       'light_blue': np.array([0.65098039, 0.74117647, 0.85882353]),
                       'light_green': np.array([165., 216., 168.]) / 255,
                       'purple': np.array([216., 193., 165.]) / 255,
                       'orange': np.array([216., 165., 213.]) / 255,
                       'light_yellow': np.array([213., 216., 165.]) / 255}
        self.camera_f, self.camera_c, self.mesh_pos = camera_f, camera_c, mesh_pos
        self.renderer = nr.Renderer(camera_mode='projection',
                                    light_intensity_directional=.8,
                                    light_intensity_ambient=.3,
                                    background_color=[1., 1., 1.],
                                    light_direction=[0., 0., -1.])

    def _render_mesh(self, vertices: np.ndarray, faces: np.ndarray, width, height,
                     camera_k, camera_dist_coeffs, rvec, tvec, color=None):
        # render a square image, then crop
        img_size = max(height, width)
        # This is not thread safe!
        self.renderer.image_size = img_size
        vertices = torch.tensor(vertices, dtype=torch.float32)
        faces = torch.tensor(faces, dtype=torch.int32)
        if color is None:
            color = 'light_blue'
        color = self.colors[color]
        texture_size = 2
        textures = torch.tensor(color, dtype=torch.float32) \
            .repeat(faces.size(0), texture_size, texture_size, texture_size, 1)
        camera_k = torch.tensor(camera_k, dtype=torch.float32)
        rotmat = torch.tensor(cv2.Rodrigues(rvec)[0], dtype=torch.float32)
        tvec = torch.tensor(tvec, dtype=torch.float32)
        camera_dist_coeffs = torch.tensor(camera_dist_coeffs, dtype=torch.float32)
        rgb, _, alpha = self.renderer.render(vertices.unsqueeze(0).cuda(),
                                             faces.unsqueeze(0).cuda(),
                                             textures.unsqueeze(0).cuda(),
                                             K=camera_k.unsqueeze(0).cuda(),
                                             R=rotmat.unsqueeze(0).cuda(),
                                             t=tvec.unsqueeze(0).cuda(),
                                             dist_coeffs=camera_dist_coeffs.unsqueeze(0).cuda(),
                                             orig_size=img_size)
        # use the extra dimension of alpha for broadcasting
        alpha = _process_render_result(alpha[0], height, width)
        rgb = _process_render_result(rgb[0], height, width)
        return rgb, alpha

    def _render_pointcloud(self, vertices: np.ndarray, width, height,
                           camera_k, camera_dist_coeffs, rvec, tvec, color=None):
        if color is None:
            color = 'pink'
        color = self.colors[color]
        # project the point cloud into 2D image coordinates
        vertices_2d = cv2.projectPoints(np.expand_dims(vertices, -1), rvec, tvec,
                                        camera_k, camera_dist_coeffs)[0]
        vertices_2d = np.reshape(vertices_2d, (-1, 2))
        alpha = np.zeros((height, width, 3), float)
        whiteboard = np.ones((3, height, width), float)
        if np.isnan(vertices_2d).any():
            return whiteboard, alpha
        for x, y in vertices_2d:
            cv2.circle(alpha, (int(x), int(y)), radius=1, color=(1., 1., 1.), thickness=-1)
        rgb = _process_render_result(alpha * color[None, None, :], height, width)
        alpha = _process_render_result(alpha[:, :, 0], height, width)
        rgb = _mix_render_result_with_image(rgb, alpha[0], whiteboard)
        return rgb, alpha

    def visualize_reconstruction(self, gt_coord, coord, faces, image, mesh_only=False, **kwargs):
        camera_k = np.array([[self.camera_f[0], 0, self.camera_c[0]],
                             [0, self.camera_f[1], self.camera_c[1]],
                             [0, 0, 1]])
        # invert y and z, equivalent to inverting x, but gives positive z
        rvec = np.array([np.pi, 0., 0.], dtype=np.float32)
        tvec = np.zeros(3, dtype=np.float32)
        dist_coeffs = np.zeros(5, dtype=np.float32)
        mesh, _ = self._render_mesh(coord, faces, image.shape[2], image.shape[1],
                                    camera_k, dist_coeffs, rvec, tvec, **kwargs)
        if mesh_only:
            return mesh
        gt_pc, _ = self._render_pointcloud(gt_coord, image.shape[2], image.shape[1],
                                           camera_k, dist_coeffs, rvec, tvec, **kwargs)
        pred_pc, _ = self._render_pointcloud(coord, image.shape[2], image.shape[1],
                                             camera_k, dist_coeffs, rvec, tvec, **kwargs)
        return np.concatenate((image, gt_pc, pred_pc, mesh), 2)
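    # The rvec used above is an axis-angle rotation of pi about the x-axis;
    # cv2.Rodrigues expands it to the matrix diag(1, -1, -1), which negates
    # y and z. That places the mesh (positioned around mesh_pos, typically at
    # negative z) at positive depth in front of the camera:
    #
    #   cv2.Rodrigues(np.array([np.pi, 0., 0.]))[0]  ->  [[1,  0,  0],
    #                                                     [0, -1,  0],
    #                                                     [0,  0, -1]]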
    def p2m_batch_visualize(self, batch_input, batch_output, faces, atmost=3):
        """Everything is a tensor for now; it needs to be moved to cpu and converted to numpy."""
        batch_size = min(batch_input["images_orig"].size(0), atmost)
        images_stack = []
        mesh_pos = np.array(self.mesh_pos)
        for i in range(batch_size):
            image = batch_input["images_orig"][i].cpu().numpy()
            gt_points = batch_input["points"][i].cpu().numpy() + mesh_pos
            for j in range(3):
                # visualize the pre-deformation mesh only for the first stage
                for k in (["pred_coord_before_deform", "pred_coord"] if j == 0 else ["pred_coord"]):
                    coord = batch_output[k][j][i].cpu().numpy() + mesh_pos
                    images_stack.append(self.visualize_reconstruction(gt_points, coord,
                                                                      faces[j].cpu().numpy(), image))
        return torch.from_numpy(np.concatenate(images_stack, 1))
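To make the renderer's expected shapes concrete, a hypothetical standalone call (all values below are made-up placeholders, not values from this repository; in the real pipeline they come from the options/config system, and a CUDA device is required because neural_renderer runs on GPU):

import numpy as np
from utils.vis.renderer import MeshRenderer  # assumes the repo root is on PYTHONPATH

renderer = MeshRenderer(camera_f=[248., 248.], camera_c=[112., 112.], mesh_pos=[0., 0., -0.8])
image = np.ones((3, 224, 224), dtype=np.float32)             # [C, H, W], values in [0, 1]
vertices = (np.random.rand(156, 3) - .5).astype(np.float32)  # stand-in for a predicted mesh
faces = np.random.randint(0, 156, size=(300, 3))             # stand-in triangle indices
panel = renderer.visualize_reconstruction(vertices, vertices, faces, image)
# panel is [3, 224, 4 * 224]: input image | GT points | predicted points | rendered mesh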