Repository: googleinterns/deep-stabilization Branch: master Commit: 7159c09d21ae Files: 65 Total size: 42.8 MB Directory structure: gitextract__lkvtuhi/ ├── .gitignore ├── LICENSE ├── README.md ├── docs/ │ ├── code-of-conduct.md │ └── contributing.md └── dvs/ ├── checkpoint/ │ └── stabilzation/ │ └── stabilzation_last.checkpoint ├── conf/ │ ├── stabilzation.yaml │ └── stabilzation_train.yaml ├── dataset.py ├── flownet2/ │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── convert.py │ ├── datasets.py │ ├── install.sh │ ├── losses.py │ ├── main.py │ ├── models.py │ ├── networks/ │ │ ├── FlowNetC.py │ │ ├── FlowNetFusion.py │ │ ├── FlowNetS.py │ │ ├── FlowNetSD.py │ │ ├── __init__.py │ │ ├── channelnorm_package/ │ │ │ ├── __init__.py │ │ │ ├── channelnorm.py │ │ │ ├── channelnorm_cuda.cc │ │ │ ├── channelnorm_kernel.cu │ │ │ ├── channelnorm_kernel.cuh │ │ │ └── setup.py │ │ ├── correlation_package/ │ │ │ ├── __init__.py │ │ │ ├── correlation.py │ │ │ ├── correlation_cuda.cc │ │ │ ├── correlation_cuda_kernel.cu │ │ │ ├── correlation_cuda_kernel.cuh │ │ │ └── setup.py │ │ ├── resample2d_package/ │ │ │ ├── __init__.py │ │ │ ├── resample2d.py │ │ │ ├── resample2d_cuda.cc │ │ │ ├── resample2d_kernel.cu │ │ │ ├── resample2d_kernel.cuh │ │ │ └── setup.py │ │ └── submodules.py │ ├── run.sh │ ├── run_release.sh │ └── utils/ │ ├── __init__.py │ ├── flow_utils.py │ ├── frame_utils.py │ ├── param_utils.py │ └── tools.py ├── gyro/ │ ├── __init__.py │ ├── gyro_function.py │ └── gyro_io.py ├── inference.py ├── load_frame_sensor_data.py ├── loss.py ├── metrics.py ├── model.py ├── printer.py ├── requirements.txt ├── train.py ├── util.py └── warp/ ├── __init__.py ├── rasterizer.py ├── read_write.py └── warping.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.pyc .torch _ext *.o _ext/ *.png *.jpg *.tar 
log/* ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # Deep Online Fused Video Stabilization [[Paper]](https://openaccess.thecvf.com/content/WACV2022/papers/Shi_Deep_Online_Fused_Video_Stabilization_WACV_2022_paper.pdf)[[Supplementary]](https://zhmeishi.github.io/dvs/paper/dvs_supp.pdf) [[Project Page]](https://zhmeishi.github.io/dvs/) [[Dataset]](https://storage.googleapis.com/dataset_release/all.zip) [[Our Result]](https://storage.googleapis.com/dataset_release/inference_result_release.zip) [[More Results]](https://zhmeishi.github.io/dvs/supp/results.html) This repository contains the Pytorch implementation of our method in the paper "Deep Online Fused Video Stabilization". 
## Environment Setting Python version >= 3.6 PyTorch >= 1.0.0 with CUDA (installation guide is [here](https://pytorch.org/get-started/locally/)) Install the other required packages: ``` cd dvs pip install -r requirements.txt --ignore-installed ``` ## Data Preparation Download the sample video [here](https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing). Uncompress the *video* folder under the *dvs* folder. ``` python load_frame_sensor_data.py ``` Demo of curve visualization: The **gyro/OIS curve visualization** can be found at *dvs/video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820_real.jpg*. ## FlowNet2 Preparation Note: we provide the optical flow results for one test video in the Data Preparation download. If you would like to generate them for all test videos, please follow the [FlowNet2 official website](https://github.com/NVIDIA/flownet2-pytorch) and the guide below. Otherwise, you can skip this section. Note: FlowNet2 installation is tricky. Please use Python=3.6 and Pytorch=1.0.0. More details are [here](https://github.com/NVIDIA/flownet2-pytorch/issues/156); contact us if you have any questions. Download the FlowNet2 model *FlowNet2_checkpoint.pth.tar* [here](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view). Move it under the folder *dvs/flownet2*. ``` python warp/read_write.py # video2frames cd flownet2 bash install.sh # install package bash run.sh # generate optical flow file for dataset ``` ## Running Inference ``` python inference.py python metrics.py ``` The loss and metric information will be printed in the terminal. The metric numbers can differ slightly depending on the OpenCV/PyTorch versions used. The result is under *dvs/test/stabilzation*. In *s_114_outdoor_running_trail_daytime.jpg*, the blue curve is the output of our model, and the green curve is the input. *s_114_outdoor_running_trail_daytime_stab.mp4* is the uncropped stabilized video. *s_114_outdoor_running_trail_daytime_stab_crop.mp4* is the cropped stabilized video.
Note, the cropped video is generated after running the metrics code. ## Training Download dataset for training and test [here](https://storage.googleapis.com/dataset_release/all.zip). Uncompress *all.zip* and move *dataset_release* folder under the *dvs* folder. Follow FlowNet2 Preparation Section. ``` python warp/read_write.py --dir_path ./dataset_release # video2frames cd flownet2 bash run_release.sh # generate optical flow file for dataset ``` Run training code. ``` python train.py ``` The model is saved in *checkpoint/stabilzation_train*. ## Citation If you use this code or dataset for your research, please cite our paper. ``` @inproceedings{shi2022deep, title={Deep Online Fused Video Stabilization}, author={Shi, Zhenmei and Shi, Fuhao and Lai, Wei-Sheng and Liang, Chia-Kai and Liang, Yingyu}, booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision}, pages={1250--1258}, year={2022} } ``` ================================================ FILE: docs/code-of-conduct.md ================================================ # Google Open Source Community Guidelines At Google, we recognize and celebrate the creativity and collaboration of open source contributors and the diversity of skills, experiences, cultures, and opinions they bring to the projects and communities they participate in. Every one of Google's open source projects and communities are inclusive environments, based on treating all individuals respectfully, regardless of gender identity and expression, sexual orientation, disabilities, neurodiversity, physical appearance, body size, ethnicity, nationality, race, age, religion, or similar personal characteristic. We value diverse opinions, but we value respectful behavior more. Respectful behavior includes: * Being considerate, kind, constructive, and helpful. * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or physically threatening behavior, speech, and imagery. 
* Not engaging in unwanted physical contact. Some Google open source projects [may adopt][] an explicit project code of conduct, which may have additional detailed expectations for participants. Most of those projects will use our [modified Contributor Covenant][]. [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/ ## Resolve peacefully We do not believe that all conflict is necessarily bad; healthy debate and disagreement often yields positive results. However, it is never okay to be disrespectful. If you see someone behaving disrespectfully, you are encouraged to address the behavior directly with those involved. Many issues can be resolved quickly and easily, and this gives people more control over the outcome of their dispute. If you are unable to resolve the matter for any reason, or if the behavior is threatening or harassing, report it. We are dedicated to providing an environment where participants feel welcome and safe. ## Reporting problems Some Google open source projects may adopt a project-specific code of conduct. In those cases, a Google employee will be identified as the Project Steward, who will receive and handle reports of code of conduct violations. In the event that a project hasn’t identified a Project Steward, you can report problems by emailing opensource@google.com. We will investigate every complaint, but you may not receive a direct response. We will use our discretion in determining when and how to follow up on reported incidents, which may range from not taking action to permanent expulsion from the project and project-sponsored spaces. We will notify the accused of the report and provide them an opportunity to discuss it before any action is taken. The identity of the reporter will be omitted from the details of the report supplied to the accused. 
In potentially harmful situations, such as ongoing harassment or threats to anyone's safety, we may take action without notice. *This document was adapted from the [IndieWeb Code of Conduct][] and can also be found at <https://opensource.google/conduct/>.* [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct ================================================ FILE: docs/contributing.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution; this simply gives us permission to use and redistribute your contributions as part of the project. Head over to <https://cla.developers.google.com/> to see your current agreements on file or to sign a new one. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ## Community Guidelines This project follows [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
================================================ FILE: dvs/checkpoint/stabilzation/stabilzation_last.checkpoint ================================================ [File too large to display: 42.5 MB] ================================================ FILE: dvs/conf/stabilzation.yaml ================================================ data: exp: 'stabilzation' checkpoints_dir: './checkpoint' log: './log' data_dir: './video' use_cuda: true batch_size: 16 resize_ratio: 0.25 number_real: 10 number_virtual: 2 time_train: 2000 # ms sample_freq: 40 # ms channel_size: 1 num_workers: 16 # num_workers for data_loader model: load_model: null cnn: activate_function: relu # sigmoid, relu, tanh, quadratic batch_norm: true gap: false layers: rnn: layers: - - 512 - true - - 512 - true fc: activate_function: relu batch_norm: false # (batch_norm and drop_out) is False layers: - - 256 - true - - 4 # last layer should be equal to nr_class - true drop_out: 0 train: optimizer: "adam" # adam or sgd momentum: 0.9 # for sgd decay_epoch: null epoch: 400 snapshot: 2 init_lr: 0.0001 lr_decay: 0.5 lr_step: 200 # if > 0 decay_epoch should be null seed: 1 weight_decay: 0.0001 clip_norm: False init: "xavier_uniform" # xavier_uniform or xavier_normal loss: follow: 10 angle: 1 smooth: 10 #10 c2_smooth: 200 #20 undefine: 2.0 opt: 0.1 stay: 0 ================================================ FILE: dvs/conf/stabilzation_train.yaml ================================================ data: exp: 'stabilzation_train' checkpoints_dir: './checkpoint' log: './log' data_dir: './dataset_release' use_cuda: true batch_size: 16 resize_ratio: 0.25 number_real: 10 number_virtual: 2 time_train: 2000 # ms sample_freq: 40 # ms channel_size: 1 num_workers: 16 # num_workers for data_loader model: load_model: null cnn: activate_function: relu # sigmoid, relu, tanh, quadratic batch_norm: true gap: false layers: rnn: layers: - - 512 - true - - 512 - true fc: activate_function: relu batch_norm: false # (batch_norm and drop_out) is 
False layers: - - 256 - true - - 4 # last layer should be equal to nr_class - true drop_out: 0 train: optimizer: "adam" # adam or sgd momentum: 0.9 # for sgd decay_epoch: null epoch: 400 snapshot: 2 init_lr: 0.0001 lr_decay: 0.5 lr_step: 200 # if > 0 decay_epoch should be null seed: 1 weight_decay: 0.0001 clip_norm: False init: "xavier_uniform" # xavier_uniform or xavier_normal loss: follow: 10 angle: 1 smooth: 10 #10 c2_smooth: 200 #20 undefine: 2.0 opt: 0.1 stay: 0 ================================================ FILE: dvs/dataset.py ================================================ from torch.utils.data import Dataset import os import collections from gyro import ( LoadGyroData, LoadOISData, LoadFrameData, GetGyroAtTimeStamp, get_static, GetMetadata, GetProjections, train_GetGyroAtTimeStamp, QuaternionProduct, QuaternionReciprocal, FindOISAtTimeStamp, norm_quat ) import random import numpy as np import torchvision.transforms as transforms import torch from flownet2 import flow_utils from scipy import ndimage, misc from numpy import linalg as LA def get_data_loader(cf, no_flo = False): size = cf["data"]["batch_size"] num_workers = cf["data"]["num_workers"] train_data, test_data = get_dataset(cf, no_flo) trainloader = torch.utils.data.DataLoader(train_data, batch_size=size,shuffle=True, pin_memory=True, num_workers=num_workers) testloader = torch.utils.data.DataLoader(test_data, batch_size=size,shuffle=False, pin_memory=True, num_workers=num_workers) return trainloader,testloader def get_dataset(cf, no_flo = False): resize_ratio = cf["data"]["resize_ratio"] train_transform, test_transform = _data_transforms() train_path = os.path.join(cf["data"]["data_dir"], "training") test_path = os.path.join(cf["data"]["data_dir"], "test") if not os.path.exists(train_path): train_path = cf["data"]["data_dir"] if not os.path.exists(test_path): test_path = cf["data"]["data_dir"] train_data = Dataset_Gyro( train_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = 
cf["data"]["number_real"], time_train = cf["data"]["time_train"]*1000000, transform = train_transform, resize_ratio = resize_ratio, no_flo = no_flo) test_data = Dataset_Gyro( test_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"], time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, no_flo = no_flo) return train_data, test_data def get_inference_data_loader(cf, data_path, no_flo = False): test_data = get_inference_dataset(cf, data_path, no_flo) testloader = torch.utils.data.DataLoader(test_data, batch_size=1,shuffle=False, pin_memory=True, num_workers=1) return testloader def get_inference_dataset(cf, data_path, no_flo = False): resize_ratio = cf["data"]["resize_ratio"] _, test_transform = _data_transforms() test_data = Dataset_Gyro( data_path, sample_freq = cf["data"]["sample_freq"]*1000000, number_real = cf["data"]["number_real"], time_train = cf["data"]["time_train"]*1000000, transform = test_transform, resize_ratio = resize_ratio, inference_only = True, no_flo = no_flo) return test_data def _data_transforms(): test_transform = transforms.Compose( [transforms.ToTensor(), ]) train_transform = transforms.Compose( [transforms.ToTensor(), ]) return train_transform, test_transform class DVS_data(): def __init__(self): self.gyro = None self.ois = None self.frame = None self.length = 0 self.flo_path = None self.flo_shape = None self.flo_back_path = None class Dataset_Gyro(Dataset): def __init__(self, path, sample_freq = 33*1000000, number_real = 10, time_train = 2000*1000000, \ transform = None, inference_only = False, no_flo = False, resize_ratio = 1): r""" Arguments: sample_freq: real quaternions [t-sample_freq*number_real, t+sample_freq*number_real] ns number_real: real gyro num in half time_interval time_train: time for a batch ns """ self.sample_freq = sample_freq self.number_real = number_real self.no_flo = no_flo self.resize_ratio = resize_ratio self.static_options = 
get_static() self.inference_only = inference_only self.ois_ratio = np.array([self.static_options["crop_window_width"] / self.static_options["width"], \ self.static_options["crop_window_height"] / self.static_options["height"]]) * 0.01 self.unit_size = 4 if inference_only: self.length = 1 self.data = [self.process_one_video(path)] self.number_train = self.data[0].length return self.time_train = time_train self.number_train = time_train//self.sample_freq self.data_name = sorted(os.listdir(path)) self.length = len(self.data_name) self.data = [] for i in range(self.length): self.data.append(self.process_one_video(os.path.join(path,self.data_name[i]))) def process_one_video(self, path): dvs_data = DVS_data() files = sorted(os.listdir(path)) print(path) for f in files: file_path = os.path.join(path,f) if "gimbal" in file_path.lower(): continue if "frame" in f and "txt" in f: dvs_data.frame = LoadFrameData(file_path) print("frame:", dvs_data.frame.shape, end=" ") elif "gyro" in f: dvs_data.gyro = LoadGyroData(file_path) dvs_data.gyro = preprocess_gyro(dvs_data.gyro) print("gyro:", dvs_data.gyro.shape, end=" ") elif "ois" in f and "txt" in f: dvs_data.ois = LoadOISData(file_path) print("ois:", dvs_data.ois.shape, end=" ") elif f == "flo": dvs_data.flo_path, dvs_data.flo_shape = LoadFlow(file_path) print("flo_path:", len(dvs_data.flo_path), end=" ") print("flo_shape:", dvs_data.flo_shape, end=" ") elif f == "flo_back": dvs_data.flo_back_path, _ = LoadFlow(file_path) print() if dvs_data.flo_path is not None: dvs_data.length = min(dvs_data.frame.shape[0] - 1, len(dvs_data.flo_path)) else: dvs_data.length = dvs_data.frame.shape[0] - 1 return dvs_data def generate_quaternions(self, dvs_data): first_id = random.randint(0, dvs_data.length - self.number_train) + 1 # skip the first frame sample_data = np.zeros((self.number_train, 2 * self.number_real + 1, self.unit_size), dtype=np.float32) sample_ois = np.zeros((self.number_train, 2), dtype=np.float32) sample_time = 
np.zeros((self.number_train+1), dtype=np.float32) sample_time[0] = get_timestamp(dvs_data.frame, first_id - 1) real_postion = np.zeros((self.number_train, 4), dtype=np.float32) time_start = sample_time[0] for i in range(self.number_train): sample_time[i+1] = get_timestamp(dvs_data.frame, first_id + i) real_postion[i] = GetGyroAtTimeStamp(dvs_data.gyro, sample_time[i+1] - self.sample_freq) sample_ois[i] = self.get_ois_at_timestamp(dvs_data.ois, sample_time[i+1]) for j in range(-self.number_real, self.number_real+1): index = j + self.number_real time_stamp = sample_time[i+1] + self.sample_freq * j sample_data[i, index] = self.get_data_at_timestamp(dvs_data.gyro, dvs_data.ois, time_stamp, real_postion[i]) sample_data = np.reshape(sample_data, (self.number_train, (2*self.number_real+1) * self.unit_size)) return sample_data, sample_time, first_id, real_postion, sample_ois def load_flo(self, idx, first_id): shape = self.data[idx].flo_shape h, w = shape[0], shape[1] flo = np.zeros((self.number_train, h, w, 2)) flo_back = np.zeros((self.number_train, h, w, 2)) for i in range(self.number_train): frame_id = i + first_id f = flow_utils.readFlow(self.data[idx].flo_path[frame_id-1]).astype(np.float32) flo[i] = f f_b = flow_utils.readFlow(self.data[idx].flo_back_path[frame_id-1]).astype(np.float32) flo_back[i] = f_b return flo, flo_back def load_real_projections(self, idx, first_id): real_projections = np.zeros((self.number_train + 1, self.static_options["num_grid_rows"], 3, 3)) for i in range(self.number_train + 1): frame_id = i + first_id metadata = GetMetadata(self.data[idx].frame, frame_id - 1) real_projections[i] = np.array(GetProjections(self.static_options, metadata, self.data[idx].gyro, np.zeros(self.data[idx].ois.shape), no_shutter = True)) return real_projections def __getitem__(self, idx): inputs, times, first_id, real_postion, ois = self.generate_quaternions(self.data[idx]) real_projections = self.load_real_projections(idx, first_id) if self.no_flo: flo, flo_back = 
0, 0 else: flo, flo_back = self.load_flo(idx, first_id) return inputs, times, flo, flo_back, real_projections, real_postion, ois, idx def __len__(self): return self.length def get_virtual_data(self, virtual_queue, real_queue_idx, pre_times, cur_times, time_start, batch_size, number_virtual, quat_t_1): # virtual_queue: [batch_size, num, 5 (timestamp, quats)] # eular angle, # deta R angular velocity [Q't-1, Q't-2] # output virtual angular velocity, x, x*dtime => detaQt virtual_data = np.zeros((batch_size, number_virtual, 4), dtype=np.float32) vt_1 = np.zeros((batch_size, 4), dtype=np.float32) quat_t_1 = quat_t_1.numpy() for i in range(batch_size): sample_time = cur_times[i] for j in range(number_virtual): time_stamp = sample_time - self.sample_freq * (number_virtual - j) virtual_data[i, j] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, time_stamp, time_start[i], quat_t_1[i]) vt_1[i] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, pre_times[i], time_start[i], None) virtual_data = np.reshape(virtual_data, (batch_size, number_virtual * 4)) return torch.tensor(virtual_data, dtype=torch.float), torch.tensor(vt_1, dtype=torch.float) def update_virtual_queue(self, batch_size, virtual_queue, out, times): virtual_data = np.zeros((batch_size, 5)) virtual_data[:,0] = times virtual_data[:, 1:] = out virtual_data = np.expand_dims(virtual_data, axis = 1) if None in virtual_queue: virtual_queue = virtual_data else: virtual_queue = np.concatenate((virtual_queue, virtual_data), axis = 1) return virtual_queue def random_init_virtual_queue(self, batch_size, real_postion, times): virtual_queue = np.zeros((batch_size, 3, 5)) virtual_queue[:, 2, 0] = times - 0.1 * self.sample_freq virtual_queue[:, 1, 0] = times - 1.1 * self.sample_freq virtual_queue[:, 0, 0] = times - 2.1 * self.sample_freq for i in range(batch_size): quat = np.random.uniform(low=-0.06, high= 0.06, size=4) # transfer to angle # 0.05 quat[3] = 1 quat = quat / 
def preprocess_gyro(gyro, extend = 200):
    """Prepend ``extend`` mirrored rows to a 5-column gyro array.

    Row ``k`` (1 <= k <= extend) of ``gyro`` is reflected about the first
    row's timestamp and stored ``k`` rows before the start of the data:
    column 0 becomes ``t0 - (t_k - t0)``, columns 1..3 are negated and
    column 4 is copied unchanged.
    # NOTE(review): negating columns 1..3 while keeping column 4 looks like
    # a quaternion conjugate -- confirm the column layout with gyro_io.

    Args:
        gyro: 2-D array with 5 columns and at least ``extend + 1`` rows.
        extend: number of synthetic rows to prepend.

    Returns:
        Array of shape (extend + len(gyro), 5).
    """
    mirrored = np.zeros((extend, 5))
    t0 = gyro[0, 0]
    for step in range(1, extend + 1):
        # Source row ``step`` lands ``step`` rows before the real data.
        dst = extend - step
        mirrored[dst, 0] = t0 - (gyro[step, 0] - t0)
        mirrored[dst, 4] = gyro[step, 4]
        mirrored[dst, 1:4] = -gyro[step, 1:4]
    return np.concatenate((mirrored, gyro), axis = 0)
compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: dvs/flownet2/README.md ================================================ # flownet2-pytorch Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925). Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail. Inference using fp16 (half-precision) is also supported. For more help, type
python main.py --help ## Network architectures Below are the different flownet neural network architectures that are provided.
A batchnorm version for each network is also available. - **FlowNet2S** - **FlowNet2C** - **FlowNet2CS** - **FlowNet2CSS** - **FlowNet2SD** - **FlowNet2** ## Custom layers `FlowNet2` or `FlowNet2C*` architectures rely on custom layers `Resample2d` or `Correlation`.
PyTorch implementations of these layers, with CUDA kernels, are available at [./networks](./networks).
Note : Currently, half precision kernels are not available for these layers. ## Data Loaders Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
## Loss Functions L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
## Installation # get flownet2-pytorch source git clone https://github.com/NVIDIA/flownet2-pytorch.git cd flownet2-pytorch # install custom layers bash install.sh ### Python requirements Currently, the code supports python 3 * numpy * PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4)) * scipy * scikit-image * tensorboardX * colorama, tqdm, setproctitle ## Converted Caffe Pre-trained Models We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing). * [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB] * [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB] * [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB] * [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB] * [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB] * [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB] * [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB] ## Inference # Example on MPISintel Clean python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \ --inference_dataset_root /path/to/mpi-sintel/clean/dataset \ --resume /path/to/checkpoints ## Training and validation # Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \ --training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \ --validation_dataset MpiSintelClean 
--validation_dataset_root /path/to/mpi-sintel/clean/dataset # Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \ --loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \ --training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \ --validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset ## Results on MPI-Sintel [![Predicted flows on MPI-Sintel](./image.png)](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel") ## Reference If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper: ```` @InProceedings{IMKDB17, author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. Brox", title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks", booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)", month = "Jul", year = "2017", url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17" } ```` ``` @misc{flownet2-pytorch, author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro}, title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks}, year = {2017}, publisher = {GitHub}, journal = {GitHub repository}, howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}} } ``` ## Related Optical Flow Work from Nvidia Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371). ## Acknowledgments Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch). ================================================ FILE: dvs/flownet2/__init__.py ================================================ from .utils import flow_utils, tools ================================================ FILE: dvs/flownet2/convert.py ================================================ #!/usr/bin/env python2.7 import caffe from caffe.proto import caffe_pb2 import sys, os import torch import torch.nn as nn import argparse, tempfile import numpy as np parser = argparse.ArgumentParser() parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format') parser.add_argument('prototxt_template',help='prototxt template') parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch') args = parser.parse_args() args.rgb_max = 255 args.fp16 = False args.grads = {} # load models sys.path.append(args.flownet2_pytorch) import models from utils.param_utils import * width = 256 height = 256 keys = {'TARGET_WIDTH': width, 'TARGET_HEIGHT': height, 'ADAPTED_WIDTH':width, 'ADAPTED_HEIGHT':height, 'SCALE_WIDTH':1., 'SCALE_HEIGHT':1.,} template = '\n'.join(np.loadtxt(args.prototxt_template, dtype=str, delimiter='\n')) for k in keys: template = template.replace('$%s$'%(k),str(keys[k])) prototxt = tempfile.NamedTemporaryFile(mode='w', delete=True) prototxt.write(template) prototxt.flush() net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST) weights = {} biases = {} for k, v in list(net.params.items()): weights[k] = np.array(v[0].data).reshape(v[0].data.shape) biases[k] = np.array(v[1].data).reshape(v[1].data.shape) print((k, weights[k].shape, biases[k].shape)) if 'FlowNet2/' in args.caffe_model: model = models.FlowNet2(args) parse_flownetc(model.flownetc.modules(), weights, 
biases) parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') parse_flownetsd(model.flownets_d.modules(), weights, biases, param_prefix='netsd_') parse_flownetfusion(model.flownetfusion.modules(), weights, biases, param_prefix='fuse_') state = {'epoch': 0, 'state_dict': model.state_dict(), 'best_EPE': 1e10} torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2_checkpoint.pth.tar')) elif 'FlowNet2-C/' in args.caffe_model: model = models.FlowNet2C(args) parse_flownetc(model.modules(), weights, biases) state = {'epoch': 0, 'state_dict': model.state_dict(), 'best_EPE': 1e10} torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-C_checkpoint.pth.tar')) elif 'FlowNet2-CS/' in args.caffe_model: model = models.FlowNet2CS(args) parse_flownetc(model.flownetc.modules(), weights, biases) parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') state = {'epoch': 0, 'state_dict': model.state_dict(), 'best_EPE': 1e10} torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CS_checkpoint.pth.tar')) elif 'FlowNet2-CSS/' in args.caffe_model: model = models.FlowNet2CSS(args) parse_flownetc(model.flownetc.modules(), weights, biases) parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') state = {'epoch': 0, 'state_dict': model.state_dict(), 'best_EPE': 1e10} torch.save(state, os.path.join(args.flownet2_pytorch, 'FlowNet2-CSS_checkpoint.pth.tar')) elif 'FlowNet2-CSS-ft-sd/' in args.caffe_model: model = models.FlowNet2CSS(args) parse_flownetc(model.flownetc.modules(), weights, biases) parse_flownets(model.flownets_1.modules(), weights, biases, param_prefix='net2_') parse_flownets(model.flownets_2.modules(), weights, biases, param_prefix='net3_') state = {'epoch': 0, 'state_dict': model.state_dict(), 'best_EPE': 1e10} 
class StaticRandomCrop(object):
    """Crop window whose random offset is drawn once, at construction.

    Every array passed to the instance is cut with the same window, so
    several arrays can be cropped consistently.
    """

    def __init__(self, image_size, crop_size):
        # image_size and crop_size are (height, width) pairs.
        self.th, self.tw = crop_size
        full_h, full_w = image_size
        # Row offset is drawn before the column offset; keep this order so
        # seeded runs stay reproducible.
        self.h1 = random.randint(0, full_h - self.th)
        self.w1 = random.randint(0, full_w - self.tw)

    def __call__(self, img):
        rows = slice(self.h1, self.h1 + self.th)
        cols = slice(self.w1, self.w1 + self.tw)
        return img[rows, cols, :]


class StaticCenterCrop(object):
    """Crop the central ``crop_size`` window out of a 3-D array."""

    def __init__(self, image_size, crop_size):
        self.th, self.tw = crop_size
        self.h, self.w = image_size

    def __call__(self, img):
        top = (self.h - self.th) // 2
        bottom = (self.h + self.th) // 2
        left = (self.w - self.tw) // 2
        right = (self.w + self.tw) // 2
        return img[top:bottom, left:right, :]


class Padding(object):
    """Place an image in the top-left corner of a zero 3-channel canvas."""

    def __init__(self, image_size, pad_size):
        self.th, self.tw = pad_size
        self.h, self.w = image_size

    def __call__(self, img):
        # np.zeros defaults to float64, so the result is float64 whatever
        # dtype ``img`` has.
        canvas = np.zeros((self.th, self.tw, 3))
        canvas[:self.h, :self.w, :] = img
        return canvas
__init__(self, args, is_cropped = False, root = '', dstype = 'clean', replicates = 1): self.args = args self.is_cropped = is_cropped self.crop_size = args.crop_size self.render_size = args.inference_size self.replicates = replicates flow_root = join(root, 'flow') image_root = join(root, dstype) file_list = sorted(glob(join(flow_root, '*/*.flo'))) self.flow_list = [] self.image_list = [] for file in file_list: if 'test' in file: # print file continue fbase = file[len(flow_root)+1:] fprefix = fbase[:-8] fnum = int(fbase[-8:-4]) img1 = join(image_root, fprefix + "%04d"%(fnum+0) + '.png') img2 = join(image_root, fprefix + "%04d"%(fnum+1) + '.png') if not isfile(img1) or not isfile(img2) or not isfile(file): continue self.image_list += [[img1, img2]] self.flow_list += [file] self.size = len(self.image_list) self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape if (self.render_size[0] < 0) or (self.render_size[1] < 0) or (self.frame_size[0]%64) or (self.frame_size[1]%64): self.render_size[0] = ( (self.frame_size[0])//64 ) * 64 self.render_size[1] = ( (self.frame_size[1])//64 ) * 64 args.inference_size = self.render_size assert (len(self.image_list) == len(self.flow_list)) def __getitem__(self, index): index = index % self.size img1 = frame_utils.read_gen(self.image_list[index][0]) img2 = frame_utils.read_gen(self.image_list[index][1]) flow = frame_utils.read_gen(self.flow_list[index]) images = [img1, img2] image_size = img1.shape[:2] if self.is_cropped: cropper = StaticRandomCrop(image_size, self.crop_size) else: cropper = StaticCenterCrop(image_size, self.render_size) images = list(map(cropper, images)) flow = cropper(flow) images = np.array(images).transpose(3,0,1,2) flow = flow.transpose(2,0,1) images = torch.from_numpy(images.astype(np.float32)) flow = torch.from_numpy(flow.astype(np.float32)) return [images], [flow] def __len__(self): return self.size * self.replicates class MpiSintelClean(MpiSintel): def __init__(self, args, is_cropped = False, root = 
class FlyingChairs(data.Dataset):
    """Image pairs and flow files found directly under ``root``.

    ``root`` is globbed for ``*.ppm`` images and ``*.flo`` flows; after
    sorting, images ``2k`` and ``2k + 1`` form the pair for flow ``k``.
    """

    def __init__(self, args, is_cropped, root = '/path/to/FlyingChairs_release/data', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        # Same list object as args.inference_size: in-place edits below are
        # visible through ``args`` as well.
        self.render_size = args.inference_size
        self.replicates = replicates

        ppm_files = sorted(glob(join(root, '*.ppm')))
        self.flow_list = sorted(glob(join(root, '*.flo')))
        assert (len(ppm_files) // 2 == len(self.flow_list))

        self.image_list = [
            [ppm_files[2 * k], ppm_files[2 * k + 1]]
            for k in range(len(self.flow_list))
        ]
        assert len(self.image_list) == len(self.flow_list)

        self.size = len(self.image_list)
        # presumably an (H, W, C) array shape -- confirm in frame_utils.read_gen
        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        needs_rounding = (
            self.render_size[0] < 0
            or self.render_size[1] < 0
            or self.frame_size[0] % 64
            or self.frame_size[1] % 64
        )
        if needs_rounding:
            # Round each frame dimension down to a multiple of 64.
            self.render_size[0] = (self.frame_size[0] // 64) * 64
            self.render_size[1] = (self.frame_size[1] // 64) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        # Indices wrap so that ``replicates`` > 1 re-uses the same samples.
        index = index % self.size
        first_path, second_path = self.image_list[index][0], self.image_list[index][1]

        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)
        flow = frame_utils.read_gen(self.flow_list[index])

        image_size = img1.shape[:2]
        if self.is_cropped:
            cropper = StaticRandomCrop(image_size, self.crop_size)
        else:
            cropper = StaticCenterCrop(image_size, self.render_size)

        # The same cropper instance is applied to both frames and the flow.
        cropped = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        # (pair, H, W, C) -> (C, pair, H, W); flow (H, W, C) -> (C, H, W).
        stacked = np.array(cropped).transpose(3, 0, 1, 2)
        flow = flow.transpose(2, 0, 1)

        images = torch.from_numpy(stacked.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))
        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class ChairsSDHom(data.Dataset):
    """Image pairs and flows read from ``root/<dstype>/{t0,t1,flow}``.

    After sorting, the k-th file of ``t0``, ``t1`` and ``flow`` are treated
    as one sample.
    """

    def __init__(self, args, is_cropped, root = '/path/to/chairssdhom/data', dstype = 'train', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        # Same list object as args.inference_size: in-place edits below are
        # visible through ``args`` as well.
        self.render_size = args.inference_size
        self.replicates = replicates

        first_frames = sorted(glob(join(root, dstype, 't0/*.png')))
        second_frames = sorted(glob(join(root, dstype, 't1/*.png')))
        self.flow_list = sorted(glob(join(root, dstype, 'flow/*.flo')))
        assert (len(first_frames) == len(self.flow_list))

        self.image_list = [
            [first_frames[k], second_frames[k]]
            for k in range(len(self.flow_list))
        ]
        assert len(self.image_list) == len(self.flow_list)

        self.size = len(self.image_list)
        # presumably an (H, W, C) array shape -- confirm in frame_utils.read_gen
        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        needs_rounding = (
            self.render_size[0] < 0
            or self.render_size[1] < 0
            or self.frame_size[0] % 64
            or self.frame_size[1] % 64
        )
        if needs_rounding:
            # Round each frame dimension down to a multiple of 64.
            self.render_size[0] = (self.frame_size[0] // 64) * 64
            self.render_size[1] = (self.frame_size[1] // 64) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        # Indices wrap so that ``replicates`` > 1 re-uses the same samples.
        index = index % self.size
        first_path, second_path = self.image_list[index][0], self.image_list[index][1]

        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)
        flow = frame_utils.read_gen(self.flow_list[index])

        # The flow array is reversed along its first axis before cropping.
        flow = flow[::-1, :, :]

        image_size = img1.shape[:2]
        if self.is_cropped:
            cropper = StaticRandomCrop(image_size, self.crop_size)
        else:
            cropper = StaticCenterCrop(image_size, self.render_size)

        # The same cropper instance is applied to both frames and the flow.
        cropped = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        # (pair, H, W, C) -> (C, pair, H, W); flow (H, W, C) -> (C, H, W).
        stacked = np.array(cropped).transpose(3, 0, 1, 2)
        flow = flow.transpose(2, 0, 1)

        images = torch.from_numpy(stacked.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))
        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class Google(data.Dataset):
    """Consecutive-frame pairs from the ``*.png`` files in ``root/<dstype>``.

    No ground-truth flow is loaded: ``__getitem__`` returns only the image
    pair. When cropping is off, frames are zero-padded (not cropped) up to
    ``render_size``.
    """

    def __init__(self, args, is_cropped = False, root = '', dstype = 'frames', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        # Same list object as args.inference_size: in-place edits below are
        # visible through ``args`` as well.
        self.render_size = args.inference_size
        self.replicates = replicates

        frame_paths = sorted(glob(join(join(root, dstype), '*.png')))

        # Pair every frame with its successor, dropping pairs whose files
        # are not regular files.
        self.image_list = []
        for current, following in zip(frame_paths, frame_paths[1:]):
            if isfile(current) and isfile(following):
                self.image_list.append([current, following])

        self.size = len(self.image_list)
        # presumably an (H, W, C) array shape -- confirm in frame_utils.read_gen
        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        needs_rounding = (
            self.render_size[0] < 0
            or self.render_size[1] < 0
            or self.frame_size[0] % 64
            or self.frame_size[1] % 64
        )
        if needs_rounding:
            # Unlike the other datasets in this file, dimensions are rounded
            # UP to a multiple of 64; Padding fills the added border.
            self.render_size[0] = (math.ceil(self.frame_size[0] / 64)) * 64
            self.render_size[1] = (math.ceil(self.frame_size[1] / 64)) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        # Indices wrap so that ``replicates`` > 1 re-uses the same samples.
        index = index % self.size
        first_path, second_path = self.image_list[index][0], self.image_list[index][1]

        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)

        image_size = img1.shape[:2]
        if self.is_cropped:
            cropper = StaticRandomCrop(image_size, self.crop_size)
        else:
            cropper = Padding(image_size, self.render_size)

        resized = [cropper(img1), cropper(img2)]

        # (pair, H, W, C) -> (C, pair, H, W)
        stacked = np.array(resized).transpose(3, 0, 1, 2)
        images = torch.from_numpy(stacked.astype(np.float32))
        return [images]

    def __len__(self):
        return self.size * self.replicates
def EPE(input_flow, target_flow):
    """Return the mean L2 norm, taken over dim 1, of ``target - input``."""
    return torch.norm(target_flow - input_flow, p=2, dim=1).mean()


class L1(nn.Module):
    """Mean absolute difference between output and target."""

    def __init__(self):
        super(L1, self).__init__()

    def forward(self, output, target):
        return torch.abs(output - target).mean()


class L2(nn.Module):
    """Mean over all positions of the dim-1 L2 norm of ``output - target``."""

    def __init__(self):
        super(L2, self).__init__()

    def forward(self, output, target):
        return torch.norm(output - target, p=2, dim=1).mean()


class L1Loss(nn.Module):
    """L1 loss that also reports the end-point error.

    ``forward`` returns ``[loss, epe]``, labelled by ``loss_labels``.
    """

    def __init__(self, args):
        super(L1Loss, self).__init__()
        self.args = args
        self.loss = L1()
        self.loss_labels = ['L1', 'EPE']

    def forward(self, output, target):
        return [self.loss(output, target), EPE(output, target)]


class L2Loss(nn.Module):
    """L2 loss that also reports the end-point error.

    ``forward`` returns ``[loss, epe]``, labelled by ``loss_labels``.
    """

    def __init__(self, args):
        super(L2Loss, self).__init__()
        self.args = args
        self.loss = L2()
        self.loss_labels = ['L2', 'EPE']

    def forward(self, output, target):
        return [self.loss(output, target), EPE(output, target)]


class MultiScale(nn.Module):
    """Weighted sum of a per-scale loss over a tuple of predictions.

    Args:
        args: stored on the instance; not otherwise used by this class.
        startScale: average-pooling factor applied to the target for the
            first prediction; doubled for each following prediction.
        numScales: number of scales (weights and pooling layers) created.
        l_weight: weight of the first scale; halved for each following one.
        norm: ``'L1'`` selects :class:`L1`; any other value selects
            :class:`L2`.
    """

    def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'):
        super(MultiScale, self).__init__()

        self.startScale = startScale
        self.numScales = numScales
        self.loss_weights = torch.FloatTensor(
            [(l_weight / 2 ** scale) for scale in range(self.numScales)]
        )
        self.args = args
        self.l_type = norm
        # Factor applied to the target before comparing against a tuple of
        # multi-scale predictions (see forward).
        self.div_flow = 0.05
        assert(len(self.loss_weights) == self.numScales)

        if self.l_type == 'L1':
            self.loss = L1()
        else:
            self.loss = L2()

        self.multiScales = [
            nn.AvgPool2d(self.startScale * (2 ** scale), self.startScale * (2 ** scale))
            for scale in range(self.numScales)
        ]
        # A plain list, like L1Loss/L2Loss. The original line ended with a
        # stray comma, which wrapped this list in a 1-tuple and broke
        # pairing labels with the two values returned by forward().
        self.loss_labels = ['MultiScale-' + self.l_type, 'EPE']

    def forward(self, output, target):
        """Return ``[loss, epe]`` for a tuple of predictions or a single one.

        A tuple is treated as multi-scale output: the target is multiplied
        by ``div_flow`` and average-pooled to each scale, and the per-scale
        values are accumulated with ``loss_weights``. Any other ``output``
        is compared with ``target`` directly, unweighted and unscaled.
        """
        lossvalue = 0
        epevalue = 0

        if isinstance(output, tuple):
            target = self.div_flow * target
            for i, output_ in enumerate(output):
                target_ = self.multiScales[i](target)
                epevalue += self.loss_weights[i] * EPE(output_, target_)
                lossvalue += self.loss_weights[i] * self.loss(output_, target_)
            return [lossvalue, epevalue]

        epevalue += EPE(output, target)
        lossvalue += self.loss(output, target)
        return [lossvalue, epevalue]
data[:,:,:1,:,:]), dim = 2) if args.cuda: data_forward = data.cuda(non_blocking=True) data_back = data_back.cuda(non_blocking=True) data_forward = Variable(data_forward) data_back = Variable(data_back) flo_path = join(flow_folder, '%06d.flo'%(batch_idx)) flo_back_path = join(flow_back_folder, '%06d.flo'%(batch_idx)) frame_size = data_loader.dataset.frame_size if not os.path.exists(flo_path): with torch.no_grad(): output = model(data_forward)[:,:,:frame_size[0], :frame_size[1]] if args.save_flow or args.render_validation: _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0) flow_utils.writeFlow( flo_path, _pflow) if args.inference_visualize: flow_utils.visulize_flow_file( join(flow_folder, '%06d.flo' % (batch_idx)),flow_vis_folder) if not os.path.exists(flo_back_path): with torch.no_grad(): output = model(data_back)[:,:,:frame_size[0], :frame_size[1]] if args.save_flow or args.render_validation: _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0) flow_utils.writeFlow( flo_back_path, _pflow) if args.inference_visualize: flow_utils.visulize_flow_file( join(flow_back_folder, '%06d.flo' % (batch_idx)), flow_back_vis_folder) progress.update(1) if batch_idx == (args.inference_n_batches - 1): break progress.close() return if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).') parser.add_argument('--fp16_scale', type=float, default=1024., help='Loss scaling, positive power of 2 values can improve fp16 convergence.') parser.add_argument('--start_epoch', type=int, default=1) parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size") parser.add_argument('--crop_size', type=int, nargs='+', default = [256, 256], help="Spatial dimension to crop training samples for training") parser.add_argument("--rgb_max", type=float, default = 255.) 
parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8) parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use') parser.add_argument('--no_cuda', action='store_true') parser.add_argument('--save', '-s', default='./Google', type=str, help='directory for saving') parser.add_argument('--inference', action='store_true') parser.add_argument('--inference_visualize', action='store_true', help="visualize the optical flow during inference") parser.add_argument('--inference_size', type=int, nargs='+', default = [-1,-1], help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used') parser.add_argument('--inference_batch_size', type=int, default=1) parser.add_argument('--inference_n_batches', type=int, default=-1) parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches") tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2') tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='Google', skip_params=['is_cropped'], parameter_defaults={'root': './Google/train', 'replicates': 1}) main_dir = os.path.dirname(os.path.realpath(__file__)) os.chdir(main_dir) # Parse the official arguments with tools.TimerBlock("Parsing Arguments") as block: args = parser.parse_args() if args.number_gpus < 0 : args.number_gpus = torch.cuda.device_count() # Get argument defaults (hastag #thisisahack) parser.add_argument('--IGNORE', action='store_true') defaults = vars(parser.parse_args(['--IGNORE'])) # Print all arguments, color the non-defaults for argument, value in sorted(vars(args).items()): reset = colorama.Style.RESET_ALL color = reset if value == 
defaults[argument] else colorama.Fore.MAGENTA block.log('{}{}: {}{}'.format(color, argument, value, reset)) args.model_class = tools.module_to_dict(models)[args.model] args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset] args.cuda = not args.no_cuda and torch.cuda.is_available() # args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip() args.log_file = join(args.save, 'args.txt') # dict to collect activation gradients (for training debug purpose) args.grads = {} args.total_epochs = 1 args.inference_dir = "{}/inference".format(args.save) print('Source Code') # print((' Current Git Hash: {}\n'.format(args.current_hash))) # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments with tools.TimerBlock("Initializing Datasets") as block: args.effective_batch_size = args.batch_size * args.number_gpus args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus args.effective_number_workers = args.number_workers * args.number_gpus gpuargs = {'num_workers': args.effective_number_workers, 'pin_memory': True, 'drop_last' : True} if args.cuda else {} inf_gpuargs = gpuargs.copy() inf_gpuargs['num_workers'] = args.number_workers block.log('Inference Dataset: {}'.format(args.inference_dataset)) dataset_root = args.inference_dataset_root data_name = sorted(os.listdir(dataset_root)) block.log(data_name) inference_loaders = {} for i in range(len(data_name)): dataset_path = os.path.join(dataset_root, data_name[i]) args.inference_dataset_root = dataset_path inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset')) inference_loaders[dataset_path] = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs) block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]]))) # Dynamically load model and loss 
class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments with tools.TimerBlock("Building {} model".format(args.model)) as block: class Model(nn.Module): def __init__(self, args): super(Model, self).__init__() kwargs = tools.kwargs_from_args(args, 'model') self.model = args.model_class(args, **kwargs) def forward(self, data): output = self.model(data) return output model = Model(args) block.log('Effective Batch Size: {}'.format(args.effective_batch_size)) block.log('Number of parameters: {}'.format(sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()]))) if args.cuda and args.number_gpus > 0: block.log('Initializing CUDA') model = model.cuda() block.log('Parallelizing') model = nn.parallel.DataParallel(model, device_ids=list(range(args.number_gpus))) # Load weights if needed, otherwise randomly initialize if args.resume and os.path.isfile(args.resume): block.log("Loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) model.module.model.load_state_dict(checkpoint['state_dict']) block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch'])) elif args.resume and args.inference: block.log("No checkpoint found at '{}'".format(args.resume)) quit() else: block.log("Random initialization") block.log("Initializing save directory: {}".format(args.save)) if not os.path.exists(args.save): os.makedirs(args.save) # Log all arguments to file for argument, value in sorted(vars(args).items()): block.log2file(args.log_file, '{}: {}'.format(argument, value)) for data_path in inference_loaders: # Primary epoch loop progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)), miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0) offset = 1 for epoch in progress: stats = inference(args=args, epoch=epoch - 1, data_path = data_path, data_loader=inference_loaders[data_path], model=model, offset=offset) offset += 1 print("\n") 
class FlowNet2(nn.Module):
    """Stacked FlowNet2 network.

    A FlowNetC followed by two FlowNetS refinement stages runs alongside a
    FlowNetSD branch; both results are merged by FlowNetFusion.

    Parameter count = 162,518,834

    Args:
        args: Options object. ``args.rgb_max`` and ``args.fp16`` are read
            here; the object is also forwarded to every sub-network.
        batchNorm: Forwarded to every sub-network.
        div_flow: Scale factor applied to intermediate flow predictions.
    """

    def __init__(self, args, batchNorm=False, div_flow = 20.):
        super(FlowNet2,self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample1 = self._build_resample(args.fp16)

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample2 = self._build_resample(args.fp16)

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)

        # Block (FlowNetSD)
        self.flownets_d = FlowNetSD.FlowNetSD(args, batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')
        self.resample3 = self._build_resample(args.fp16)
        self.resample4 = self._build_resample(args.fp16)

        # Block (FLowNetFusion)
        self.flownetfusion = FlowNetFusion.FlowNetFusion(args, batchNorm=self.batchNorm)

        # Re-initialise every (transposed) convolution in the whole stack.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

    @staticmethod
    def _build_resample(fp16):
        """Return a Resample2d module, wrapped in fp32/fp16 casts when ``fp16`` is set."""
        if fp16:
            return nn.Sequential(tofp32(), Resample2d(), tofp16())
        return Resample2d()

    def init_deconv_bilinear(self, weight):
        """Overwrite ``weight`` in place with a bilinear interpolation kernel.

        The kernel is written to every ``weight[i, i]`` slice for
        ``i < min(weight.size(0), weight.size(1))``; all other slices are
        zeroed.

        Args:
            weight: Tensor whose last two axes are the kernel height and width.

        Returns:
            None.
        """
        f_shape = weight.size()
        heigh, width = f_shape[-2], f_shape[-1]

        def axis_profile(size):
            # 1-D triangular profile along one kernel axis.
            f = np.ceil(size / 2.0)
            c = (2 * f - 1 - f % 2) / (2.0 * f)
            return 1 - np.abs(np.arange(size) / f - c)

        # Rows follow the height axis and columns the width axis, so the
        # array matches weight[i, i, :, :] for non-square kernels as well.
        # The previous code indexed the [heigh, width] array as [x, y].
        bilinear = np.outer(axis_profile(heigh), axis_profile(width))

        min_dim = min(f_shape[0], f_shape[1])
        weight.data.fill_(0.)
        for i in range(min_dim):
            weight.data[i,i,:,:] = torch.from_numpy(bilinear)
        return

    def forward(self, inputs):
        """Compute the fused flow for a batch of frame pairs.

        Args:
            inputs: 5-D tensor; index 0 and 1 of axis 2 select the two frames.
                The mean over everything but the first two axes is removed
                and the result is divided by ``rgb_max``.

        Returns:
            The output of the fusion network.
        """
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]
        x = torch.cat((x1,x2), dim = 1)

        # flownetc
        flownetc_flow2 = self.flownetc(x)[0]
        flownetc_flow = self.upsample1(flownetc_flow2*self.div_flow)

        # warp img1 to img0; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample1(x[:,3:,:,:], flownetc_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat1 = torch.cat((x, resampled_img1, flownetc_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets1
        flownets1_flow2 = self.flownets_1(concat1)[0]
        flownets1_flow = self.upsample2(flownets1_flow2*self.div_flow)

        # warp img1 to img0 using flownets1; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample2(x[:,3:,:,:], flownets1_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat2 = torch.cat((x, resampled_img1, flownets1_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets2
        flownets2_flow2 = self.flownets_2(concat2)[0]
        flownets2_flow = self.upsample4(flownets2_flow2 * self.div_flow)
        norm_flownets2_flow = self.channelnorm(flownets2_flow)

        diff_flownets2_flow = self.resample4(x[:,3:,:,:], flownets2_flow)
        diff_flownets2_img1 = self.channelnorm((x[:,:3,:,:]-diff_flownets2_flow))

        # flownetsd
        flownetsd_flow2 = self.flownets_d(x)[0]
        # NOTE(review): this branch divides by div_flow where the others
        # multiply; kept unchanged because existing checkpoints may depend on
        # it -- confirm before altering.
        flownetsd_flow = self.upsample3(flownetsd_flow2 / self.div_flow)
        norm_flownetsd_flow = self.channelnorm(flownetsd_flow)

        diff_flownetsd_flow = self.resample3(x[:,3:,:,:], flownetsd_flow)
        diff_flownetsd_img1 = self.channelnorm((x[:,:3,:,:]-diff_flownetsd_flow))

        # concat img1 flownetsd, flownets2, norm_flownetsd, norm_flownets2, diff_flownetsd_img1, diff_flownets2_img1
        concat3 = torch.cat((x[:,:3,:,:], flownetsd_flow, flownets2_flow, norm_flownetsd_flow, norm_flownets2_flow, diff_flownetsd_img1, diff_flownets2_img1), dim=1)
        flownetfusion_flow = self.flownetfusion(concat3)

        return flownetfusion_flow
class FlowNet2S(FlowNetS.FlowNetS):
    """FlowNetS variant that accepts a raw frame pair and normalises it itself.

    The base network is built with six input channels. In training mode the
    five multi-scale flow predictions are returned; otherwise the finest one
    is scaled by ``div_flow`` and upsampled.
    """

    def __init__(self, args, batchNorm=False, div_flow=20):
        super().__init__(args, input_channels=6, batchNorm=batchNorm)
        self.rgb_max = args.rgb_max
        self.div_flow = div_flow

    def forward(self, inputs):
        # Remove the mean taken over everything but the first two axes, then
        # rescale by rgb_max.
        lead_dims = inputs.size()[:2]
        flat = inputs.contiguous().view(lead_dims + (-1,))
        rgb_mean = flat.mean(dim=-1).view(lead_dims + (1, 1, 1,))
        normalized = (inputs - rgb_mean) / self.rgb_max

        # Stack the two frames (index 0 and 1 of axis 2) along axis 1.
        first_frame = normalized[:, :, 0, :, :]
        second_frame = normalized[:, :, 1, :, :]
        pair = torch.cat((first_frame, second_frame), dim=1)

        # Encoder
        enc1 = self.conv1(pair)
        enc2 = self.conv2(enc1)
        enc3 = self.conv3_1(self.conv3(enc2))
        enc4 = self.conv4_1(self.conv4(enc3))
        enc5 = self.conv5_1(self.conv5(enc4))
        enc6 = self.conv6_1(self.conv6(enc5))

        # Decoder: each level concatenates the skip feature, the deconvolved
        # feature and the upsampled coarser flow.
        flow6 = self.predict_flow6(enc6)
        up6 = self.upsampled_flow6_to_5(flow6)
        dec5 = self.deconv5(enc6)
        merged5 = torch.cat((enc5, dec5, up6), 1)

        flow5 = self.predict_flow5(merged5)
        up5 = self.upsampled_flow5_to_4(flow5)
        dec4 = self.deconv4(merged5)
        merged4 = torch.cat((enc4, dec4, up5), 1)

        flow4 = self.predict_flow4(merged4)
        up4 = self.upsampled_flow4_to_3(flow4)
        dec3 = self.deconv3(merged4)
        merged3 = torch.cat((enc3, dec3, up4), 1)

        flow3 = self.predict_flow3(merged3)
        up3 = self.upsampled_flow3_to_2(flow3)
        dec2 = self.deconv2(merged3)
        merged2 = torch.cat((enc2, dec2, up3), 1)

        flow2 = self.predict_flow2(merged2)

        if not self.training:
            return self.upsample1(flow2 * self.div_flow)
        return flow2, flow3, flow4, flow5, flow6
class FlowNet2CS(nn.Module):
    """Two-stage stack: a FlowNetC followed by one FlowNetS refinement.

    Args:
        args: Options object. ``args.rgb_max`` and ``args.fp16`` are read
            here; the object is also forwarded to both sub-networks.
        batchNorm: Forwarded to both sub-networks.
        div_flow: Scale factor applied to intermediate flow predictions.
    """

    def __init__(self, args, batchNorm=False, div_flow = 20.):
        super(FlowNet2CS,self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        if args.fp16:
            self.resample1 = nn.Sequential(
                            tofp32(),
                            Resample2d(),
                            tofp16())
        else:
            self.resample1 = Resample2d()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')

        # Use the in-place initialisers (trailing underscore), as the sibling
        # networks in this package already do; the un-suffixed names are
        # deprecated aliases.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

    def forward(self, inputs):
        """Return the upsampled flow produced by the FlowNetS stage.

        Args:
            inputs: 5-D tensor; index 0 and 1 of axis 2 select the two frames.
        """
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]
        x = torch.cat((x1,x2), dim = 1)

        # flownetc
        flownetc_flow2 = self.flownetc(x)[0]
        flownetc_flow = self.upsample1(flownetc_flow2*self.div_flow)

        # warp img1 to img0; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample1(x[:,3:,:,:], flownetc_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat1 = torch.cat((x, resampled_img1, flownetc_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets1
        flownets1_flow2 = self.flownets_1(concat1)[0]
        flownets1_flow = self.upsample2(flownets1_flow2*self.div_flow)

        return flownets1_flow
class FlowNetC(nn.Module):
    """Correlation-based flow network (parameter count: 39,175,298).

    Both frames go through the same three convolutions, the resulting
    features are correlated, and the rest of the encoder/decoder works on the
    correlation volume concatenated with a 1x1 projection of the first
    frame's features.

    In training mode ``forward`` returns the five multi-scale predictions; in
    eval mode it returns a one-element tuple holding the finest one.
    """

    def __init__(self, args, batchNorm=True, div_flow=20):
        super().__init__()

        self.batchNorm = batchNorm
        self.div_flow = div_flow
        use_bn = self.batchNorm

        # Feature extractor applied to each frame separately.
        self.conv1 = conv(use_bn, 3, 64, kernel_size=7, stride=2)
        self.conv2 = conv(use_bn, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(use_bn, 128, 256, kernel_size=5, stride=2)
        self.conv_redir = conv(use_bn, 256, 32, kernel_size=1, stride=1)

        correlation = Correlation(pad_size=20, kernel_size=1, max_displacement=20,
                                  stride1=1, stride2=2, corr_multiply=1)
        if args.fp16:
            # Cast to fp32 around the correlation layer and back to fp16 after.
            self.corr = nn.Sequential(tofp32(), correlation, tofp16())
        else:
            self.corr = correlation

        self.corr_activation = nn.LeakyReLU(0.1, inplace=True)

        # Encoder on the merged stream.
        self.conv3_1 = conv(use_bn, 473, 256)
        self.conv4 = conv(use_bn, 256, 512, stride=2)
        self.conv4_1 = conv(use_bn, 512, 512)
        self.conv5 = conv(use_bn, 512, 512, stride=2)
        self.conv5_1 = conv(use_bn, 512, 512)
        self.conv6 = conv(use_bn, 512, 1024, stride=2)
        self.conv6_1 = conv(use_bn, 1024, 1024)

        # Decoder.
        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)

        # Conv2d and ConvTranspose2d layers get the same initialisation.
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                if layer.bias is not None:
                    init.uniform_(layer.bias)
                init.xavier_uniform_(layer.weight)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    def forward(self, x):
        # The first three channels are one frame, the remaining ones the other.
        frame_a = x[:, 0:3, :, :]
        frame_b = x[:, 3::, :, :]

        # FlownetC top input stream
        a1 = self.conv1(frame_a)
        a2 = self.conv2(a1)
        a3 = self.conv3(a2)

        # FlownetC bottom input stream
        b1 = self.conv1(frame_b)
        b2 = self.conv2(b1)
        b3 = self.conv3(b2)

        # Merge streams
        corr_volume = self.corr_activation(self.corr(a3, b3))

        # Redirect top input stream and concatenate
        redirected = self.conv_redir(a3)
        merged_in = torch.cat((redirected, corr_volume), 1)

        # Merged conv layers
        enc3 = self.conv3_1(merged_in)
        enc4 = self.conv4_1(self.conv4(enc3))
        enc5 = self.conv5_1(self.conv5(enc4))
        enc6 = self.conv6_1(self.conv6(enc5))

        flow6 = self.predict_flow6(enc6)
        up6 = self.upsampled_flow6_to_5(flow6)
        dec5 = self.deconv5(enc6)
        cat5 = torch.cat((enc5, dec5, up6), 1)

        flow5 = self.predict_flow5(cat5)
        up5 = self.upsampled_flow5_to_4(flow5)
        dec4 = self.deconv4(cat5)
        cat4 = torch.cat((enc4, dec4, up5), 1)

        flow4 = self.predict_flow4(cat4)
        up4 = self.upsampled_flow4_to_3(flow4)
        dec3 = self.deconv3(cat4)
        cat3 = torch.cat((enc3, dec3, up4), 1)

        flow3 = self.predict_flow3(cat3)
        up3 = self.upsampled_flow3_to_2(flow3)
        dec2 = self.deconv2(cat3)
        # The finest skip connection comes from the first frame's conv2 output.
        cat2 = torch.cat((a2, dec2, up3), 1)

        flow2 = self.predict_flow2(cat2)

        if not self.training:
            return (flow2,)
        return flow2, flow3, flow4, flow5, flow6
class FlowNetS(nn.Module):
    """Plain encoder/decoder flow network (parameter count: 38,676,504).

    Portions of this code copyright 2017, Clement Pinard.

    In training mode ``forward`` returns the five multi-scale predictions; in
    eval mode it returns a one-element tuple holding the finest one.
    """

    def __init__(self, args, input_channels=12, batchNorm=True):
        super().__init__()

        self.batchNorm = batchNorm
        use_bn = self.batchNorm

        # Encoder
        self.conv1 = conv(use_bn, input_channels, 64, kernel_size=7, stride=2)
        self.conv2 = conv(use_bn, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(use_bn, 128, 256, kernel_size=5, stride=2)
        self.conv3_1 = conv(use_bn, 256, 256)
        self.conv4 = conv(use_bn, 256, 512, stride=2)
        self.conv4_1 = conv(use_bn, 512, 512)
        self.conv5 = conv(use_bn, 512, 512, stride=2)
        self.conv5_1 = conv(use_bn, 512, 512)
        self.conv6 = conv(use_bn, 512, 1024, stride=2)
        self.conv6_1 = conv(use_bn, 1024, 1024)

        # Decoder
        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        # Bias-free transposed convolutions that upsample a 2-channel flow.
        for attr in ('upsampled_flow6_to_5', 'upsampled_flow5_to_4',
                     'upsampled_flow4_to_3', 'upsampled_flow3_to_2'):
            setattr(self, attr, nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False))

        # Conv2d and ConvTranspose2d layers get the same initialisation.
        for layer in self.modules():
            if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                if layer.bias is not None:
                    init.uniform_(layer.bias)
                init.xavier_uniform_(layer.weight)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    def forward(self, x):
        # Encoder
        enc1 = self.conv1(x)
        enc2 = self.conv2(enc1)
        enc3 = self.conv3_1(self.conv3(enc2))
        enc4 = self.conv4_1(self.conv4(enc3))
        enc5 = self.conv5_1(self.conv5(enc4))
        enc6 = self.conv6_1(self.conv6(enc5))

        # One entry per decoder level, coarse to fine:
        # (skip feature, flow upsampler, feature deconv, flow predictor)
        stages = (
            (enc5, self.upsampled_flow6_to_5, self.deconv5, self.predict_flow5),
            (enc4, self.upsampled_flow5_to_4, self.deconv4, self.predict_flow4),
            (enc3, self.upsampled_flow4_to_3, self.deconv3, self.predict_flow3),
            (enc2, self.upsampled_flow3_to_2, self.deconv2, self.predict_flow2),
        )

        flows = [self.predict_flow6(enc6)]
        features = enc6
        for skip, upsample_flow, deconv_layer, predict in stages:
            flow_up = upsample_flow(flows[-1])
            expanded = deconv_layer(features)
            features = torch.cat((skip, expanded, flow_up), 1)
            flows.append(predict(features))

        flow6, flow5, flow4, flow3, flow2 = flows

        if not self.training:
            return (flow2,)
        return flow2, flow3, flow4, flow5, flow6
self.predict_flow6 = predict_flow(1024) self.predict_flow5 = predict_flow(512) self.predict_flow4 = predict_flow(256) self.predict_flow3 = predict_flow(128) self.predict_flow2 = predict_flow(64) self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1) self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1) self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1) self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1) for m in self.modules(): if isinstance(m, nn.Conv2d): if m.bias is not None: init.uniform_(m.bias) init.xavier_uniform_(m.weight) if isinstance(m, nn.ConvTranspose2d): if m.bias is not None: init.uniform_(m.bias) init.xavier_uniform_(m.weight) # init_deconv_bilinear(m.weight) self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear') def forward(self, x): out_conv0 = self.conv0(x) out_conv1 = self.conv1_1(self.conv1(out_conv0)) out_conv2 = self.conv2_1(self.conv2(out_conv1)) out_conv3 = self.conv3_1(self.conv3(out_conv2)) out_conv4 = self.conv4_1(self.conv4(out_conv3)) out_conv5 = self.conv5_1(self.conv5(out_conv4)) out_conv6 = self.conv6_1(self.conv6(out_conv5)) flow6 = self.predict_flow6(out_conv6) flow6_up = self.upsampled_flow6_to_5(flow6) out_deconv5 = self.deconv5(out_conv6) concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1) out_interconv5 = self.inter_conv5(concat5) flow5 = self.predict_flow5(out_interconv5) flow5_up = self.upsampled_flow5_to_4(flow5) out_deconv4 = self.deconv4(concat5) concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1) out_interconv4 = self.inter_conv4(concat4) flow4 = self.predict_flow4(out_interconv4) flow4_up = self.upsampled_flow4_to_3(flow4) out_deconv3 = self.deconv3(concat4) concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1) out_interconv3 = self.inter_conv3(concat3) flow3 = self.predict_flow3(out_interconv3) flow3_up = self.upsampled_flow3_to_2(flow3) out_deconv2 = self.deconv2(concat3) concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1) out_interconv2 = 
self.inter_conv2(concat2) flow2 = self.predict_flow2(out_interconv2) if self.training: return flow2,flow3,flow4,flow5,flow6 else: return flow2, ================================================ FILE: dvs/flownet2/networks/__init__.py ================================================ ================================================ FILE: dvs/flownet2/networks/channelnorm_package/__init__.py ================================================ ================================================ FILE: dvs/flownet2/networks/channelnorm_package/channelnorm.py ================================================ from torch.autograd import Function, Variable from torch.nn.modules.module import Module import channelnorm_cuda class ChannelNormFunction(Function): @staticmethod def forward(ctx, input1, norm_deg=2): assert input1.is_contiguous() b, _, h, w = input1.size() output = input1.new(b, 1, h, w).zero_() channelnorm_cuda.forward(input1, output, norm_deg) ctx.save_for_backward(input1, output) ctx.norm_deg = norm_deg return output @staticmethod def backward(ctx, grad_output): input1, output = ctx.saved_tensors grad_input1 = Variable(input1.new(input1.size()).zero_()) channelnorm_cuda.backward(input1, output, grad_output.data, grad_input1.data, ctx.norm_deg) return grad_input1, None class ChannelNorm(Module): def __init__(self, norm_deg=2): super(ChannelNorm, self).__init__() self.norm_deg = norm_deg def forward(self, input1): return ChannelNormFunction.apply(input1, self.norm_deg) ================================================ FILE: dvs/flownet2/networks/channelnorm_package/channelnorm_cuda.cc ================================================ #include #include #include "channelnorm_kernel.cuh" int channelnorm_cuda_forward( at::Tensor& input1, at::Tensor& output, int norm_deg) { channelnorm_kernel_forward(input1, output, norm_deg); return 1; } int channelnorm_cuda_backward( at::Tensor& input1, at::Tensor& output, at::Tensor& gradOutput, at::Tensor& gradInput1, int norm_deg) { 
// Forward kernel of the channel-norm op.
//
// One thread per output element (`n` elements in total).  For its
// (batch, y, x) location the thread accumulates the squares of `input1`
// over all input channels in a float and stores the square root in
// `output[index]`.
//
// Notes grounded in the code below:
//   * `input1` and `output` are addressed with dense offsets computed from
//     the sizes; the `*_stride` arguments are accepted but not read.
//   * `norm_deg` is accepted but not read: the result is always the L2 norm.
//
// The template parameter list and the static_cast target types had been
// lost from this text; they are restored here (`scalar_t` is otherwise
// undeclared).
template <typename scalar_t>
__global__ void kernel_channelnorm_update_output(
    const int n,
    const scalar_t* __restrict__ input1,
    const long4 input1_size,
    const long4 input1_stride,
    scalar_t* __restrict__ output,
    const long4 output_size,
    const long4 output_stride,
    int norm_deg) {

    int index = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to a multiple of the block size by the caller.
    if (index >= n) {
        return;
    }

    int dim_b = DIM0(output_size);
    int dim_c = DIM1(output_size);
    int dim_h = DIM2(output_size);
    int dim_w = DIM3(output_size);
    int dim_chw = dim_c * dim_h * dim_w;

    // Decompose the flat output index into (b, y, x).
    int b = ( index / dim_chw ) % dim_b;
    int y = ( index / dim_w )   % dim_h;
    int x = ( index          )  % dim_w;

    int i1dim_c = DIM1(input1_size);
    int i1dim_h = DIM2(input1_size);
    int i1dim_w = DIM3(input1_size);
    int i1dim_chw = i1dim_c * i1dim_h * i1dim_w;
    int i1dim_hw  = i1dim_h * i1dim_w;

    // Accumulate in float regardless of scalar_t.
    float result = 0.0;

    for (int c = 0; c < i1dim_c; ++c) {
        int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x;
        scalar_t val = input1[i1Index];
        result += static_cast<float>(val * val);
    }
    result = sqrt(result);
    output[index] = static_cast<scalar_t>(result);
}
// Host-side launcher for the channel-norm backward pass.
//
// Packs the sizes and strides of all four tensors into long4 values and
// launches kernel_channelnorm_backward_input1 with one thread per element of
// `gradInput1`, on the current CUDA stream, dispatched on the scalar type of
// `input1` (floating types and half).
//
// No launch-error check is performed here (see the TODO at the end).
//
// The explicit `<scalar_t>` instantiation of the kernel and of the
// `.data<scalar_t>()` accessors had been lost from this text; they are
// restored here to match the kernel's `const scalar_t*` parameters.
void channelnorm_kernel_backward(
    at::Tensor& input1,
    at::Tensor& output,
    at::Tensor& gradOutput,
    at::Tensor& gradInput1,
    int norm_deg) {

    const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
    const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));

    const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
    const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));

    const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
    const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));

    const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
    const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));

    // One thread per gradient element.
    int n = gradInput1.numel();

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] {

        // Grid size is ceil(n / CUDA_NUM_THREADS); the kernel bounds-checks.
        kernel_channelnorm_backward_input1<scalar_t><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
            n,
            input1.data<scalar_t>(),
            input1_size,
            input1_stride,
            output.data<scalar_t>(),
            output_size,
            output_stride,
            gradOutput.data<scalar_t>(),
            gradOutput_size,
            gradOutput_stride,
            gradInput1.data<scalar_t>(),
            gradInput1_size,
            gradInput1_stride,
            norm_deg
        );

    }));

    // TODO: Add ATen-equivalent check

//    THCudaCheck(cudaGetLastError());
}
class CorrelationFunction(Function):
    """Autograd function wrapping the ``correlation_cuda`` extension.

    The extension resizes and fills the scratch/output tensors itself, so
    this wrapper only hands it empty tensors created from the inputs.
    """

    @staticmethod
    def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        """Compute the correlation volume of ``input1`` and ``input2``.

        Args:
            ctx: autograd context; receives the inputs and all parameters.
            input1, input2: the two feature maps to correlate.
            pad_size, kernel_size, max_displacement, stride1, stride2,
            corr_multiply: forwarded unchanged to the extension.

        Returns:
            The output tensor filled in by ``correlation_cuda.forward``.
        """
        # The CUDA kernels read both inputs through dense NCHW offsets, so a
        # strided view would be read incorrectly.  ``contiguous()`` returns
        # the same tensor when the layout is already dense.
        input1 = input1.contiguous()
        input2 = input2.contiguous()
        ctx.save_for_backward(input1, input2)

        ctx.pad_size = pad_size
        ctx.kernel_size = kernel_size
        ctx.max_displacement = max_displacement
        ctx.stride1 = stride1
        ctx.stride2 = stride2
        ctx.corr_multiply = corr_multiply

        with torch.cuda.device_of(input1):
            # Empty placeholders; the extension resizes them.
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1, input2, rbot1, rbot2, output,
                                     ctx.pad_size, ctx.kernel_size, ctx.max_displacement,
                                     ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``input1`` and ``input2``.

        The six non-tensor arguments of ``forward`` get ``None`` gradients.
        """
        input1, input2 = ctx.saved_tensors
        # Same reason as in forward(): the backward kernels index
        # grad_output with dense offsets.
        grad_output = grad_output.contiguous()

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2,
                                      ctx.pad_size, ctx.kernel_size, ctx.max_displacement,
                                      ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return grad_input1, grad_input2, None, None, None, None, None, None


class Correlation(Module):
    """``nn.Module`` front end for :class:`CorrelationFunction`.

    Stores the correlation parameters at construction time and passes them
    to the autograd function on every call.
    """

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply

    def forward(self, input1, input2):
        """Correlate ``input1`` with ``input2`` using the stored parameters."""
        result = CorrelationFunction.apply(input1, input2, self.pad_size, self.kernel_size,
                                           self.max_displacement, self.stride1, self.stride2,
                                           self.corr_multiply)

        return result
ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1)); rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); rInput1.fill_(0); rInput2.fill_(0); output.fill_(0); int success = correlation_forward_cuda_kernel( output, output.size(0), output.size(1), output.size(2), output.size(3), output.stride(0), output.stride(1), output.stride(2), output.stride(3), input1, input1.size(1), input1.size(2), input1.size(3), input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3), input2, input2.size(1), input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3), rInput1, rInput2, pad_size, kernel_size, max_displacement, stride1, stride2, corr_type_multiply, at::cuda::getCurrentCUDAStream() //at::globalContext().getCurrentCUDAStream() ); //check for errors if (!success) { AT_ERROR("CUDA call failed"); } return 1; } int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, at::Tensor& gradInput1, at::Tensor& gradInput2, int pad_size, int kernel_size, int max_displacement, int stride1, int stride2, int corr_type_multiply) { int batchSize = input1.size(0); int nInputChannels = input1.size(1); int paddedInputHeight = input1.size(2)+ 2 * pad_size; int paddedInputWidth = input1.size(3)+ 2 * pad_size; int height = input1.size(2); int width = input1.size(3); rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); gradInput1.resize_({batchSize, nInputChannels, height, width}); gradInput2.resize_({batchSize, nInputChannels, height, width}); rInput1.fill_(0); rInput2.fill_(0); gradInput1.fill_(0); gradInput2.fill_(0); int success = correlation_backward_cuda_kernel(gradOutput, 
gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3), gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3), input1, input1.size(1), input1.size(2), input1.size(3), input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3), input2, input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3), gradInput1, gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3), gradInput2, gradInput2.size(1), gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3), rInput1, rInput2, pad_size, kernel_size, max_displacement, stride1, stride2, corr_type_multiply, at::cuda::getCurrentCUDAStream() //at::globalContext().getCurrentCUDAStream() ); if (!success) { AT_ERROR("CUDA call failed"); } return 1; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); } ================================================ FILE: dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cu ================================================ #include #include "correlation_cuda_kernel.cuh" #define CUDA_NUM_THREADS 1024 #define THREADS_PER_BLOCK 32 #define FULL_MASK 0xffffffff #include #include #include #include using at::Half; template __forceinline__ __device__ scalar_t warpReduceSum(scalar_t val) { for (int offset = 16; offset > 0; offset /= 2) val += __shfl_down_sync(FULL_MASK, val, offset); return val; } template __forceinline__ __device__ scalar_t blockReduceSum(scalar_t val) { static __shared__ scalar_t shared[32]; int lane = threadIdx.x % warpSize; int wid = threadIdx.x / warpSize; val = warpReduceSum(val); if (lane == 0) shared[wid] = val; __syncthreads(); val = (threadIdx.x < blockDim.x / warpSize) ? 
// Forward correlation kernel.
//
// Launch layout as read by the code: blockIdx.x selects the batch item,
// blockIdx.y / blockIdx.z select the output row / column, and the threads
// of a block stride over the input channels.
//
// rInput1 / rInput2 are addressed as (batch, padded height, padded width,
// channels).  For every displacement (tj, ti) in
// [-displacement_rad, displacement_rad]^2 the block sums, over the
// kernel_size x kernel_size window and all channels, the products of the
// patch around (y1, x1) in rInput1 and the patch around (y2, x2) in rInput2.
// The per-thread partial sums are reduced across the block and thread 0
// writes the sum divided by the number of multiplied elements to `output`,
// which is addressed as (batch, displacement channel, row, column).
//
// The template parameter list and the static_cast target types had been
// lost from this text; they are restored here from the types of the
// destinations (`acc0` is float, `output` is scalar_t*).  `#pragma unroll`
// is back on its own line (in the collapsed text it had been swallowed by
// the preceding `//` comment).
template <typename scalar_t>
__global__ void correlation_forward(scalar_t* __restrict__ output, const int nOutputChannels,
                const int outputHeight, const int outputWidth, const scalar_t* __restrict__ rInput1,
                const int nInputChannels, const int inputHeight, const int inputWidth,
                const scalar_t* __restrict__ rInput2, const int pad_size, const int kernel_size,
                const int max_displacement, const int stride1, const int stride2) {

    int32_t pInputWidth = inputWidth + 2 * pad_size;
    int32_t pInputHeight = inputHeight + 2 * pad_size;

    int32_t kernel_rad = (kernel_size - 1) / 2;

    int32_t displacement_rad = max_displacement / stride2;
    int32_t displacement_size = 2 * displacement_rad + 1;

    // Position of this block's patch centre in the padded input.
    int32_t n = blockIdx.x;
    int32_t y1 = blockIdx.y * stride1 + max_displacement;
    int32_t x1 = blockIdx.z * stride1 + max_displacement;
    int32_t c = threadIdx.x;

    // Strides of the channel-last padded inputs.
    int32_t pdimyxc = pInputHeight * pInputWidth * nInputChannels;
    int32_t pdimxc = pInputWidth * nInputChannels;
    int32_t pdimc = nInputChannels;

    // Strides of the output.
    int32_t tdimcyx = nOutputChannels * outputHeight * outputWidth;
    int32_t tdimyx = outputHeight * outputWidth;
    int32_t tdimx = outputWidth;

    // Normalisation: number of products summed per output value.
    int32_t nelems = kernel_size * kernel_size * pdimc;

    // element-wise product along channel axis
    for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
        for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
            int x2 = x1 + ti * stride2;
            int y2 = y1 + tj * stride2;

            float acc0 = 0.0f;

            for (int j = -kernel_rad; j <= kernel_rad; ++j) {
                for (int i = -kernel_rad; i <= kernel_rad; ++i) {
                    // THREADS_PER_BLOCK
                    #pragma unroll
                    for (int ch = c; ch < pdimc; ch += blockDim.x) {
                        int indx1 = n * pdimyxc + (y1 + j) * pdimxc
                                + (x1 + i) * pdimc + ch;
                        int indx2 = n * pdimyxc + (y2 + j) * pdimxc
                                + (x2 + i) * pdimc + ch;
                        acc0 += static_cast<float>(rInput1[indx1] * rInput2[indx2]);
                    }
                }
            }

            // A single-warp block only needs the shuffle reduction.
            if (blockDim.x == warpSize) {
                __syncwarp();
                acc0 = warpReduceSum(acc0);
            } else {
                __syncthreads();
                acc0 = blockReduceSum(acc0);
            }

            if (threadIdx.x == 0) {
                // Displacement (tj, ti) maps to one output channel.
                int tc = (tj + displacement_rad) * displacement_size
                        + (ti + displacement_rad);
                const int tindx = n * tdimcyx + tc * tdimyx
                        + blockIdx.y * tdimx + blockIdx.z;
                output[tindx] = static_cast<scalar_t>(acc0 / nelems);
            }
        }
    }
}
// Backward correlation kernel for the second input.
//
// Launch layout as read by the code: blockIdx.x / blockIdx.y select the
// input row / column, blockIdx.z the input channel; `item` is the batch
// index, supplied by the host per launch.  The threads of a block stride
// over the output (displacement) channels, each accumulating into its own
// slot of a shared array; thread 0 then sums the slots and writes
// gradInput2 for this (item, c, y, x).
//
// gradInput2 is assumed to be pre-allocated and zero-filled: displacement
// channels whose window does not overlap the output are skipped without
// writing anything.
//
// The template parameter list had been lost from this text; it is restored
// here (`scalar_t` is otherwise undeclared).
template <typename scalar_t>
__global__ void correlation_backward_input2(int item, scalar_t* gradInput2, int nInputChannels, int inputHeight, int inputWidth,
                                            const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth,
                                            const scalar_t* __restrict__ rInput1,
                                            int pad_size,
                                            int kernel_size,
                                            int max_displacement,
                                            int stride1,
                                            int stride2)
{
    // n (batch size), c (num of channels), y (height), x (width)

    int n = item;
    int y = blockIdx.x * stride1 + pad_size;
    int x = blockIdx.y * stride1 + pad_size;
    int c = blockIdx.z;

    int tch_off = threadIdx.x;

    int kernel_rad = (kernel_size - 1) / 2;
    int displacement_rad = max_displacement / stride2;
    int displacement_size = 2 * displacement_rad + 1;

    // Strides of the channel-last padded input.
    int pInputWidth = inputWidth + 2 * pad_size;
    int pInputHeight = inputHeight + 2 * pad_size;

    int pdimyxc = pInputHeight * pInputWidth * nInputChannels;
    int pdimxc = pInputWidth * nInputChannels;
    int pdimc = nInputChannels;

    // Strides of gradOutput.
    int tdimcyx = nOutputChannels * outputHeight * outputWidth;
    int tdimyx = outputHeight * outputWidth;
    int tdimx = outputWidth;

    // Strides of gradInput2.
    int odimcyx = nInputChannels * inputHeight* inputWidth;
    int odimyx = inputHeight * inputWidth;
    int odimx = inputWidth;

    // Same normalisation as in the forward pass.
    scalar_t nelems = kernel_size * kernel_size * nInputChannels;

    __shared__ scalar_t prod_sum[THREADS_PER_BLOCK];
    prod_sum[tch_off] = 0;

    for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) {
        // Displacement encoded by output channel tc.
        int i2 = (tc % displacement_size - displacement_rad) * stride2;
        int j2 = (tc / displacement_size - displacement_rad) * stride2;

        // Range of output positions whose window covers (y, x) under this
        // displacement.
        int xmin = (x - kernel_rad - max_displacement - i2) / stride1;
        int ymin = (y - kernel_rad - max_displacement - j2) / stride1;

        int xmax = (x + kernel_rad - max_displacement - i2) / stride1;
        int ymax = (y + kernel_rad - max_displacement - j2) / stride1;

        if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) {
            // assumes gradInput2 is pre-allocated and zero filled
            continue;
        }

        if (xmin > xmax || ymin > ymax) {
            // assumes gradInput2 is pre-allocated and zero filled
            continue;
        }

        xmin = max(0,xmin);
        xmax = min(outputWidth-1,xmax);

        ymin = max(0,ymin);
        ymax = min(outputHeight-1,ymax);

        // Matching value of the first input, shifted by the displacement.
        int indx1 = n * pdimyxc + (y - j2)* pdimxc + (x - i2) * pdimc + c;
        scalar_t val1 = rInput1[indx1];

        for (int j = ymin; j <= ymax; ++j) {
            for (int i = xmin; i <= xmax; ++i) {
                int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i;
                prod_sum[tch_off] += gradOutput[tindx] * val1;
            }
        }
    }

    // All partial sums must be visible before thread 0 reduces them.
    __syncthreads();

    if(tch_off == 0) {
        scalar_t reduce_sum = 0;
        for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) {
            reduce_sum += prod_sum[idx];
        }
        const int indx2 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size);
        gradInput2[indx2] = reduce_sum / nelems;
    }
}
int gob, int goc, int goh, int gow, int gosb, int gosc, int gosh, int gosw, at::Tensor& input1, int ic, int ih, int iw, int isb, int isc, int ish, int isw, at::Tensor& input2, int gsb, int gsc, int gsh, int gsw, at::Tensor& gradInput1, int gisb, int gisc, int gish, int gisw, at::Tensor& gradInput2, int ggc, int ggsb, int ggsc, int ggsh, int ggsw, at::Tensor& rInput1, at::Tensor& rInput2, int pad_size, int kernel_size, int max_displacement, int stride1, int stride2, int corr_type_multiply, cudaStream_t stream) { int batchSize = gob; int num = batchSize; int nInputChannels = ic; int inputWidth = iw; int inputHeight = ih; int nOutputChannels = goc; int outputWidth = gow; int outputHeight = goh; dim3 blocks_grid(batchSize, inputHeight, inputWidth); dim3 threads_block(THREADS_PER_BLOCK); AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "lltm_forward_cuda", ([&] { channels_first<<>>( input1.data(), rInput1.data(), nInputChannels, inputHeight, inputWidth, pad_size ); })); AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { channels_first<<>>( input2.data(), rInput2.data(), nInputChannels, inputHeight, inputWidth, pad_size ); })); dim3 threadsPerBlock(THREADS_PER_BLOCK); dim3 totalBlocksCorr(inputHeight, inputWidth, nInputChannels); for (int n = 0; n < num; ++n) { AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] { correlation_backward_input1<<>> ( n, gradInput1.data(), nInputChannels, inputHeight, inputWidth, gradOutput.data(), nOutputChannels, outputHeight, outputWidth, rInput2.data(), pad_size, kernel_size, max_displacement, stride1, stride2); })); } for(int n = 0; n < batchSize; n++) { AT_DISPATCH_FLOATING_TYPES_AND_HALF(rInput1.type(), "lltm_forward_cuda", ([&] { correlation_backward_input2<<>>( n, gradInput2.data(), nInputChannels, inputHeight, inputWidth, gradOutput.data(), nOutputChannels, outputHeight, outputWidth, rInput1.data(), pad_size, kernel_size, max_displacement, stride1, stride2); })); } // check for 
// Launches the forward correlation kernels on `stream`.
//
// Argument groups, as passed by the host wrapper correlation_forward_cuda:
//   output            : result tensor
//   ob, oc, oh, ow    : output.size(0..3)
//   osb..osw          : output.stride(0..3)
//   input1            : first input
//   ic, ih, iw        : input1.size(1..3)
//   isb..isw          : input1.stride(0..3)
//   input2            : second input
//   gc                : input2.size(1)
//   gsb..gsw          : input2.stride(0..3)
//   rInput1, rInput2  : scratch tensors for the rearranged inputs
//   pad_size .. corr_type_multiply : correlation parameters
//
// Returns 1 on success and 0 when cudaGetLastError() reports an error after
// the launches.
int correlation_forward_cuda_kernel(at::Tensor& output,
    int ob,
    int oc,
    int oh,
    int ow,
    int osb,
    int osc,
    int osh,
    int osw,

    at::Tensor& input1,
    int ic,
    int ih,
    int iw,
    int isb,
    int isc,
    int ish,
    int isw,

    at::Tensor& input2,
    int gc,
    int gsb,
    int gsc,
    int gsh,
    int gsw,

    at::Tensor& rInput1,
    at::Tensor& rInput2,
    int pad_size,
    int kernel_size,
    int max_displacement,
    int stride1,
    int stride2,
    int corr_type_multiply,
    cudaStream_t stream);


// Launches the backward correlation kernels on `stream`.
//
// Argument groups, as passed by the host wrapper correlation_backward_cuda:
//   gradOutput          : gradient w.r.t. the correlation output
//   gob, goc, goh, gow  : gradOutput.size(0..3)
//   gosb..gosw          : gradOutput.stride(0..3)
//   input1              : first input
//   ic, ih, iw          : input1.size(1..3)
//   isb..isw            : input1.stride(0..3)
//   input2              : second input
//   gsb..gsw            : input2.stride(0..3)
//   gradInput1          : gradient w.r.t. input1 (written)
//   gisb..gisw          : gradInput1.stride(0..3)
//   gradInput2          : gradient w.r.t. input2 (written)
//   ggc                 : gradInput2.size(1)
//   ggsb..ggsw          : gradInput2.stride(0..3)
//   rInput1, rInput2    : scratch tensors for the rearranged inputs
//   pad_size .. corr_type_multiply : correlation parameters
//
// Returns 1 on success and 0 when cudaGetLastError() reports an error after
// the launches.
int correlation_backward_cuda_kernel(
    at::Tensor& gradOutput,
    int gob,
    int goc,
    int goh,
    int gow,
    int gosb,
    int gosc,
    int gosh,
    int gosw,

    at::Tensor& input1,
    int ic,
    int ih,
    int iw,
    int isb,
    int isc,
    int ish,
    int isw,

    at::Tensor& input2,
    int gsb,
    int gsc,
    int gsh,
    int gsw,

    at::Tensor& gradInput1,
    int gisb,
    int gisc,
    int gish,
    int gisw,

    at::Tensor& gradInput2,
    int ggc,
    int ggsb,
    int ggsc,
    int ggsh,
    int ggsw,

    at::Tensor& rInput1,
    at::Tensor& rInput2,
    int pad_size,
    int kernel_size,
    int max_displacement,
    int stride1,
    int stride2,
    int corr_type_multiply,
    cudaStream_t stream);
class Resample2dFunction(Function):
    """Autograd function wrapping the ``resample2d_cuda`` extension.

    ``input1`` is the tensor being sampled; ``input2`` holds the per-pixel
    displacement used to look it up.
    """

    @staticmethod
    def forward(ctx, input1, input2, kernel_size=1, bilinear= True):
        """Resample ``input1`` at the positions displaced by ``input2``.

        Both inputs must be contiguous (``AssertionError`` otherwise).  The
        result takes its channel count from ``input1`` and its batch size,
        height and width from ``input2``.
        """
        assert input1.is_contiguous()
        assert input2.is_contiguous()

        ctx.save_for_backward(input1, input2)
        ctx.kernel_size = kernel_size
        ctx.bilinear = bilinear

        _, d, _, _ = input1.size()
        b, _, h, w = input2.size()
        output = input1.new(b, d, h, w).zero_()

        resample2d_cuda.forward(input1, input2, output, kernel_size, bilinear)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``input1`` and ``input2``.

        ``kernel_size`` and ``bilinear`` get ``None`` gradients.
        """
        grad_output = grad_output.contiguous()
        assert grad_output.is_contiguous()

        input1, input2 = ctx.saved_tensors

        # Zero-initialised buffers that the extension writes into.
        grad_input1 = Variable(input1.new(input1.size()).zero_())
        grad_input2 = Variable(input1.new(input2.size()).zero_())

        resample2d_cuda.backward(input1, input2, grad_output.data,
                                 grad_input1.data, grad_input2.data,
                                 ctx.kernel_size, ctx.bilinear)

        return grad_input1, grad_input2, None, None


class Resample2d(Module):
    """``nn.Module`` front end for :class:`Resample2dFunction`."""

    def __init__(self, kernel_size=1, bilinear = True):
        super(Resample2d, self).__init__()
        self.kernel_size = kernel_size
        self.bilinear = bilinear

    def forward(self, input1, input2):
        """Resample ``input1`` with the displacement field ``input2``.

        Resample2dFunction.forward asserts that *both* tensors are
        contiguous, so both are made contiguous here.  Previously only
        ``input1`` was, and a strided ``input2`` (e.g. a permuted or sliced
        flow) tripped the assertion.  ``contiguous()`` returns the same
        tensor when the layout is already dense.
        """
        input1_c = input1.contiguous()
        input2_c = input2.contiguous()
        return Resample2dFunction.apply(input1_c, input2_c, self.kernel_size, self.bilinear)
dvs/flownet2/networks/resample2d_package/resample2d_cuda.cc ================================================ #include #include #include "resample2d_kernel.cuh" int resample2d_cuda_forward( at::Tensor& input1, at::Tensor& input2, at::Tensor& output, int kernel_size, bool bilinear) { resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear); return 1; } int resample2d_cuda_backward( at::Tensor& input1, at::Tensor& input2, at::Tensor& gradOutput, at::Tensor& gradInput1, at::Tensor& gradInput2, int kernel_size, bool bilinear) { resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear); return 1; } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); } ================================================ FILE: dvs/flownet2/networks/resample2d_package/resample2d_kernel.cu ================================================ #include #include #include #define CUDA_NUM_THREADS 512 #define THREADS_PER_BLOCK 64 #define DIM0(TENSOR) ((TENSOR).x) #define DIM1(TENSOR) ((TENSOR).y) #define DIM2(TENSOR) ((TENSOR).z) #define DIM3(TENSOR) ((TENSOR).w) #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) template __global__ void kernel_resample2d_update_output(const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride, scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, int kernel_size, bool bilinear) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index >= n) { return; } scalar_t val = 0.0f; int dim_b = DIM0(output_size); int dim_c = DIM1(output_size); int dim_h = DIM2(output_size); int dim_w = 
// Backward pass of the resampling op with respect to input1.
//
// Each thread handles one element of gradOutput (threads with index >= n
// exit immediately).  The thread recomputes the sampling position
// (x + dx, y + dy) from channels 0 and 1 of input2, clamps the four
// neighbouring pixel coordinates to the extent of input1, and scatters its
// gradient onto those pixels of gradInput with atomicAdd, weighted by the
// bilinear coefficients.
//
// NOTE(review): the `bilinear` flag is not read in this kernel; the bilinear
// weights are always used.  With kernel_size > 1 the offsets (yT + fy),
// (xL + fx), ... are not re-clamped -- confirm callers only use kernel_size 1.
template <typename scalar_t>
__global__ void kernel_resample2d_backward_input1(
    const int n,
    const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
    const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
    const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
    scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride,
    int kernel_size,
    bool bilinear) {

    int index = blockIdx.x * blockDim.x + threadIdx.x;

    if (index >= n) {
        return;
    }

    int dim_b = DIM0(gradOutput_size);
    int dim_c = DIM1(gradOutput_size);
    int dim_h = DIM2(gradOutput_size);
    int dim_w = DIM3(gradOutput_size);
    int dim_chw = dim_c * dim_h * dim_w;
    int dim_hw  = dim_h * dim_w;

    // Decompose the flat index into (b, c, y, x) of gradOutput.
    int b = ( index / dim_chw ) % dim_b;
    int c = ( index / dim_hw )  % dim_c;
    int y = ( index / dim_w )   % dim_h;
    int x = ( index          )  % dim_w;

    scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
    scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);

    scalar_t xf = static_cast<scalar_t>(x) + dx;
    scalar_t yf = static_cast<scalar_t>(y) + dy;

    // Fractional parts must be taken with floor(), exactly as in the forward
    // kernel.  The previous `xf - int(xf)` truncates toward zero, which gives
    // a negative weight for negative, non-integer coordinates and made the
    // gradient inconsistent with the forward interpolation.
    scalar_t alpha = xf - floor(xf); // alpha
    scalar_t beta  = yf - floor(yf); // beta

    int idim_h = DIM2(input1_size);
    int idim_w = DIM3(input1_size);

    int xL = max(min( int (floor(xf)),    idim_w-1), 0);
    int xR = max(min( int (floor(xf)+1),  idim_w-1), 0);
    int yT = max(min( int (floor(yf)),    idim_h-1), 0);
    int yB = max(min( int (floor(yf)+1),  idim_h-1), 0);

    for (int fy = 0; fy < kernel_size; fy += 1) {
        for (int fx = 0; fx < kernel_size; fx += 1) {
            // Several output pixels may map to the same input pixel, hence atomicAdd.
            atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xL + fx)), (1-alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
            atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xR + fx)),   (alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
            atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xL + fx)),   (1-alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
            atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xR + fx)),     (alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
        }
    }
}
// Host-side launcher for the forward resampling kernel.
//
// Packs the sizes and strides of the three tensors into long4 values and
// launches kernel_resample2d_update_output with one thread per element of
// `output`, in blocks of CUDA_NUM_THREADS threads, on the current CUDA stream.
// The kernel is instantiated for float only (the AT_DISPATCH path is still
// disabled, see the TODO below), so the tensors are accessed as float.
void resample2d_kernel_forward(
    at::Tensor& input1,
    at::Tensor& input2,
    at::Tensor& output,
    int kernel_size,
    bool bilinear) {

    int n = output.numel();

    // An empty output would yield a grid of zero blocks, which is an invalid
    // launch configuration; there is nothing to compute in that case.
    if (n == 0) {
        return;
    }

    const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
    const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));

    const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
    const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));

    const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
    const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));

    // TODO: when atomicAdd gets resolved, change to AT_DISPATCH_FLOATING_TYPES_AND_HALF
    kernel_resample2d_update_output<float><<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
        n,
        input1.data<float>(), input1_size, input1_stride,
        input2.data<float>(), input2_size, input2_stride,
        output.data<float>(), output_size, output_stride,
        kernel_size,
        bilinear);

    // TODO: ATen-equivalent check
    // THCudaCheck(cudaGetLastError());
}
gradOutput.size(1), gradOutput.size(2), gradOutput.size(3)); const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); // AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_backward_input1", ([&] { kernel_resample2d_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( //at::globalContext().getCurrentCUDAStream() >>>( n, input1.data(), input1_size, input1_stride, input2.data(), input2_size, input2_stride, gradOutput.data(), gradOutput_size, gradOutput_stride, gradInput1.data(), gradInput1_size, gradInput1_stride, kernel_size, bilinear ); // })); const long4 gradInput2_size = make_long4(gradInput2.size(0), gradInput2.size(1), gradInput2.size(2), gradInput2.size(3)); const long4 gradInput2_stride = make_long4(gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3)); n = gradInput2.numel(); // AT_DISPATCH_FLOATING_TYPES(gradInput2.type(), "resample_backward_input2", ([&] { kernel_resample2d_backward_input2<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( //at::globalContext().getCurrentCUDAStream() >>>( n, input1.data(), input1_size, input1_stride, input2.data(), input2_size, input2_stride, gradOutput.data(), gradOutput_size, gradOutput_stride, gradInput2.data(), gradInput2_size, gradInput2_stride, kernel_size, bilinear ); // })); // TODO: Use the ATen equivalent to get last error // THCudaCheck(cudaGetLastError()); } ================================================ FILE: dvs/flownet2/networks/resample2d_package/resample2d_kernel.cuh ================================================ #pragma 
def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
    """Build a convolution block followed by LeakyReLU(0.1).

    With ``batchNorm`` truthy the block is Conv2d (no bias) -> BatchNorm2d ->
    LeakyReLU; otherwise it is Conv2d (with bias) -> LeakyReLU.  The padding
    is ``(kernel_size - 1) // 2`` in both cases.
    """
    stages = [
        nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            # The bias is dropped exactly when a BatchNorm2d layer follows.
            bias=not batchNorm,
        )
    ]
    if batchNorm:
        stages.append(nn.BatchNorm2d(out_planes))
    stages.append(nn.LeakyReLU(0.1, inplace=True))
    return nn.Sequential(*stages)
def init_deconv_bilinear(weight):
    """Fill ``weight`` in place with a bilinear interpolation kernel.

    The kernel is built from the last two dimensions of ``weight`` (height,
    width) and the same 2-D kernel is written to every leading index.  The
    scale ``f`` and centre ``c`` are derived from the width only, as before.

    The kernel array has shape ``[height, width]`` and is indexed
    ``[row, column]``.  The previous loop wrote ``bilinear[x, y]`` with ``x``
    running over the width, which raised IndexError when width > height and
    left rows unfilled when width < height.  Square kernels get exactly the
    same values as before because the weights are symmetric in x and y.
    """
    f_shape = weight.size()
    height, width = f_shape[-2], f_shape[-1]
    f = np.ceil(width / 2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)

    # 1-D tent profiles along each axis; their outer product is the 2-D kernel.
    row_profile = 1 - np.abs(np.arange(height) / f - c)
    col_profile = 1 - np.abs(np.arange(width) / f - c)
    bilinear = np.outer(row_profile, col_profile)

    # copy_ broadcasts the (height, width) kernel over the leading dimensions
    # and converts it to the dtype of ``weight``.
    weight.data.copy_(torch.from_numpy(bilinear))
def flow2img(flow_data):
    """
    convert optical flow into color image

    Components whose magnitude exceeds ``UNKNOW_FLOW_THRESHOLD`` are treated
    as unknown: they are zeroed before normalisation and the corresponding
    pixels are set to 0 in the result.  The remaining flow is divided by the
    largest flow magnitude and passed to ``compute_color``.

    :param flow_data: array indexed as [row, column, channel]; channel 0 is
        read as u and channel 1 as v
    :return: color image as a uint8 array
    """
    # Work on copies so the caller's flow array is not modified in place.
    u = flow_data[:, :, 0].copy()
    v = flow_data[:, :, 1].copy()

    UNKNOW_FLOW_THRESHOLD = 1e7
    idx_unknown = (abs(u) > UNKNOW_FLOW_THRESHOLD) | (abs(v) > UNKNOW_FLOW_THRESHOLD)
    u[idx_unknown] = 0
    v[idx_unknown] = 0

    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))

    # eps belongs in the denominator: it guards against division by zero when
    # the whole field is zero.  The previous `u / maxrad + eps` divided by
    # zero in that case and merely shifted every value by eps otherwise.
    eps = np.finfo(float).eps
    u = u / (maxrad + eps)
    v = v / (maxrad + eps)

    img = compute_color(u, v)

    # Unknown-flow pixels are set to 0 in all three colour channels.
    idx = np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)
def read_gen(file_name):
    """Load a file, choosing the reader from the file extension.

    * ``.png`` / ``.jpeg`` / ``.ppm`` / ``.jpg`` -- read with ``imread``; when
      the decoded image has more than three channels only the first three are
      returned.
    * ``.bin`` / ``.raw`` -- read with ``np.load``.
    * ``.flo`` -- read with ``flow_utils.readFlow`` and cast to float32.

    Any other extension returns an empty list.  The extension is matched
    case-insensitively.
    """
    ext = splitext(file_name)[-1].lower()
    if ext in ('.png', '.jpeg', '.ppm', '.jpg'):
        im = imread(file_name)
        # A grayscale image decodes to a 2-D array, so the channel axis must
        # be checked before it is indexed (im.shape[2] raised IndexError).
        if im.ndim == 3 and im.shape[2] > 3:
            return im[:, :, :3]
        return im
    if ext in ('.bin', '.raw'):
        return np.load(file_name)
    if ext == '.flo':
        return flow_utils.readFlow(file_name).astype(np.float32)
    return []
keys = [ 'conv1', 'conv2', 'conv3', 'conv3_1', 'conv4', 'conv4_1', 'conv5', 'conv5_1', 'conv6', 'conv6_1', 'deconv5', 'deconv4', 'deconv3', 'deconv2', 'predict_conv6', 'predict_conv5', 'predict_conv4', 'predict_conv3', 'predict_conv2', 'upsample_flow6to5', 'upsample_flow5to4', 'upsample_flow4to3', 'upsample_flow3to2', ] for i, k in enumerate(keys): if 'upsample' in k: keys[i] = param_prefix + param_prefix + k else: keys[i] = param_prefix + k i = 0 for m in modules: if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): weight = weights[keys[i]].copy() bias = biases[keys[i]].copy() if keys[i] == param_prefix+'conv1': m.weight.data[:,0:3,:,:] = torch.from_numpy(np.flip(weight[:,0:3,:,:], axis=1).copy()) m.weight.data[:,3:6,:,:] = torch.from_numpy(np.flip(weight[:,3:6,:,:], axis=1).copy()) m.weight.data[:,6:9,:,:] = torch.from_numpy(np.flip(weight[:,6:9,:,:], axis=1).copy()) m.weight.data[:,9::,:,:] = torch.from_numpy(weight[:,9:,:,:].copy()) if m.bias is not None: m.bias.data[:] = torch.from_numpy(bias) else: m.weight.data[:,:,:,:] = torch.from_numpy(weight) if m.bias is not None: m.bias.data[:] = torch.from_numpy(bias) i = i + 1 return def parse_flownetsonly(modules, weights, biases, param_prefix=''): keys = [ 'conv1', 'conv2', 'conv3', 'conv3_1', 'conv4', 'conv4_1', 'conv5', 'conv5_1', 'conv6', 'conv6_1', 'deconv5', 'deconv4', 'deconv3', 'deconv2', 'Convolution1', 'Convolution2', 'Convolution3', 'Convolution4', 'Convolution5', 'upsample_flow6to5', 'upsample_flow5to4', 'upsample_flow4to3', 'upsample_flow3to2', ] for i, k in enumerate(keys): if 'upsample' in k: keys[i] = param_prefix + param_prefix + k else: keys[i] = param_prefix + k i = 0 for m in modules: if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): weight = weights[keys[i]].copy() bias = biases[keys[i]].copy() if keys[i] == param_prefix+'conv1': # print ("%s :"%(keys[i]), m.weight.size(), m.bias.size(), tf_w[keys[i]].shape[::-1]) m.weight.data[:,0:3,:,:] = 
def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'):
    """Copy FlowNetSD parameters from numpy dicts into torch modules.

    ``modules`` is walked in order; every ``nn.Conv2d`` / ``nn.ConvTranspose2d``
    consumes the next name from the fixed layer list below (prefixed with
    ``param_prefix``) and receives ``weights[name]`` and, when the module has
    a bias, ``biases[name]``.  For the first layer (``conv0``) input channels
    0-2 and 3-5 are each copied in reversed channel order.
    """
    layer_names = [
        'conv0',
        'conv1',
        'conv1_1',
        'conv2',
        'conv2_1',
        'conv3',
        'conv3_1',
        'conv4',
        'conv4_1',
        'conv5',
        'conv5_1',
        'conv6',
        'conv6_1',

        'deconv5',
        'deconv4',
        'deconv3',
        'deconv2',

        'interconv5',
        'interconv4',
        'interconv3',
        'interconv2',

        'Convolution1',
        'Convolution2',
        'Convolution3',
        'Convolution4',
        'Convolution5',

        'upsample_flow6to5',
        'upsample_flow5to4',
        'upsample_flow4to3',
        'upsample_flow3to2',
    ]
    keys = [param_prefix + name for name in layer_names]
    first_layer_key = param_prefix + 'conv0'

    cursor = 0
    for m in modules:
        if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            continue

        key = keys[cursor]
        weight = weights[key].copy()
        bias = biases[key].copy()

        if key == first_layer_key:
            # Reverse the channel order inside each 3-channel group.
            for start in (0, 3):
                stop = start + 3
                flipped = np.flip(weight[:, start:stop, :, :], axis=1).copy()
                m.weight.data[:, start:stop, :, :] = torch.from_numpy(flipped)
        else:
            m.weight.data[:, :, :, :] = torch.from_numpy(weight)

        if m.bias is not None:
            m.bias.data[:] = torch.from_numpy(bias)

        cursor += 1
    return
class TimerBlock:
    """Context manager that prints a title and timestamped log messages.

    The title is printed on construction.  Entering the block records the
    start time; ``log`` prints a message prefixed with the time elapsed since
    then; leaving the block records ``end`` and ``interval`` and logs whether
    the body finished or raised.

    Timing uses ``time.perf_counter()``.  The previously used ``time.clock()``
    was removed in Python 3.8, so entering the block raised AttributeError on
    current interpreters.
    """

    def __init__(self, title):
        print(("{}".format(title)))

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end = time.perf_counter()
        self.interval = self.end - self.start

        # Nothing is returned, so an exception raised in the body propagates.
        if exc_type is not None:
            self.log("Operation failed\n")
        else:
            self.log("Operation finished\n")

    def log(self, string):
        """Print ``string`` prefixed with the elapsed time (s, or m past 60 s)."""
        duration = time.perf_counter() - self.start
        units = 's'
        if duration > 60:
            duration = duration / 60.
            units = 'm'
        print((" [{:.3f}{}] {}".format(duration, units, string)))

    def log2file(self, fid, string):
        """Append ``string`` plus a newline to the file at path ``fid``."""
        # The with-block closes the file even if the write fails.
        with open(fid, 'a') as handle:
            handle.write("%s\n"%(string))
def kwargs_from_args(args, argument_for_class):
    """Collect the ``<argument_for_class>_*`` attributes of ``args`` as kwargs.

    Returns a dict that maps each attribute name with the
    ``argument_for_class + '_'`` prefix stripped to its value.  The attribute
    ``<argument_for_class>_class`` is excluded.

    The prefix is matched with ``startswith``.  The previous substring test
    (``prefix in key``) also accepted names that merely contained the prefix
    (e.g. ``submodel_depth`` for ``model``) and then cut the wrong characters
    off the front of the name.
    """
    prefix = argument_for_class + '_'
    return {
        key[len(prefix):]: value
        for key, value in vars(args).items()
        if key.startswith(prefix) and key != prefix + 'class'
    }
is_best: shutil.copyfile(name, prefix_save + '_model_best.pth.tar') ================================================ FILE: dvs/gyro/__init__.py ================================================ from .gyro_function import ( GetGyroAtTimeStamp, QuaternionProduct, QuaternionReciprocal, ConvertQuaternionToAxisAngle, FindOISAtTimeStamp, GetMetadata, GetProjections, GetVirtualProjection, GetForwardGrid, CenterZoom, GetWarpingFlow, torch_norm_quat, torch_QuaternionProduct, torch_QuaternionReciprocal, torch_GetVirtualProjection, get_static, torch_GetForwardGrid, torch_GetWarpingFlow, train_GetGyroAtTimeStamp, train_ConvertQuaternionToAxisAngle, ConvertAxisAngleToQuaternion, torch_ConvertAxisAngleToQuaternion, torch_ConvertQuaternionToAxisAngle, ConvertAxisAngleToQuaternion_no_angle, ConvertQuaternionToAxisAngle_no_angle, torch_GetHomographyTransformFromProjections, torch_ApplyTransform, norm_quat, SlerpWithDefault ) from .gyro_io import ( LoadGyroData, LoadOISData, LoadFrameData, LoadStabResult, get_grid, get_rotations, visual_rotation ) ================================================ FILE: dvs/gyro/gyro_function.py ================================================ import numpy as np from numpy import linalg as LA import matplotlib.pyplot as plt import torch from torch.autograd import Variable def get_static(height = 1080, width = 1920, ratio = 0.1): static_options = {} static_options["active_array_width"] = 4032 static_options["active_array_height"] = 3024 static_options["crop_window_width"] = 4032 static_options["crop_window_height"] = 2272 static_options["num_grid_rows"] = 12 static_options["num_grid_cols"] = 12 static_options["dim_homography"] = 9 static_options["width"] = width # frame width. static_options["height"] = height # frame height # static_options["fov"] = 1.27 # sensor_width/sensor_focal_length static_options["cropping_ratio"] = 0.0 #ratio # normalized cropping ratio at each side. 
def ConvertAxisAngleToQuaternion(axis, angle):
    """Convert a rotation axis and an angle to a quaternion [x, y, z, w].

    The axis is normalised before use and the result is passed through
    ``norm_quat``.

    When the axis length or the magnitude of the angle is at most 1e-6 the
    identity quaternion [0, 0, 0, 1] is returned.  Previously ``quat`` was
    never assigned in that case and the function raised UnboundLocalError;
    the same happened for every negative angle, which is now converted like
    a positive one.
    """
    axis_norm = LA.norm(axis)
    if axis_norm <= 1e-6 or abs(angle) <= 1e-6:
        # No usable rotation: same identity value norm_quat falls back to.
        return np.array([0.0, 0.0, 0.0, 1.0])

    axis = axis / axis_norm
    half_angle = angle * 0.5
    sin_half_angle = np.sin(half_angle)
    quat = np.array([sin_half_angle * axis[0],
                     sin_half_angle * axis[1],
                     sin_half_angle * axis[2],
                     np.cos(half_angle)])
    return norm_quat(quat)
def ConvertQuaternionToAxisAngle_no_angle(quat):
    """Convert an [x, y, z, w] quaternion to a rotation vector.

    The quaternion is normalized first. The returned 3-vector points along
    the rotation axis and its length is the rotation angle 2 * arccos(w).
    When the vector part is not longer than 1e-6 the zero vector is returned.
    """
    unit = quat / LA.norm(quat)
    vec_len = LA.norm(unit[0:3])
    rotation_vector = np.array([0.0, 0.0, 0.0])
    if not vec_len > 1e-6:
        # (Almost) no rotation: the axis is undefined, keep the zero vector.
        return rotation_vector
    # Rescale the vector part from length sin(angle / 2) to length angle.
    scale = 1 / vec_len * 2 * np.arccos(unit[3])
    for k in range(3):
        rotation_vector[k] = unit[k] * scale
    return rotation_vector
def SlerpWithDefault(q1, q2, t, q_default):
    """Interpolate between two [x, y, z, w] quaternions.

    Args:
        q1: start quaternion (normalized internally).
        q2: end quaternion (normalized internally).
        t: interpolation ratio, clamped to [0, 1].
        q_default: value returned when |q1 . q2| >= 1, i.e. when the two
            inputs are numerically the same rotation.

    Returns:
        The normalized q1 (t ~ 0), the normalized q2 (t ~ 1), q_default, a
        normalized linear blend when the inputs are nearly parallel, or the
        spherical interpolation otherwise. Interpolation always follows the
        shorter arc: q2 is negated when its dot product with q1 is negative.
    """
    t = max(min(t, 1.0), 0.0)
    kEpsilon = 1e-6
    kSlerpLinearThresh = 0.9995
    q1 = q1/LA.norm(q1)
    q2 = q2/LA.norm(q2)
    if t < kEpsilon:
        return q1
    if t > 1-kEpsilon:
        return q2

    dot_product = np.sum(q1*q2)
    if abs(dot_product) >= 1:
        return q_default

    # q and -q encode the same rotation; flip q2 onto q1's hemisphere so both
    # branches below interpolate along the short path.
    sign = 1
    if dot_product < 0:
        sign = -1
        dot_product = -dot_product

    if dot_product > kSlerpLinearThresh:
        # Nearly parallel: sin(theta) is too small for a stable slerp, so use
        # a normalized linear blend. The sign matters here as well, otherwise
        # near-antipodal inputs cancel out and yield an unrelated rotation.
        q3 = q1*(1-t) + q2*(sign*t)
        return q3/LA.norm(q3)

    theta = np.arccos(dot_product)
    inv_sin_theta = 1.0 / np.sin(theta)
    coeff1 = np.sin((1.0 - t) * theta) * inv_sin_theta
    coeff2 = sign * np.sin(t * theta) * inv_sin_theta
    return q1 * coeff1 + q2 * coeff2
def GetMetadata(frame_data, frame_index, result_poses = None):
    """Collect the per-frame metadata used to build projections.

    Args:
        frame_data: 2-D array of frame records; columns 0, 3 and 4 of row
            ``frame_index`` are read.
        frame_index: index of the frame of interest.
        result_poses: optional dict. When it contains 'real fov' and/or
            'virtual fov', row ``frame_index`` of those arrays is used instead
            of the default field of view.

    Returns:
        Dict with keys "frame_id", "timestamp_ns", "timestamp_ois_ns",
        "rs_time_ns", "fov" and "virtual_fov".
    """
    # None instead of a shared mutable ``{}`` default.
    if result_poses is None:
        result_poses = {}

    # We can just use 1.27 as fov and virtual fov for videos in the data set.
    default_fov = 1.27

    metadata = {}
    metadata["frame_id"] = frame_index
    metadata["timestamp_ns"] = frame_data[frame_index, 0]
    metadata["timestamp_ois_ns"] = frame_data[frame_index, 4]
    metadata["rs_time_ns"] = frame_data[frame_index, 3]
    if "real fov" in result_poses:
        metadata["fov"] = result_poses['real fov'][frame_index,:]
    else:
        metadata["fov"] = default_fov

    if "virtual fov" in result_poses:
        metadata["virtual_fov"] = result_poses['virtual fov'][frame_index,:]
    else:
        metadata["virtual_fov"] = default_fov
    return metadata
def ConvertQuaternionToRotationMatrix(quat):
    """Return the 3x3 rotation matrix of an [x, y, z, w] quaternion.

    The quaternion is used as given (it is not normalized here). The result
    is a float array laid out row by row.
    """
    qx, qy, qz, qw = quat[0], quat[1], quat[2], quat[3]
    rows = [
        [1 - 2 * qy * qy - 2 * qz * qz, 2 * qx * qy - 2 * qz * qw, 2 * qx * qz + 2 * qy * qw],
        [2 * qx * qy + 2 * qz * qw, 1 - 2 * qx * qx - 2 * qz * qz, 2 * qy * qz - 2 * qx * qw],
        [2 * qx * qz - 2 * qy * qw, 2 * qy * qz + 2 * qx * qw, 1 - 2 * qx * qx - 2 * qy * qy],
    ]
    return np.array(rows, dtype=float)
def ConvertRotationMatrixToQuaternion(m):
    """Convert a 3x3 rotation matrix to an [x, y, z, w] quaternion.

    The branch is chosen from the trace and the largest diagonal entry so
    that the divisor ``s`` is built from the dominant quaternion component.
    """
    m00, m11, m22 = m[0,0], m[1,1], m[2,2]
    trace = m00 + m11 + m22

    if trace > 0:
        # w is the dominant component.
        s = 2 * (trace+1.0)**0.5
        return np.array([(m[2,1] - m[1,2]) / s,
                         (m[0,2] - m[2,0]) / s,
                         (m[1,0] - m[0,1]) / s,
                         0.25 * s])

    if m00 > m11 and m00 > m22:
        # x is the dominant component.
        s = 2* (1.0 + m00 - m11 - m22) ** 0.5
        return np.array([0.25 * s,
                         (m[0,1] + m[1,0]) / s,
                         (m[0,2] + m[2,0]) / s,
                         (m[2,1] - m[1,2]) / s])

    if m11 > m22:
        # y is the dominant component.
        s = 2* (1.0 - m00 + m11 - m22) ** 0.5
        return np.array([(m[0,1] + m[1,0]) / s,
                         0.25 * s,
                         (m[1,2] + m[2,1]) / s,
                         (m[0,2] - m[2,0]) / s])

    # z is the dominant component.
    s = 2* (1.0 - m00 - m11 + m22) ** 0.5
    return np.array([(m[0,2] + m[2,0]) / s,
                     (m[1,2] + m[2,1]) / s,
                     0.25 * s,
                     (m[1,0] - m[0,1]) / s])
def GetForwardGrid(static_options, real_projections, virtual_projection):
    """Build the normalized warp grid from the real projections to the virtual one.

    real_projections: a set of 3x3 projections, indexed by grid row.
    virtual_projection: a single 3x3 projection.

    Returns an array of shape (4, num_grid_cols, num_grid_rows). Entry
    [:, j, i] holds [warped_x, warped_y, u, v], where (u, v) is the normalized
    position of grid vertex (row i, column j) and the warped coordinates are
    divided by the frame width and height.
    """
    n_cols = static_options["num_grid_cols"]
    n_rows = static_options["num_grid_rows"]
    forward_grid = np.zeros((4, n_cols, n_rows))
    frame_w = static_options["width"]
    frame_h = static_options["height"]
    v_step = 1/ (n_rows - 1)
    u_step = 1/ (n_cols - 1)
    frame_scale = np.array([frame_w, frame_h, 1])

    for row in range(n_rows):
        # Every grid row has its own real projection, hence its own homography.
        row_transform = GetHomographyTransformFromProjections(real_projections[row], virtual_projection)
        v = row * v_step
        for col in range(n_cols):
            u = col * u_step
            source = np.array([u * frame_w, v * frame_h, 1]).T
            warped = ApplyTransform(row_transform, source) / frame_scale  # normalize
            forward_grid[:, col, row] = np.array([warped[0], warped[1], u, v])
    return forward_grid
def GetWarpingFlow(real_projections_src, real_projections_dst, num_rows, num_cols, frame_width, frame_height):
    """Build the normalized flow grid that maps a source frame onto a destination frame.

    num_rows: rows of the flow.
    num_cols: cols of the flow.

    Row i is warped with the homography from real_projections_src[i] to
    real_projections_dst[i]. The result has shape (4, num_cols, num_rows) and
    entry [:, j, i] holds [warped_x, warped_y, u, v] in normalized coordinates.
    """
    flow = np.zeros((4, num_cols, num_rows))
    row_step = 1/ (num_rows - 1)
    col_step = 1/ (num_cols - 1)
    u_values = [j * col_step for j in range(num_cols)]
    frame_size = np.array([frame_width, frame_height, 1])

    for i in range(num_rows):
        v = i * row_step
        transform = GetHomographyTransformFromProjections(real_projections_src[i], real_projections_dst[i])
        for j, u in enumerate(u_values):
            pixel = np.array([u * frame_width, v * frame_height, 1]).T
            moved = ApplyTransform(transform, pixel)
            moved = moved / frame_size  # normalize
            flow[:, j, i] = np.array([moved[0], moved[1], u, v])
    return flow
def ApplyTransform(transform, point):
    """Warp a homogeneous 2D point ([x, y, 1]) with a homography.

    Returns the warped point rescaled so that its third component is 1,
    i.e. [warped_x, warped_y, 1].
    """
    homogeneous = np.matmul(transform, point)
    # Perspective divide by the third (homogeneous) coordinate.
    return homogeneous / homogeneous[2]
def CenterZoom(grid, ratio):
    """Scale the first two channels of a 4-D grid about the center 0.5.

    Channels 0 and 1 along axis 1 are replaced in place by
    (value - 0.5) * ratio + 0.5; the remaining channels are untouched.
    The same (mutated) grid object is returned.
    """
    xy_channels = (slice(None), slice(0, 2), slice(None), slice(None))
    centered = grid[xy_channels] - 0.5
    grid[xy_channels] = centered * ratio + 0.5
    return grid
def _plot_rotation_channel(position, real, virtual, virtual2, column, ylabel, ylim = None):
    """Draw one of the five stacked subplots: real (green), virtual (blue), virtual2 (red)."""
    plt.subplot(5,1,position)
    plt.plot(real[:,column], "g")
    if virtual is not None:
        plt.plot(virtual[:,column], "b")
    if virtual2 is not None:
        plt.plot(virtual2[:,column], "r")
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel('frame id')
    plt.ylabel(ylabel)


def visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path):
    """Plot per-frame rotations and lens offsets and save the figure as a JPEG.

    Five stacked subplots are drawn: columns 0-2 of the rotation arrays
    ('gyro x/y/z', y-axis limited to +-0.02) and columns 0-1 of the lens
    offset arrays ('ois x/y'). The real data is green; the optional virtual
    and virtual2 series are blue and red and are skipped when None.

    Args:
        rotations_real, lens_offsets_real: 2-D arrays indexed [frame, column].
        rotations_virtual, lens_offsets_virtual: optional arrays, or None.
        rotations_virtual2, lens_offsets_virtual2: optional arrays, or None.
        path: output path; its last four characters are replaced by ".jpg".
    """
    # A dedicated figure that is closed afterwards, so repeated calls (one per
    # video) do not accumulate open figures. This also makes the former
    # leading plt.clf() unnecessary.
    fig = plt.figure(figsize=(8,16))
    try:
        for column, label in enumerate(('gyro x', 'gyro y', 'gyro z')):
            _plot_rotation_channel(column + 1, rotations_real, rotations_virtual,
                                   rotations_virtual2, column, label, ylim = (-0.02, 0.02))
        # Each optional lens-offset series is guarded by its own argument;
        # testing rotations_virtual2 here would crash when only the rotation
        # series is supplied.
        for column, label in enumerate(('ois x', 'ois y')):
            _plot_rotation_channel(column + 4, lens_offsets_real, lens_offsets_virtual,
                                   lens_offsets_virtual2, column, label)
        plt.savefig(path[:-4]+".jpg")
    finally:
        plt.close(fig)
    return
def LoadStabResult(input_name):
    """Parse a stabilization result file into a dict of arrays.

    The file is read record by record with ReadLine, which yields a field
    name and an array with a leading axis of length 1. Records that share a
    name are stacked along axis 0 in file order.

    Args:
        input_name: path of the text file to read.

    Returns:
        Dict mapping each field name to its stacked values, in order of first
        appearance. An empty file yields an empty dict.
    """
    chunks = {}
    # The context manager closes the file even if a record fails to parse.
    with open(input_name) as fid:
        while True:
            name, val = ReadLine(fid)
            if name is None:
                break
            chunks.setdefault(name, []).append(val)

    # Concatenate once per field instead of once per record, which would copy
    # the growing array for every line.
    data = {
        name: vals[0] if len(vals) == 1 else np.concatenate(vals, axis=0)
        for name, vals in chunks.items()
    }
    first_field = next(iter(data.values()), ())
    print("Mesh length: ", len(first_field))
    return data
model.net.eval() model.unet.eval() activation = nn.Softshrink(0.0006) # 0.0036 for i, data in enumerate(loader, 0): # get the inputs; data is a list of [inputs, labels] real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data print("Fininsh Load data") real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4] real_projections = real_projections.type(torch.float) flo = flo.type(torch.float) flo_back = flo_back.type(torch.float) ois = ois.type(torch.float) batch_size, step, dim = real_inputs.size() times = times.numpy() real_queue_idx = real_queue_idx.numpy() virtual_queue = [None] * batch_size run_loss = 0 model.net.init_hidden(batch_size) count = 0 for j in range(step): if (j+1) % 100 == 0: print("Step: "+str(j+1)+"/"+str(step)) virtual_inputs, vt_1 = loader.dataset.get_virtual_data( virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j]) real_inputs_step = real_inputs[:,j,:] inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1) # inputs = Variable(real_inputs_step) if USE_CUDA: real_inputs_step = real_inputs_step.cuda() virtual_inputs = virtual_inputs.cuda() inputs = inputs.cuda() if no_flo is False: flo_step = flo[:,j].cuda() flo_back_step = flo_back[:,j].cuda() else: flo_step = None flo_back_step = None vt_1 = vt_1.cuda() real_projections_t = real_projections[:,j+1].cuda() real_projections_t_1 = real_projections[:,j].cuda() real_postion_anchor = real_postion[:,j].cuda() ois_step = ois[:,j].cuda() if no_flo is False: b, h, w, _ = flo_step.size() flo_step = norm_flow(flo_step, h, w) flo_back_step = norm_flow(flo_back_step, h, w) with torch.no_grad(): if no_flo is False: flo_out = model.unet(flo_step, flo_back_step) else: flo_out = None if j < 1: for i in range(2): out = model.net(inputs, flo_out, ois_step) else: out = model.net(inputs, flo_out, ois_step) real_position = real_inputs_step[:,40:44] virtual_position = virtual_inputs[:, -4:] out[:, :3] = 
activation(out[:, :3]) out = torch_norm_quat(out) pos = torch_QuaternionProduct(virtual_position, real_postion_anchor) loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \ flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \ follow = True, optical = True, undefine = True) run_loss += loss_step out = torch_QuaternionProduct(out, pos) if USE_CUDA: out = out.cpu().detach().numpy() virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1]) run_loss /= step print( "\nLoss: follow, angle, smooth, c2_smooth, undefine, optical") print(run_loss.cpu().numpy()[:-1], "\n") return np.squeeze(virtual_queue, axis=0) def inference(cf, data_path, USE_CUDA): checkpoints_dir = cf['data']['checkpoints_dir'] checkpoints_dir = make_dir(checkpoints_dir, cf) files = os.listdir(data_path) for f in files: if f[-3:] == "mp4" and "no_ois" not in f and "no_shutter" not in f and "gimbal" not in f.lower() and "grid" not in f.lower() and "flo" not in f.lower(): video_name = f[:-4] # Define the model model = Model(cf) load_model = cf["model"]["load_model"] print("------Load Pretrined Model--------") if load_model is not None: checkpoint = torch.load(load_model) print(load_model) else: load_last = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint') checkpoint = torch.load(load_last) print(load_last) model.net.load_state_dict(checkpoint['state_dict']) model.unet.load_state_dict(checkpoint['unet']) if USE_CUDA: model.net.cuda() model.unet.cuda() print("-----------Load Dataset----------") test_loader = get_inference_data_loader(cf, data_path, no_flo = False) data = test_loader.dataset.data[0] start_time = time.time() virtual_queue= run(model, test_loader, cf, USE_CUDA=USE_CUDA) virtual_data = np.zeros((1,5)) virtual_data[:,1:] = virtual_queue[0, 1:] virtual_data[:,0] = data.frame[0,0] virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0) print(virtual_queue.shape) time_used = 
def main(args = None):
    """Run inference, plotting and video warping for every sequence in a directory.

    Args:
        args: parsed command-line namespace providing ``config`` (YAML config
            path) and ``dir_path`` (directory with one sub-directory per
            sequence).
    """
    config_file = args.config
    dir_path = args.dir_path
    # safe_load: yaml.load() without an explicit Loader is rejected by
    # PyYAML >= 6 and can construct arbitrary objects on older versions.
    # The context manager also closes the config file.
    with open(config_file, 'r') as config_stream:
        cf = yaml.safe_load(config_stream)

    USE_CUDA = cf['data']["use_cuda"]

    # NOTE(review): the log file is handed to Printer and intentionally left
    # open for the lifetime of the process - confirm Printer's ownership.
    log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'_test.log'), 'w+')
    printer = Printer(sys.stdout, log_file).open()

    data_name = sorted(os.listdir(dir_path))
    total = len(data_name)
    for index, name in enumerate(data_name):
        print("Running Inference: " + str(index+1) + "/" + str(total))
        save_path = os.path.join("./test", cf['data']['exp'], name+'_stab.mp4')

        data_path = os.path.join(dir_path, name)
        data, virtual_queue, video_name, grid = inference(cf, data_path, USE_CUDA)

        virtual_queue2 = None
        visual_result(cf, data, name, virtual_queue, virtual_queue2 = virtual_queue2, compare_exp = None)

        video_path = os.path.join(data_path, video_name+".mp4")
        warp_video(grid, video_path, save_path, frame_number = False)
    return
def inference(cf, data_path, USE_CUDA):
    """Load one sequence, replay its sensor data and plot the real rotations.

    Args:
        cf: parsed configuration dict.
        data_path: directory holding the sequence (sensor logs and video).
        USE_CUDA: forwarded to ``run``.

    Raises:
        FileNotFoundError: when ``data_path`` contains no ".mp4" file whose
            name is free of "no_ois" and "gimbal".
    """
    print("-----------Load Dataset----------")
    test_loader = get_inference_data_loader(cf, data_path)
    data = test_loader.dataset.data[0]
    test_loader.dataset.no_flo = True
    test_loader.dataset.static_options = get_static(ratio = 0)

    virtual_queue = run(test_loader, cf, USE_CUDA=USE_CUDA)

    # Prepend an entry for the first frame: same values as the first queue
    # entry, stamped with the first frame's time.
    # NOTE(review): virtual_queue is not consumed below; kept so the loading
    # path is still exercised exactly as before.
    virtual_data = np.zeros((1,5))
    virtual_data[:,1:] = virtual_queue[0, 1:]
    virtual_data[:,0] = data.frame[0,0]
    virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)

    # The last matching file wins, as before.
    video_name = None
    for f in os.listdir(data_path):
        if f[-3:] == "mp4" and "no_ois" not in f and "gimbal" not in f.lower():
            video_name = f[:-4]
    if video_name is None:
        # Fail with an explicit message instead of an unbound-variable error.
        raise FileNotFoundError("no usable .mp4 video found in " + str(data_path))
    print(video_name)

    print("------Start Visual Result--------")
    rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
    fig_path = os.path.join(data_path, video_name+"_real.jpg")
    visual_rotation(rotations_real, lens_offsets_real, None, None, None, None, fig_path)

    return
class C1_Smooth_loss(torch.nn.Module):
    """Mean squared distance between a quaternion batch and the identity (0, 0, 0, 1)."""

    def __init__(self):
        super(C1_Smooth_loss, self).__init__()
        self.MSE = torch.nn.MSELoss()

    def forward(self, v_r_axis, v_axis_t_1 = None, real_postion = None):
        """Return ``MSE(v_r_axis, identity)``.

        Args:
            v_r_axis: tensor whose second axis holds quaternions with the
                scalar part at index 3.
            v_axis_t_1: accepted for call compatibility; not used.
            real_postion: accepted for call compatibility; not used.
        """
        # Build the target next to the input (same device and dtype) instead
        # of forcing it onto the default CUDA device.
        quat_zero = torch.zeros_like(v_r_axis)
        quat_zero[:, 3] = 1
        return self.MSE(v_r_axis, quat_zero)
class Angle_loss(torch.nn.Module):
    """Soft-thresholded penalty on the rotation angle between two quaternion batches."""

    def __init__(self):
        super(Angle_loss, self).__init__()
        self.MSE = torch.nn.MSELoss()  # kept as an attribute; forward() does not use it

    def forward(self, Q1, Q2, threshold = 0.5236, logistic_beta1 = 100):
        """Return ``(loss, theta)`` for the relative rotation ``Q2 * Q1^-1``.

        Args:
            Q1, Q2: quaternion batches with the scalar part at index 3.
            threshold: angle (radians) at which the logistic gate is 0.5.
            logistic_beta1: steepness of the logistic gate.

        Returns:
            loss: mean over the batch of ``theta * sigmoid(beta * (theta - threshold))``.
            theta: per-sample angle ``2 * acos(w)``; left at 0 where ``w >= 1``.
        """
        Q3 = torch_norm_quat(torch_QuaternionProduct(Q2, torch_QuaternionReciprocal(Q1)))
        w = Q3[:, 3]

        # Allocate on the input's device/dtype rather than hard-coding CUDA.
        theta = torch.zeros_like(w)
        # acos is only evaluated where w < 1, so samples at w >= 1 keep a
        # zero angle and never contribute an infinite acos gradient.
        mask = w < 1
        theta[mask] = torch.acos(w[mask]) * 2

        # torch.sigmoid(x) == 1 / (1 + exp(-x)) without the exp overflow.
        gate = torch.sigmoid(logistic_beta1 * (theta - threshold))
        loss = torch.mean(theta * gate)
        return loss, theta
def get_mesh(height = 270, width = 480, USE_CUDA = True):
    """Return the normalised pixel-centre grid as a ``[1, H, W, 2]`` float tensor.

    ``mesh[0, r, c]`` holds ``(x, y) = ((c + 0.5) / width, (r + 0.5) / height)``.

    Args:
        height: number of rows (H).
        width: number of columns (W).
        USE_CUDA: move the result to the default CUDA device when true.
    """
    # Half-pixel offsets belong to their own axis: x uses the width and y the
    # height (they were previously swapped).
    xs = (np.arange(width) + 0.5) / width
    ys = (np.arange(height) + 0.5) / height
    xmesh, ymesh = np.meshgrid(xs, ys)  # both [H, W]

    # Stack to [H, W, 2] with x first, then add the leading batch axis.
    mesh = torch.Tensor(np.stack((xmesh, ymesh), axis=-1)[np.newaxis])
    if USE_CUDA:
        mesh = mesh.cuda()
    return mesh
def _pickle_keypoints(point):
    """Reduction function that lets ``copyreg`` pickle a ``cv2.KeyPoint``.

    Returns the ``(constructor, args)`` pair: the ``cv2.KeyPoint`` class and
    the positional values read from ``point`` -- the unpacked ``pt``
    followed by ``size``, ``angle``, ``response``, ``octave`` and ``class_id``.
    """
    scalar_fields = (
        point.size,
        point.angle,
        point.response,
        point.octave,
        point.class_id,
    )
    ctor_args = tuple(point.pt) + scalar_fields
    return cv2.KeyPoint, ctor_args
def get_rescale_matrix(M, sx, sy):
    """Return ``M`` right-multiplied by ``diag(1/sx, 1/sy, 1)``.

    Args:
        M: 3x3 matrix; it is not modified.
        sx: scale factor divided out of the first column of ``M``.
        sy: scale factor divided out of the second column of ``M``.

    Returns:
        A new 3x3 array equal to ``M @ diag(1/sx, 1/sy, 1)``.
    """
    # NOTE(review): the Stack Overflow answer this helper cites rescales a
    # homography as S @ M @ inv(S); only the right-hand factor is applied
    # here -- confirm that dropping the left factor is intended.  The unused
    # forward-scale matrix that used to be built has been removed.
    inverse_scale = np.diag([1 / sx, 1 / sy, 1.0])
    return np.matmul(M, inverse_scale)
interpolation = cv2.INTER_LINEAR) img1o = cv2.imread(os.path.join(out_src, f), 0) img1o = cv2.resize(img1o, (w_size,h_size), interpolation = cv2.INTER_LINEAR) sift = cv2.SIFT_create() if f in dic["in_sift"]: keyPoints1, descriptors1 = dic["in_sift"][f] else: # Detect the SIFT key points and compute the descriptors for the two images keyPoints1, descriptors1 = sift.detectAndCompute(img1, None) dic["in_sift"][f] = (keyPoints1, descriptors1) if f in dic["out_sift"]: keyPoints1o, descriptors1o = dic["out_sift"][f] else: keyPoints1o, descriptors1o = sift.detectAndCompute(img1o, None) dic["out_sift"][f] = (keyPoints1o, descriptors1o) # Match the descriptors matches = bf.knnMatch(descriptors1, descriptors1o, k=2) # Select the good matches using the ratio test goodMatches = [] for m, n in matches: if m.distance < ratio * n.distance: goodMatches.append(m) if len(goodMatches) > MIN_MATCH_COUNT: # Get the good key points positions sourcePoints = np.float32([ keyPoints1[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) destinationPoints = np.float32([ keyPoints1o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh) im_dst = cv2.warpPerspective(img1, M, (w_size,h_size)) cm = [] for i in range(6): for j in range(6): hs = int(h_size * (0.2 + 0.1 * i)) he = int(h_size * (0.3 + 0.1 * i)) ws = int(w_size * (0.2 + 0.1 * j)) we = int(w_size * (0.3 + 0.1 * j)) cm.append(np.corrcoef(img1o[hs:he, ws:we].flat, im_dst[hs:he, ws:we].flat)) dic["distortion"].append(cm) if crop_scale: sx, sy = get_scale(M) M_scale = get_rescale_matrix(M, sx, sy) w_crop, h_crop = crop_metric(M_scale) else: w_crop, h_crop = crop_metric(M) dic["w_crop"].append(w_crop) dic["h_crop"].append(h_crop) # Obtain Scale, Translation, Rotation, Distortion value sx = M[0, 0] sy = M[1, 1] scaleRecovered = math.sqrt(np.abs(sx*sy)) w, _ = np.linalg.eig(M[0:2,0:2]) w = np.sort(w)[::-1] DV = w[1]/w[0] 
#pdb.set_trace() dic["CR_seq"].append(1.0/scaleRecovered) dic["DV_seq"].append(DV) # For Stability score calculation if count < len(frameList): f_path = f[:-9] + '%05d.png' % (int(f[-9:-4])+1) if f_path in dic["out_sift"]: keyPoints2o, descriptors2o = dic["out_sift"][f_path] else: img2o = cv2.imread(os.path.join(out_src, f_path), 0) img2o = cv2.resize(img2o, (w_size,h_size), interpolation = cv2.INTER_LINEAR) keyPoints2o, descriptors2o = sift.detectAndCompute(img2o, None) dic["out_sift"][f_path] = (keyPoints2o, descriptors2o) matches = bf.knnMatch(descriptors1o, descriptors2o, k=2) goodMatches = [] for m, n in matches: if m.distance < ratio * n.distance: goodMatches.append(m) if len(goodMatches) > MIN_MATCH_COUNT: # Get the good key points positions sourcePoints = np.float32([ keyPoints1o[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) destinationPoints = np.float32([ keyPoints2o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2) # Obtain the homography matrix M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh) P_seq.append(np.matmul(Pt, M)) Pt = np.matmul(Pt, M) if count % 10 ==0: sys.stdout.write('\rFrame: ' + str(count) + '/' + str(len(frameList))) sys.stdout.flush() dic["count"] = count count += 1 # Make 1D temporal signals P_seq_t = np.asarray([1]) P_seq_r = np.asarray([1]) #pdb.set_trace() for Mp in P_seq: sx = Mp[0, 0] sy = Mp[1, 1] c = Mp[0, 2] f = Mp[1, 2] transRecovered = math.sqrt(c*c + f*f) thetaRecovered = math.atan2(sx, sy) * 180 / math.pi P_seq_t = np.concatenate((P_seq_t, [transRecovered]), axis=0) P_seq_r = np.concatenate((P_seq_r, [thetaRecovered]), axis=0) P_seq_t = np.delete(P_seq_t, 0) P_seq_r = np.delete(P_seq_r, 0) # FFT fft_t = np.fft.fft(P_seq_t) fft_r = np.fft.fft(P_seq_r) fft_t = abs(fft_t)**2 fft_r = abs(fft_r)**2 fft_t = np.delete(fft_t, 0) fft_r = np.delete(fft_r, 0) fft_t = fft_t[:int(len(fft_t)/2)] fft_r = fft_r[:int(len(fft_r)/2)] dic["fft_t"] = fft_t 
def crop_rm_outlier(crop):
    """Drop crop ratios below 0.5, then discard the five smallest survivors.

    Args:
        crop: sequence of crop ratios.

    Returns:
        A list of the remaining values in ascending order; empty when five
        or fewer values are at least 0.5.
    """
    values = np.asarray(crop)
    kept = values[values >= 0.5]
    ascending = list(np.sort(kept))
    return ascending[5:]
class LayerLSTM(nn.Module):
    """Single ``nn.LSTMCell`` that carries its hidden and cell state between calls."""

    def __init__(self, input_size, hidden_size, bias):
        super(LayerLSTM, self).__init__()
        self.LSTM = nn.LSTMCell(input_size, hidden_size, bias)
        self.hidden_size = hidden_size

    def init_hidden(self, batch_size):
        """Reset ``hx`` and ``cx`` to zeros of shape ``(batch_size, hidden_size)``.

        Must be called before the first ``forward`` of a sequence.
        """
        # Allocate the state wherever the cell's weights live instead of
        # hard-coding CUDA, so the layer also runs on CPU or on whichever
        # GPU the module was moved to.
        weight = self.LSTM.weight_ih
        state_shape = (batch_size, self.hidden_size)
        self.hx = torch.zeros(state_shape, device=weight.device, dtype=weight.dtype)
        self.cx = torch.zeros(state_shape, device=weight.device, dtype=weight.dtype)

    def forward(self, x):
        """Advance the cell by one step on ``x`` and return the new hidden state."""
        self.hx, self.cx = self.LSTM(x, (self.hx, self.cx))
        return self.hx
class LayerFC(nn.Module):
    """Fully connected stage: optional dropout, linear, optional batch norm, optional activation."""

    def __init__(self, in_features, out_features, bias, drop_out=0,
                 activation_function=nn.ReLU, batch_norm=False):
        """Create the sub-layers.

        Args:
            in_features: input width of the linear layer.
            out_features: output width of the linear layer.
            bias: whether the linear layer has a bias term.
            drop_out: dropout probability; a falsy value disables dropout.
            activation_function: module class instantiated with no arguments,
                or ``None`` for no activation.
            batch_norm: whether to add ``BatchNorm1d`` after the linear layer.
        """
        super().__init__()
        # Attributes are assigned in the order fc, activation, dropout,
        # batch_norm so sub-module registration order is unchanged.
        self.fc = nn.Linear(in_features, out_features, bias=bias)

        if activation_function is None:
            self.activation = None
        else:
            self.activation = activation_function()

        if drop_out:
            self.dropout = nn.Dropout(p=drop_out, inplace=False)
        else:
            self.dropout = None

        if batch_norm:
            self.batch_norm = nn.BatchNorm1d(out_features)
        else:
            self.batch_norm = None

    def forward(self, x):
        """Apply the enabled stages in order: dropout, fc, batch_norm, activation."""
        for stage in (self.dropout, self.fc, self.batch_norm, self.activation):
            if stage is not None:
                x = stage(x)
        return x
eval(cnn_layer_param[layer][4]) cnn = None cnn = LayerCNN(in_channel, out_channel, kernel_size, stride, padding, pooling_size, activation_function=cnn_activation, batch_norm=cnn_batch_norm) cnns.append(('%d' % layer, cnn)) self._rnn_input_size = int(math.floor((self._rnn_input_size+2*padding[1]-kernel_size[1])/stride[1])+1) if pooling_size is not None: self._rnn_input_size = int(math.floor((self._rnn_input_size-pooling_size[1])/pooling_size[1])+1) self.convs = nn.Sequential(OrderedDict(cnns)) else: self.convs = None out_channel = cf["data"]["channel_size"] self.gap = nn.AvgPool2d(self._rnn_input_size) if self.cnn_param["gap"] else None self._rnn_input_size = out_channel if self.cnn_param["gap"] else out_channel*(self._rnn_input_size) #RNN Layers rnns = [] rnn_layer_param = self.rnn_param["layers"] rnn_layers = len(rnn_layer_param) for layer in range(rnn_layers): if layer: rnn = LayerLSTM(rnn_layer_param[layer-1][0], rnn_layer_param[layer][0], rnn_layer_param[layer][1]) else: rnn = LayerLSTM(self._rnn_input_size, rnn_layer_param[layer][0], rnn_layer_param[layer][1]) rnns.append(('%d'%layer, rnn)) self.rnns = nn.Sequential(OrderedDict(rnns)) self._fc_input_size = rnn_layer_param[rnn_layers-1][0] #* 2 # ois #FC Layers fcs = [] fc_activation = Activates[self.fc_param["activate_function"]] fc_batch_norm = self.fc_param["batch_norm"] fc_layer_param = self.fc_param["layers"] fc_drop_out = self.fc_param["drop_out"] fc_layers = len(fc_layer_param) if fc_layers == 1: fc = LayerFC(self._fc_input_size,fc_layer_param[0][0],fc_layer_param[0][1], fc_drop_out, None, fc_batch_norm) fcs.append(('%d'%(fc_layers-1), fc)) else: for layer in range(fc_layers-1): if layer: fc = LayerFC(fc_layer_param[layer-1][0],fc_layer_param[layer][0],fc_layer_param[layer][1], fc_drop_out, fc_activation, fc_batch_norm) else: fc = LayerFC(self._fc_input_size,fc_layer_param[layer][0],fc_layer_param[layer][1], fc_drop_out,fc_activation, fc_batch_norm) fcs.append(('%d'%layer, fc)) fc = 
LayerFC(fc_layer_param[fc_layers-2][0],fc_layer_param[fc_layers-1][0],fc_layer_param[fc_layers-1][1], fc_drop_out,None, fc_batch_norm) # Modified fcs.append(('%d'%(fc_layers-1), fc)) self.class_num = fc_layer_param[fc_layers-1][0] self.fcs = nn.Sequential(OrderedDict(fcs)) def init_hidden(self, batch_size): for i in range(len(self.rnns)): self.rnns[i].init_hidden(batch_size) def forward(self, x, flo, ois): b,c = x.size() #x->[batch,channel,height,width] if self.convs is not None: x = self.convs(x) if self.gap is not None: x = self.gap(x) x = x.view(b,-1) if self.no_flo is False: x = torch.cat((x, flo), dim = 1) x = self.rnns(x) x = self.fcs(x) # [b, 4] x = torch_norm_quat(x) return x class Model(): def __init__(self, cf): super().__init__() self.net = Net(cf) self.unet = UNet() self.init_weights(cf) self.loss_smooth = C1_Smooth_loss() self.loss_follow = Follow_loss() self.loss_c2_smooth = C2_Smooth_loss() self.loss_optical = Optical_loss() self.loss_undefine = Undefine_loss(ratio = 0.08) self.loss_angle = Angle_loss() self.loss_stay = Stay_loss() self.loss_smooth_w = cf["loss"]["smooth"] self.loss_angle_w = cf["loss"]["angle"] self.loss_follow_w = cf["loss"]["follow"] self.loss_c2_smooth_w = cf["loss"]["c2_smooth"] self.loss_undefine_w = cf["loss"]["undefine"] self.loss_opt_w = cf["loss"]["opt"] self.loss_stay_w = cf["loss"]["stay"] self.gaussian_weight = np.array([0.072254, 0.071257, 0.068349, 0.063764, 0.057856, 0.051058, 0.043824, 0.036585, 0.029705, 0.023457, 0.01801]) def loss( self, out, vt_1, virtual_inputs, real_inputs, flo, flo_back, real_projections_t, real_projections_t_1, real_postion_anchor, follow = True, undefine = True, optical = True, stay = False ): unit_size = self.net.unit_size mid = real_inputs.size()[1]//(2*unit_size) Rt = real_inputs[:,unit_size*(mid):unit_size*(mid)+4] v_pos = torch_QuaternionProduct(out, virtual_inputs[:, -4:]) r_pos = torch_QuaternionProduct(v_pos, real_postion_anchor) loss = torch.zeros(7).cuda() if self.loss_follow_w > 0 
and follow: for i in range(-2,3): loss[0] += self.loss_follow_w * self.loss_follow(v_pos, real_inputs[:,unit_size*(i+mid):unit_size*(i+mid)+4], None) if self.loss_angle_w > 0 and follow: threshold = 6 / 180 * 3.1415926 loss_angle, theta = self.loss_angle(v_pos, Rt, threshold = threshold) loss[1] = self.loss_angle_w * loss_angle if self.loss_smooth_w > 0: loss_smooth = self.loss_smooth(out) loss[2] = self.loss_smooth_w * loss_smooth if self.loss_c2_smooth_w > 0: loss[3] = self.loss_c2_smooth_w * self.loss_c2_smooth(out, virtual_inputs[:, -4:], virtual_inputs[:, -8:-4]) if self.loss_undefine_w > 0 and undefine: Vt_undefine = v_pos.clone() for i in range(0, 10, 2): Rt_undefine = real_inputs[:,unit_size*(mid+i):unit_size*(mid+i)+4] loss_undefine_w = self.loss_undefine_w * self.gaussian_weight[i] loss[4] += loss_undefine_w * self.loss_undefine(Vt_undefine, Rt_undefine) Vt_undefine = torch_QuaternionProduct(out, Vt_undefine) Vt_undefine = torch_QuaternionProduct(out, Vt_undefine) if self.loss_opt_w > 0 and optical: loss[5] = self.loss_opt_w * self.loss_optical(r_pos, vt_1, flo, flo_back, real_projections_t, real_projections_t_1) if self.loss_stay_w > 0 and stay: loss[6] = self.loss_stay_w * self.loss_stay(out) return loss def init_weights(self, cf): for m in self.net.modules(): if isinstance(m, nn.Conv1d) or isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear): if cf["train"]["init"] == "xavier_uniform": nn.init.xavier_uniform_(m.weight.data) elif cf["train"]["init"] == "xavier_normal": nn.init.xavier_normal_(m.weight.data) for m in self.unet.modules(): if isinstance(m, nn.Conv1d) or isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear): if cf["train"]["init"] == "xavier_uniform": nn.init.xavier_uniform_(m.weight.data) elif cf["train"]["init"] == "xavier_normal": nn.init.xavier_normal_(m.weight.data) def save_checkpoint(self, epoch = 0, optimizer=None): package = { 'cnn': self.net.cnn_param, 'fc': self.net.fc_param, 
class DoubleConv(nn.Module):
    """One 3x3 convolution (padding 1) followed by an in-place ReLU.

    Despite the class name only a single convolution is applied.  Its number
    of output channels is ``mid_channels`` when that argument is truthy and
    ``out_channels`` otherwise.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        conv_width = mid_channels if mid_channels else out_channels
        stages = [
            nn.Conv2d(in_channels, conv_width, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the convolution and the ReLU."""
        return self.double_conv(x)
class Printer:
    """Stand-in for ``sys.stdout`` that copies every write to several files."""

    def __init__(self, *files):
        self.files = files

    def open(self):
        """Install this object as ``sys.stdout`` and return it.

        The stream being replaced is stored in ``sys._stdout`` the first time
        this runs; an existing ``sys._stdout`` is left as it is.
        """
        sys._stdout = getattr(sys, '_stdout', sys.stdout)
        sys.stdout = self
        return self

    def close(self):
        """Close every target except the saved stdout, then restore ``sys.stdout``."""
        saved = sys._stdout
        for stream in self.files:
            if stream != saved:
                stream.close()
        sys.stdout = saved

    def write(self, obj):
        """Write ``obj`` to every target, flushing each one right away."""
        for stream in self.files:
            stream.write(obj)
            stream.flush()

    def flush(self):
        """Do nothing; ``write`` already flushes every target."""
        pass
================================================ import os import sys import torch import torchvision import torch.nn as nn from torch.autograd import Variable import time import yaml import argparse import numpy as np from printer import Printer from dataset import get_data_loader from model import Model import datetime import copy from util import make_dir, get_optimizer, AverageMeter, save_train_info, norm_flow from gyro import torch_QuaternionProduct, torch_QuaternionReciprocal, torch_norm_quat os.environ["CUDA_VISIBLE_DEVICES"] = "0" def run_epoch(model, loader, cf, epoch, lr, optimizer=None, is_training=True, USE_CUDA=True, clip_norm=0): no_flo = False number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"] avg_loss = AverageMeter() if is_training: model.net.train() model.unet.train() else: model.net.eval() model.unet.eval() for i, data in enumerate(loader, 0): # get the inputs; data is a list of [inputs, labels] real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data print("Fininsh Load data") real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4] real_projections = real_projections.type(torch.float) flo = flo.type(torch.float) flo_back = flo_back.type(torch.float) ois = ois.type(torch.float) batch_size, step, dim = real_inputs.size() times = times.numpy() real_queue_idx = real_queue_idx.numpy() virtual_queue = loader.dataset.random_init_virtual_queue(batch_size, real_postion[:,0,:].numpy(), times[:,1]) # TODO # virtual_queue = [None] * batch_size loss = 0 model.net.init_hidden(batch_size) for j in range(step): virtual_inputs, vt_1 = loader.dataset.get_virtual_data( virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j]) real_inputs_step = real_inputs[:,j,:] inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1) # inputs = Variable(real_inputs_step) if USE_CUDA: real_inputs_step = real_inputs_step.cuda() 
virtual_inputs = virtual_inputs.cuda() inputs = inputs.cuda() if no_flo is False: flo_step = flo[:,j].cuda() flo_back_step = flo_back[:,j].cuda() else: flo_step = None flo_back_step = None vt_1 = vt_1.cuda() real_projections_t = real_projections[:,j+1].cuda() real_projections_t_1 = real_projections[:,j].cuda() real_postion_anchor = real_postion[:,j].cuda() ois_step = ois[:,j].cuda() if no_flo is False: b, h, w, _ = flo_step.size() flo_step = norm_flow(flo_step, h, w) flo_back_step = norm_flow(flo_back_step, h, w) if is_training: if no_flo is False: flo_out = model.unet(flo_step, flo_back_step) else: flo_out = None if j < 1: for i in range(2): out = model.net(inputs, flo_out, ois_step) else: out = model.net(inputs, flo_out, ois_step) else: with torch.no_grad(): if no_flo is False: flo_out = model.unet(flo_step, flo_back_step) else: flo_out = None if j < 1: for i in range(2): out = model.net(inputs, flo_out, ois_step) else: out = model.net(inputs, flo_out, ois_step) if epoch <= 30: follow = True else: follow = False if epoch > 30: undefine = True else: undefine = False if epoch > 40: optical = True else: optical = False loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \ flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \ follow = follow, undefine = undefine, optical = optical, stay = optical) loss = loss_step virtual_position = virtual_inputs[:, -4:] pos = torch_QuaternionProduct(virtual_position, real_postion_anchor) out = torch_QuaternionProduct(out, pos) if USE_CUDA: out = out.cpu().detach().numpy() virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1]) if (j+1) % 10 == 0: print("Step: "+str(j+1)+"/"+str(step)) print(loss) loss = torch.sum(loss) if is_training: optimizer.zero_grad() loss.backward(retain_graph=True) if clip_norm: nn.utils.clip_grad_norm_(model.net.parameters(), max_norm=clip_norm) nn.utils.clip_grad_norm_(model.unet.parameters(), max_norm=clip_norm) 
def train(args = None):
    """Train the stabilization model described by a YAML config file.

    Reads the config named by ``args.config``, seeds torch, builds the model
    and optimizer, optionally resumes from ``cf["model"]["load_model"]``, then
    runs one training pass and one evaluation pass per epoch and periodically
    stores checkpoints.

    Args:
        args: object with a ``config`` attribute holding the YAML path
            (the argparse namespace built in the ``__main__`` guard).

    Returns:
        None.
    """
    torch.autograd.set_detect_anomaly(True)
    config_file = args.config
    # safe_load: `yaml.load` without a Loader is rejected by PyYAML >= 6 and
    # can build arbitrary Python objects on older versions. The context
    # manager closes the config file handle.
    with open(config_file, 'r') as config_stream:
        cf = yaml.safe_load(config_stream)

    USE_CUDA = cf['data']["use_cuda"]
    seed = cf['train']["seed"]

    torch.manual_seed(seed)
    if USE_CUDA:
        torch.cuda.manual_seed(seed)

    checkpoints_dir = cf['data']['checkpoints_dir']
    epochs = cf["train"]["epoch"]
    snapshot = cf["train"]["snapshot"]
    decay_epoch = cf['train']['decay_epoch']
    init_lr = cf["train"]["init_lr"]
    lr_decay = cf["train"]["lr_decay"]
    lr_step = cf["train"]["lr_step"]
    clip_norm = cf["train"]["clip_norm"]
    load_model = cf["model"]["load_model"]

    checkpoints_dir = make_dir(checkpoints_dir, cf)

    # A fresh run truncates the log; a resumed run appends to it.
    log_path = os.path.join(cf["data"]["log"], cf['data']['exp']+'.log')
    log_file = open(log_path, 'w+' if load_model is None else 'a')
    # The log file stays open for the whole run because the Printer keeps
    # writing to it; the returned handle is intentionally kept alive here.
    printer = Printer(sys.stdout, log_file).open()

    print('----Print Arguments Setting------')
    for key in cf:
        print('{}:'.format(key))
        for para in cf[key]:
            print('{:50}:{}'.format(para,cf[key][para]))
        print('\n')

    # Define the model
    model = Model(cf)
    optimizer = get_optimizer(cf["train"]["optimizer"], model, init_lr, cf)

    for idx, m in enumerate(model.net.children()):
        print('{}:{}'.format(idx,m))
    for idx, m in enumerate(model.unet.children()):
        print('{}:{}'.format(idx,m))

    if load_model is not None:
        print("------Load Pretrined Model--------")
        # Without CUDA, remap tensors that were saved from a GPU onto the CPU
        # so that resuming on a CPU-only machine does not fail.
        checkpoint = torch.load(load_model, map_location=None if USE_CUDA else "cpu")
        model.net.load_state_dict(checkpoint['state_dict'])
        model.unet.load_state_dict(checkpoint['unet'])
        print("------Resume Training Process-----")
        optimizer.load_state_dict(checkpoint['optim_dict'])
        epoch_load = checkpoint['epoch']
        print("Epoch load: ", epoch_load)
    else:
        epoch_load = 0

    if USE_CUDA:
        model.net.cuda()
        model.unet.cuda()
        if load_model is not None:
            # The restored optimizer state must live on the same device as
            # the parameters that were just moved.
            for state in optimizer.state.values():
                for k, v in state.items():
                    if isinstance(v, torch.Tensor):
                        state[k] = v.cuda()
            # Continue from the learning rate stored in the checkpoint.
            for param in optimizer.param_groups:
                init_lr = param['lr']

    print("-----------Load Dataset----------")
    train_loader, test_loader = get_data_loader(cf, no_flo = False)

    print("----------Start Training----------")
    currentDT = datetime.datetime.now()
    print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))

    start_time = time.time()

    # A periodic step, when configured, overrides the explicit epoch list.
    if lr_step:
        decay_epoch = list(range(1+lr_step, epochs+1, lr_step))

    lr = init_lr

    for count in range(epoch_load+1, epochs+1):
        if decay_epoch is not None and count in decay_epoch:
            lr *= lr_decay
            for param in optimizer.param_groups:
                param['lr'] *= lr_decay

        print("Epoch: %d, learning_rate: %.5f" % (count, lr))

        train_loss = run_epoch(model, train_loader, cf, count, lr, optimizer=optimizer,
                               clip_norm=clip_norm, is_training=True, USE_CUDA=USE_CUDA)

        test_loss = run_epoch(model, test_loader, cf, count, lr, is_training=False, USE_CUDA=USE_CUDA)

        time_used = (time.time() - start_time) / 60
        print("Epoch %d done | TrLoss: %.4f | TestLoss: %.4f | Time_used: %.4f minutes" % (
            count, train_loss, test_loss, time_used))

        if count % snapshot == 0:
            save_train_info("epoch", checkpoints_dir, cf, model, count, optimizer)
            save_train_info("last", checkpoints_dir, cf, model, count, optimizer)
            print("Model stored at epoch %d"%count)

        currentDT = datetime.datetime.now()
        print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))

    print("------------End Training----------")
    return
def make_dir(checkpoints_dir ,cf):
    """Create the checkpoint, log and inference directories for an experiment.

    Args:
        checkpoints_dir: root directory for checkpoints.
        cf: config dict; reads ``cf["data"]["log"]`` and ``cf["data"]["exp"]``.

    Returns:
        The experiment checkpoint directory, ``checkpoints_dir/<exp>``.
    """
    exp_name = cf['data']['exp']
    exp_checkpoints_dir = os.path.join(checkpoints_dir, exp_name)
    # exist_ok=True also creates missing parents and avoids the
    # check-then-create race of `if not os.path.exists(...)`.
    os.makedirs(cf["data"]["log"], exist_ok=True)
    os.makedirs(os.path.join("./test", exp_name), exist_ok=True)
    os.makedirs(exp_checkpoints_dir, exist_ok=True)
    return exp_checkpoints_dir


def get_optimizer(optimizer, model, init_lr, cf):
    """Build one optimizer over the parameters of ``model.net`` and ``model.unet``.

    Args:
        optimizer: optimizer name, ``"adam"`` or ``"sgd"``.
        model: object exposing ``net`` and ``unet`` modules.
        init_lr: initial learning rate.
        cf: config dict; reads ``cf["train"]["weight_decay"]`` for Adam and
            ``cf["train"]["momentum"]`` for SGD.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: if ``optimizer`` is not a supported name. Previously the
            name string itself was returned and failed later inside the
            training loop.
    """
    params = chain(model.net.parameters(), model.unet.parameters())
    if optimizer == "adam":
        return torch.optim.Adam(params, lr=init_lr, weight_decay=cf["train"]["weight_decay"])
    if optimizer == "sgd":
        return torch.optim.SGD(params, lr=init_lr, momentum=cf["train"]["momentum"])
    raise ValueError("Unsupported optimizer: %r (expected 'adam' or 'sgd')" % (optimizer,))


def crop_video(in_path, out_path, crop_ratio):
    """Crop the centre of every frame and scale it back to the video size.

    The kept window spans rows ``[(1-r)*H + 1, r*H - 1)`` and the matching
    column range, so ratios at or below 0.5 leave an empty window.

    Args:
        in_path: path of the source video.
        out_path: path of the cropped video to write.
        crop_ratio: ratio ``r`` used in the window bounds above.
    """
    frame_array, fps, size = load_video(in_path)
    # Use the real frame size instead of a hard-coded 1920x1080; load_video
    # returns size as (width, height).
    width, height = size
    hs = int((1-crop_ratio)*height) + 1
    he = int(crop_ratio*height) - 1
    ws = int((1-crop_ratio)*width) + 1
    we = int(crop_ratio*width) - 1
    for i, frame in enumerate(frame_array):
        frame_array[i] = cv2.resize(frame[hs:he,ws:we,:], size, interpolation = cv2.INTER_LINEAR)
    save_video(out_path, frame_array, fps, size= size)


def norm_flow(flow, h, w):
    """Normalize a flow field in place by the given height and width.

    The last axis holds the two flow components: component 0 is divided by
    ``h`` and component 1 by ``w``. Any number of leading axes is accepted.
    The previous rank test on ``flow.shape[2] == 2`` mis-handled a 4-D flow
    whose width happens to be 2.

    Args:
        flow: array or tensor whose last axis has at least two entries.
        h: divisor for component 0.
        w: divisor for component 1.

    Returns:
        The same ``flow`` object, modified in place.
    """
    flow[..., 0] /= h
    flow[..., 1] /= w
    return flow
def Rasterization(image, grid, get_mesh_only = False):
    """Warp ``image`` with a mesh by rasterizing its triangles into a sampling grid.

    Each mesh cell is split into two triangles. The warped triangles
    (``grid[..., :2]``) are rasterized into per-cell patches holding
    barycentric-interpolated source coordinates (``grid[..., 2:]``). The
    patches are accumulated into one dense grid, which is passed to
    ``torch.nn.functional.grid_sample``.

    Args:
        image: tensor indexed as (channel, height, width). It is modified in
            place: its top-left 2x2 pixels are zeroed (see below).
        grid: mesh tensor whose last axis stacks the warped xy (first two
            entries) and the original xy (last two entries).
            # NOTE(review): coordinates appear to be normalized by width and
            # height (grid_size multiplies them back) — confirm with caller.
        get_mesh_only: unused; kept for interface compatibility.

    Returns:
        The warped image as a CPU tensor.
    """
    height = image.shape[1]
    width = image.shape[2]

    wapper_upper_triangle, wapper_lower_triangle = grid_to_triangle(grid[:,:,:2])
    origin_upper_triangle, origin_lower_triangle = grid_to_triangle(grid[:,:,2:])

    # Only the per-cell minima and the common patch extent are needed here.
    (_, xmin, _, ymin), xlength, ylength = grid_size(
        wapper_upper_triangle, wapper_lower_triangle, height, width)

    xratio = xlength / width
    yratio = ylength / height

    # cell * upper/lower * point * xy
    wapper_triangle = torch.stack((wapper_upper_triangle,wapper_lower_triangle),dim = 1).to(device)
    origin_triangle = torch.stack((origin_upper_triangle,origin_lower_triangle),dim = 1).to(device)

    # Re-express every warped triangle in the unit square of its own patch.
    tran_triangle = torch.zeros(wapper_triangle.size()).to(device)
    tran_triangle[:,:,:,0] = (wapper_triangle[:,:,:,0] - xmin.view(-1,1,1).to(device)/width) / xratio
    tran_triangle[:,:,:,1] = (wapper_triangle[:,:,:,1] - ymin.view(-1,1,1).to(device)/height) / yratio

    # Barycentric weights per patch pixel (zero outside the triangle).
    mask = triangle2mask(tran_triangle, ylength, xlength)
    mask = torch.unsqueeze(mask, 4)
    origin_triangle = torch.unsqueeze(origin_triangle, 1)

    # Weighted sum of the source vertices, summed over vertices then triangles.
    grid_sample = origin_triangle * mask
    grid_sample = torch.sum(torch.sum(grid_sample, dim = 3), dim = 2).view(-1,ylength,xlength,2)

    # The canvas must hold every patch, including ones hanging off the
    # top/left border (negative minima).
    gxmin = min(0, int(torch.min(xmin)))
    gxmax = int(torch.max(xmin) + xlength)
    gymin = min(0, int(torch.min(ymin)))
    gymax = int(torch.max(ymin) + ylength)
    grid_merge = torch.zeros((max(gymax-gymin, height, height - gymin),
                              max(gxmax - gxmin, width, width - gxmin), 2)).to(device)
    for i in range(grid_sample.size()[0]):
        x_s = int(xmin[i] - gxmin)
        x_e = int(xmin[i] + xlength - gxmin)
        y_s = int(ymin[i] - gymin)
        y_e = int(ymin[i] + ylength - gymin)
        grid_merge[y_s:y_e, x_s:x_e, :] += grid_sample[i, :, :, :]

    # Crop the canvas back to the image window.
    grid_merge = grid_merge[-gymin:-gymin+height, -gxmin:-gxmin+width, :]

    # Map the coordinates to the [-1, 1] range that grid_sample expects.
    # NOTE(review): shift is ordered (0.5/height, 0.5/width) while grid_sample
    # reads the last axis as (x, y) — confirm this ordering is intended.
    shift = torch.tensor([0.5/height,0.5/width])[None, None, :].to(device)
    grid_merge = (grid_merge + 1*shift) * 2 - 1

    # Pixels no triangle covered keep the value 0 and therefore sample the
    # top-left corner; zero that corner so they come out black. This writes
    # into the caller's tensor.
    image[:3,:2,:2] = 0

    image = torch.unsqueeze(image, 0).to(device)
    grid_merge = torch.unsqueeze(grid_merge, 0)

    image = torch.nn.functional.grid_sample(image, grid_merge) # default bilinear
    image = torch.squeeze(image, 0)
    return image.cpu()


def grid_to_triangle(grid):
    """Split every cell of a vertex grid into an upper and a lower triangle.

    For the cell whose top-left vertex is ``(r, c)`` the upper triangle is
    ``(r, c), (r+1, c), (r, c+1)`` and the lower triangle is
    ``(r, c+1), (r+1, c), (r+1, c+1)``.

    Args:
        grid: tensor of shape (rows, cols, 2).

    Returns:
        ``(upper, lower)``, each of shape (cells, 3, 2) = cell * point * xy.
    """
    num = (grid.shape[0] - 1) * (grid.shape[1] - 1)
    top_left = grid[:-1, :-1]
    top_right = grid[:-1, 1:]
    bottom_left = grid[1:, :-1]
    bottom_right = grid[1:, 1:]

    # Stack the three vertices on a trailing axis -> (rows-1, cols-1, 2, 3),
    # then flatten the cells and move the vertex axis in front of xy.
    upper_triangle = torch.stack((top_left, bottom_left, top_right), dim = 3)
    upper_triangle = upper_triangle.reshape(num, 2, 3).transpose(1, 2)

    lower_triangle = torch.stack((top_right, bottom_left, bottom_right), dim = 3)
    lower_triangle = lower_triangle.reshape(num, 2, 3).transpose(1, 2)

    return upper_triangle, lower_triangle


def grid_size(upper_triangle, lower_triangle, height, width):
    """Compute the pixel bounding box of every cell and the largest extent.

    Args:
        upper_triangle: (cells, 3, 2) vertices, scaled by ``width``/``height``
            to obtain pixel coordinates.
        lower_triangle: same layout as ``upper_triangle``.
        height: factor applied to the y coordinate.
        width: factor applied to the x coordinate.

    Returns:
        ``([xmax, xmin, ymax, ymin], xlength, ylength)`` where the four
        tensors hold one value per cell (maxima are ``floor + 1``, minima are
        ``floor``) and the two ints are the largest width and height over all
        cells.
    """
    wapper_grid = torch.cat((upper_triangle, lower_triangle), dim = 1)
    xs = wapper_grid[:, :, 0] * width
    ys = wapper_grid[:, :, 1] * height

    xmax = torch.floor(xs.max(dim = 1)[0]) + 1
    ymax = torch.floor(ys.max(dim = 1)[0]) + 1
    xmin = torch.floor(xs.min(dim = 1)[0])
    ymin = torch.floor(ys.min(dim = 1)[0])

    xlength = int(torch.max(xmax - xmin))
    ylength = int(torch.max(ymax - ymin))

    return [xmax, xmin, ymax, ymin], xlength, ylength


def generate_mesh_grid(height, width):
    """Return an (height, width, 2) array of sampling positions in [0, 1).

    Entry ``[i, j]`` is ``(j / width, i / height)``.
    """
    # Create a grid of sampling positions
    xs = np.linspace(0, 1, width, endpoint=False)
    ys = np.linspace(0, 1, height, endpoint=False)
    xmesh, ymesh = np.meshgrid(xs, ys)
    # Pair the coordinates on a trailing axis -> H x W x 2
    return np.stack((xmesh, ymesh), axis=-1)


def triangle2mask(d, height, width):
    """Compute barycentric weights of every patch pixel for every triangle.

    Args:
        d: triangles as a [N x T x 3 x 2] tensor.
        height: patch height in pixels.
        width: patch width in pixels.

    Returns:
        A [N x P x T x 3] tensor (``P = height * width``) with the three
        barycentric weights of each pixel, zeroed for pixels that are not
        strictly inside the triangle.
    """
    N = d.size()[0] # batch size
    T = d.size()[1] # triangle number

    # Signed area term: edge function of the first vertex against the
    # opposite edge.
    area = edgefunc(d[:, :, 1, :], d[:, :, 2, :], d[:, :, None, 0, :])

    pixels = np.reshape(generate_mesh_grid(height, width), (height*width, 2))
    # Move the [P x 2] positions to the device of the triangles and broadcast
    # as a view, instead of materializing N*T copies on the CPU first.
    grid = torch.Tensor(pixels).to(d.device)
    grid = grid[None, None].expand(N, T, height*width, 2) # [N x T x P x 2]

    # Evaluate the edge functions at every position.
    w0 = edgefunc(d[:, :, 1, :], d[:, :, 2, :], grid) / area
    w1 = edgefunc(d[:, :, 2, :], d[:, :, 0, :], grid) / area
    w2 = edgefunc(d[:, :, 0, :], d[:, :, 1, :], grid) / area

    # Only pixels inside the triangles keep their weights. Casting to the
    # weights' dtype keeps the mask on their device instead of forcing a
    # CUDA tensor type.
    mask = (w0 > 0) & (w1 > 0) & (w2 > 0)
    mask = torch.unsqueeze(mask, 3).to(w0.dtype)

    w = torch.stack((w0,w1,w2),dim = 3) * mask

    return torch.transpose(w, 1, 2) # [N x P x T x 3]
def _target_size(width, height, resize):
    """Return the output (width, height) for a ``resize`` request."""
    if resize is None:
        return (width, height)
    if isinstance(resize, int):
        # An integer is a down-scaling factor.
        return (width//resize, height//resize)
    return resize


def _open_video(path):
    """Open ``path`` and read its first frame; return (capture, fps, frame)."""
    vidcap = cv2.VideoCapture(path)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    success, image = vidcap.read()
    if not success or image is None:
        vidcap.release()
        raise IOError("Cannot read any frame from video: " + str(path))
    return vidcap, fps, image


def load_video(path, save_dir = None, resize = None, length = -1):
    """Decode a video into a list of frames.

    Args:
        path: video file to read.
        save_dir: if given, every frame is also written there as
            ``frame_XXXX.png``.
        resize: ``None`` keeps the size, an ``int`` divides width and height
            by that factor, anything else is passed to ``cv2.resize`` as the
            target size.
        length: maximum number of frames to read; non-positive means all.

    Returns:
        ``(frames, fps, size)`` where ``frames`` is a list of images (as
        returned by OpenCV), and ``size`` is ``(width, height)``.

    Raises:
        IOError: if no frame can be read (previously an ``AttributeError``
            on ``None.shape``).
    """
    vidcap, fps, image = _open_video(path)
    frames = []
    try:
        print(image.shape)
        height, width = image.shape[:2]
        size = _target_size(width, height, resize)

        success = True
        while success:
            if resize is not None:
                image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
            if save_dir is not None:
                frame_path = os.path.join(save_dir, "frame_" + str(len(frames)).zfill(4) + ".png")
                cv2.imwrite(frame_path, image)
            frames.append(image)
            # Stop before decoding a frame that would be thrown away.
            if length > 0 and len(frames) >= length:
                break
            success, image = vidcap.read()
    finally:
        # Always give the decoder handle back, even on errors.
        vidcap.release()
    print("Video length: ", len(frames))
    return frames, fps, size


def video2frame(path, resize = None):
    """Extract the frames of the video found in every sub-folder of ``path``.

    For each sub-folder, the last ``.mp4`` in directory-listing order is
    decoded into ``<sub-folder>/frames``. Entries that are not directories
    and folders without an ``.mp4`` are skipped; previously such a folder
    raised ``NameError`` or silently reused the previous folder's video name.

    Args:
        path: directory containing one sub-folder per sequence.
        resize: forwarded to ``load_video``.
    """
    data_name = sorted(os.listdir(path))
    for i, name in enumerate(data_name):
        print(str(i+1)+" / " + str(len(data_name)))
        data_folder = os.path.join(path, name)
        print(data_folder)
        if not os.path.isdir(data_folder):
            continue
        videos = [f for f in os.listdir(data_folder) if f[-4:] == ".mp4"]
        if not videos:
            print("No .mp4 found, skipped: " + data_folder)
            continue
        video_path = os.path.join(data_folder, videos[-1])
        frame_folder = os.path.join(data_folder, "frames")
        os.makedirs(frame_folder, exist_ok=True)
        load_video(video_path, save_dir = frame_folder, resize=resize)


def video2frame_one_seq(path, save_dir = None, resize = None):
    """Decode a video frame by frame without keeping the frames in memory.

    Args:
        path: video file to read.
        save_dir: if given, every frame is written there as
            ``frame_XXXXX.png``.
        resize: same meaning as in ``load_video``.

    Returns:
        ``(fps, size)`` with ``size`` as ``(width, height)``.

    Raises:
        IOError: if no frame can be read.
    """
    vidcap, fps, image = _open_video(path)
    try:
        print(path)
        print(image.shape)
        height, width = image.shape[:2]
        size = _target_size(width, height, resize)

        count = 0
        success = True
        while success:
            if resize is not None:
                image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
            if save_dir is not None:
                frame_path = os.path.join(save_dir, "frame_" + str(count).zfill(5) + ".png")
                cv2.imwrite(frame_path, image)
            success, image = vidcap.read()
            count += 1
    finally:
        vidcap.release()
    return fps, size


def save_video(path,frame_array, fps, size, losses = None, frame_number = False, writer = None):
    """Write frames to a video, optionally stamping frame index and loss.

    Args:
        path: output file; an ``mp4`` suffix selects the ``mp4v`` codec,
            anything else ``MJPG``. Ignored when ``writer`` is given.
        frame_array: sequence of frames. Annotated frames replace the
            originals in this sequence.
        fps: frame rate for a newly created writer.
        size: ``(width, height)`` for a newly created writer.
        losses: optional per-frame values, indexed like ``frame_array``.
        frame_number: when true, draw the index of each frame.
        writer: an already opened ``cv2.VideoWriter`` to append to; it is
            left open for the caller.
    """
    owns_writer = writer is None
    if owns_writer:
        if path.endswith("mp4"):
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        else:
            fourcc = cv2.VideoWriter_fourcc('M','J','P','G')
        out = cv2.VideoWriter(path, fourcc, fps, size)
    else:
        out = writer

    try:
        for i in range(len(frame_array)):
            if frame_number:
                frame_array[i] = draw_number(np.asarray(frame_array[i]), i)
            if losses is not None:
                frame_array[i] = draw_number(np.asarray(frame_array[i]), losses[i], x = 900, message = "Loss: ")
            out.write(frame_array[i])
    finally:
        # Only close a writer this call created.
        if owns_writer:
            out.release()


def draw_number(frame, num, x = 10, y = 10, message = "Frame: "):
    """Return a copy of a BGR ``frame`` with ``message + str(num)`` drawn on it.

    The text is black, 45 pt, in the font file ``./data/arial.ttf``, and is
    anchored at pixel ``(x, y)``.
    """
    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype("./data/arial.ttf", 45)
    text = message + str(num)
    color = 'rgb(0, 0, 0)' # black color
    draw.text((x, y), text, fill=color, font=font)
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
def warp_video(mesh_path, video_path, save_path, losses = None, frame_number = False, fps_fix = None):
    """Warp a video with per-frame meshes and write the result as mp4.

    Frames are processed in chunks of 100 and appended to a single writer.

    Args:
        mesh_path: the per-frame mesh data itself (indexable on its first
            axis and exposing ``.shape``); a path string is not supported.
        video_path: source video.
        save_path: output video path.
        losses: optional per-frame values drawn on the output.
            # NOTE(review): assumed to be indexed by absolute frame number —
            # confirm with callers.
        frame_number: forwarded to ``save_video``.
        fps_fix: if given, overrides the frame rate of the source video.

    Raises:
        TypeError: if ``mesh_path`` is a string. Previously this printed
            "Error" and then failed on an unbound variable.
    """
    if isinstance(mesh_path, str):
        raise TypeError("warp_video expects the mesh data itself, not a path string")
    grid_data = mesh_path

    frame_array, fps, size = load_video(video_path, length = grid_data.shape[0])
    if fps_fix is not None:
        fps = fps_fix

    length = min(grid_data.shape[0], len(frame_array))
    seq_length = 100

    writer = cv2.VideoWriter(save_path,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
    try:
        for start in range(0, length, seq_length):
            end = min(start + seq_length, length)
            print("Frame: "+str(start)+"/"+str(length))
            # save_video indexes losses relative to the chunk, so hand it the
            # matching slice; otherwise every chunk would show the losses of
            # the first one.
            chunk_losses = None if losses is None else losses[start:end]
            frame_array_save = warpping_rast(grid_data[start:end], frame_array[start:end], losses = chunk_losses)
            save_video(save_path,frame_array_save, fps, size, losses = chunk_losses,
                       frame_number = frame_number, writer = writer)
    finally:
        # Finalize the file even if warping a chunk fails.
        writer.release()


def warpping_rast(grid_data, frame_array, losses = None):
    """Warp each frame with the mesh at the same index.

    Stops at the shorter of the two sequences. ``losses`` is unused and only
    kept for interface compatibility.

    Returns:
        List of warped frames.
    """
    return [warpping_one_frame_rast(frame, grid) for frame, grid in zip(frame_array, grid_data)]


def warpping_one_frame_rast(image, grid):
    """Warp one H x W x C uint8-range frame with one mesh via ``Rasterization``.

    Returns:
        The warped frame as an H x W x C ``uint8`` numpy array.
    """
    # Channels first and scaled to [0, 1] for the rasterizer.
    img = torch.Tensor(image).permute(2,0,1)/255
    mesh = torch.Tensor(grid)
    output_image = Rasterization(img, mesh)
    return np.clip(output_image.permute(1,2,0).numpy() * 255, 0, 255).astype("uint8")