Repository: googleinterns/deep-stabilization
Branch: master
Commit: 7159c09d21ae
Files: 65
Total size: 42.8 MB
Directory structure:
gitextract__lkvtuhi/
├── .gitignore
├── LICENSE
├── README.md
├── docs/
│ ├── code-of-conduct.md
│ └── contributing.md
└── dvs/
├── checkpoint/
│ └── stabilzation/
│ └── stabilzation_last.checkpoint
├── conf/
│ ├── stabilzation.yaml
│ └── stabilzation_train.yaml
├── dataset.py
├── flownet2/
│ ├── LICENSE
│ ├── README.md
│ ├── __init__.py
│ ├── convert.py
│ ├── datasets.py
│ ├── install.sh
│ ├── losses.py
│ ├── main.py
│ ├── models.py
│ ├── networks/
│ │ ├── FlowNetC.py
│ │ ├── FlowNetFusion.py
│ │ ├── FlowNetS.py
│ │ ├── FlowNetSD.py
│ │ ├── __init__.py
│ │ ├── channelnorm_package/
│ │ │ ├── __init__.py
│ │ │ ├── channelnorm.py
│ │ │ ├── channelnorm_cuda.cc
│ │ │ ├── channelnorm_kernel.cu
│ │ │ ├── channelnorm_kernel.cuh
│ │ │ └── setup.py
│ │ ├── correlation_package/
│ │ │ ├── __init__.py
│ │ │ ├── correlation.py
│ │ │ ├── correlation_cuda.cc
│ │ │ ├── correlation_cuda_kernel.cu
│ │ │ ├── correlation_cuda_kernel.cuh
│ │ │ └── setup.py
│ │ ├── resample2d_package/
│ │ │ ├── __init__.py
│ │ │ ├── resample2d.py
│ │ │ ├── resample2d_cuda.cc
│ │ │ ├── resample2d_kernel.cu
│ │ │ ├── resample2d_kernel.cuh
│ │ │ └── setup.py
│ │ └── submodules.py
│ ├── run.sh
│ ├── run_release.sh
│ └── utils/
│ ├── __init__.py
│ ├── flow_utils.py
│ ├── frame_utils.py
│ ├── param_utils.py
│ └── tools.py
├── gyro/
│ ├── __init__.py
│ ├── gyro_function.py
│ └── gyro_io.py
├── inference.py
├── load_frame_sensor_data.py
├── loss.py
├── metrics.py
├── model.py
├── printer.py
├── requirements.txt
├── train.py
├── util.py
└── warp/
├── __init__.py
├── rasterizer.py
├── read_write.py
└── warping.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
.torch
_ext
*.o
_ext/
*.png
*.jpg
*.tar
log/*
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# Deep Online Fused Video Stabilization
[[Paper]](https://openaccess.thecvf.com/content/WACV2022/papers/Shi_Deep_Online_Fused_Video_Stabilization_WACV_2022_paper.pdf)[[Supplementary]](https://zhmeishi.github.io/dvs/paper/dvs_supp.pdf) [[Project Page]](https://zhmeishi.github.io/dvs/) [[Dataset]](https://storage.googleapis.com/dataset_release/all.zip) [[Our Result]](https://storage.googleapis.com/dataset_release/inference_result_release.zip) [[More Results]](https://zhmeishi.github.io/dvs/supp/results.html)
This repository contains the Pytorch implementation of our method in the paper "Deep Online Fused Video Stabilization".
## Environment Setting
Python version >= 3.6
PyTorch >= 1.0.0 with CUDA support (installation guide is [here](https://pytorch.org/get-started/locally/))
Install other used packages:
```
cd dvs
pip install -r requirements.txt --ignore-installed
```
## Data Preparation
Download sample video [here](https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing).
Uncompress the *video* folder under the *dvs* folder.
```
python load_frame_sensor_data.py
```
Demo of curve visualization:
The **gyro/OIS curve visualization** can be found at *dvs/video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820_real.jpg*.
## FlowNet2 Preparation
Note: we provide the optical flow result for one test video in the Data Preparation download. If you would like to generate optical flow for all test videos, please follow the [FlowNet2 official repository](https://github.com/NVIDIA/flownet2-pytorch) and the guide below. Otherwise, you can skip this section.
Note: FlowNet2 installation is tricky. Please use Python 3.6 and PyTorch 1.0.0. More details are [here](https://github.com/NVIDIA/flownet2-pytorch/issues/156); contact us if you have any questions.
Download FlowNet2 model *FlowNet2_checkpoint.pth.tar* [here](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view). Move it under folder *dvs/flownet2*.
```
python warp/read_write.py # video2frames
cd flownet2
bash install.sh # install package
bash run.sh # generate optical flow file for dataset
```
## Running Inference
```
python inference.py
python metrics.py
```
The loss and metric information will be printed in the terminal. The metric numbers can differ slightly due to differences between OpenCV/PyTorch versions.
The result is under *dvs/test/stabilzation*.
In *s_114_outdoor_running_trail_daytime.jpg*, the blue curve is the output of our models, and the green curve is the input.
*s_114_outdoor_running_trail_daytime_stab.mp4* is the uncropped stabilized video.
*s_114_outdoor_running_trail_daytime_stab_crop.mp4* is the cropped stabilized video. Note that the cropped video is only generated after running the metrics code.
## Training
Download the dataset for training and testing [here](https://storage.googleapis.com/dataset_release/all.zip).
Uncompress *all.zip* and move *dataset_release* folder under the *dvs* folder.
Follow the FlowNet2 Preparation section above.
```
python warp/read_write.py --dir_path ./dataset_release # video2frames
cd flownet2
bash run_release.sh # generate optical flow file for dataset
```
Run training code.
```
python train.py
```
The model is saved in *checkpoint/stabilzation_train*.
## Citation
If you use this code or dataset for your research, please cite our paper.
```
@inproceedings{shi2022deep,
title={Deep Online Fused Video Stabilization},
author={Shi, Zhenmei and Shi, Fuhao and Lai, Wei-Sheng and Liang, Chia-Kai and Liang, Yingyu},
booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages={1250--1258},
year={2022}
}
```
================================================
FILE: docs/code-of-conduct.md
================================================
# Google Open Source Community Guidelines
At Google, we recognize and celebrate the creativity and collaboration of open
source contributors and the diversity of skills, experiences, cultures, and
opinions they bring to the projects and communities they participate in.
Every one of Google's open source projects and communities are inclusive
environments, based on treating all individuals respectfully, regardless of
gender identity and expression, sexual orientation, disabilities,
neurodiversity, physical appearance, body size, ethnicity, nationality, race,
age, religion, or similar personal characteristic.
We value diverse opinions, but we value respectful behavior more.
Respectful behavior includes:
* Being considerate, kind, constructive, and helpful.
* Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or
physically threatening behavior, speech, and imagery.
* Not engaging in unwanted physical contact.
Some Google open source projects [may adopt][] an explicit project code of
conduct, which may have additional detailed expectations for participants. Most
of those projects will use our [modified Contributor Covenant][].
[may adopt]: https://opensource.google/docs/releasing/preparing/#conduct
[modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/
## Resolve peacefully
We do not believe that all conflict is necessarily bad; healthy debate and
disagreement often yields positive results. However, it is never okay to be
disrespectful.
If you see someone behaving disrespectfully, you are encouraged to address the
behavior directly with those involved. Many issues can be resolved quickly and
easily, and this gives people more control over the outcome of their dispute.
If you are unable to resolve the matter for any reason, or if the behavior is
threatening or harassing, report it. We are dedicated to providing an
environment where participants feel welcome and safe.
## Reporting problems
Some Google open source projects may adopt a project-specific code of conduct.
In those cases, a Google employee will be identified as the Project Steward,
who will receive and handle reports of code of conduct violations. In the event
that a project hasn’t identified a Project Steward, you can report problems by
emailing opensource@google.com.
We will investigate every complaint, but you may not receive a direct response.
We will use our discretion in determining when and how to follow up on reported
incidents, which may range from not taking action to permanent expulsion from
the project and project-sponsored spaces. We will notify the accused of the
report and provide them an opportunity to discuss it before any action is
taken. The identity of the reporter will be omitted from the details of the
report supplied to the accused. In potentially harmful situations, such as
ongoing harassment or threats to anyone's safety, we may take action without
notice.
*This document was adapted from the [IndieWeb Code of Conduct][] and can also
be found at <https://opensource.google/conduct/>.*
[IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct
================================================
FILE: docs/contributing.md
================================================
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google/conduct/).
================================================
FILE: dvs/checkpoint/stabilzation/stabilzation_last.checkpoint
================================================
[File too large to display: 42.5 MB]
================================================
FILE: dvs/conf/stabilzation.yaml
================================================
data:
exp: 'stabilzation'
checkpoints_dir: './checkpoint'
log: './log'
data_dir: './video'
use_cuda: true
batch_size: 16
resize_ratio: 0.25
number_real: 10
number_virtual: 2
time_train: 2000 # ms
sample_freq: 40 # ms
channel_size: 1
num_workers: 16 # num_workers for data_loader
model:
load_model: null
cnn:
activate_function: relu # sigmoid, relu, tanh, quadratic
batch_norm: true
gap: false
layers:
rnn:
layers:
- - 512
- true
- - 512
- true
fc:
activate_function: relu
batch_norm: false # (batch_norm and drop_out) is False
layers:
- - 256
- true
- - 4 # last layer should be equal to nr_class
- true
drop_out: 0
train:
optimizer: "adam" # adam or sgd
momentum: 0.9 # for sgd
decay_epoch: null
epoch: 400
snapshot: 2
init_lr: 0.0001
lr_decay: 0.5
lr_step: 200 # if > 0 decay_epoch should be null
seed: 1
weight_decay: 0.0001
clip_norm: False
init: "xavier_uniform" # xavier_uniform or xavier_normal
loss:
follow: 10
angle: 1
smooth: 10 #10
c2_smooth: 200 #20
undefine: 2.0
opt: 0.1
stay: 0
================================================
FILE: dvs/conf/stabilzation_train.yaml
================================================
data:
exp: 'stabilzation_train'
checkpoints_dir: './checkpoint'
log: './log'
data_dir: './dataset_release'
use_cuda: true
batch_size: 16
resize_ratio: 0.25
number_real: 10
number_virtual: 2
time_train: 2000 # ms
sample_freq: 40 # ms
channel_size: 1
num_workers: 16 # num_workers for data_loader
model:
load_model: null
cnn:
activate_function: relu # sigmoid, relu, tanh, quadratic
batch_norm: true
gap: false
layers:
rnn:
layers:
- - 512
- true
- - 512
- true
fc:
activate_function: relu
batch_norm: false # (batch_norm and drop_out) is False
layers:
- - 256
- true
- - 4 # last layer should be equal to nr_class
- true
drop_out: 0
train:
optimizer: "adam" # adam or sgd
momentum: 0.9 # for sgd
decay_epoch: null
epoch: 400
snapshot: 2
init_lr: 0.0001
lr_decay: 0.5
lr_step: 200 # if > 0 decay_epoch should be null
seed: 1
weight_decay: 0.0001
clip_norm: False
init: "xavier_uniform" # xavier_uniform or xavier_normal
loss:
follow: 10
angle: 1
smooth: 10 #10
c2_smooth: 200 #20
undefine: 2.0
opt: 0.1
stay: 0
================================================
FILE: dvs/dataset.py
================================================
from torch.utils.data import Dataset
import os
import collections
from gyro import (
LoadGyroData,
LoadOISData,
LoadFrameData,
GetGyroAtTimeStamp,
get_static,
GetMetadata,
GetProjections,
train_GetGyroAtTimeStamp,
QuaternionProduct,
QuaternionReciprocal,
FindOISAtTimeStamp,
norm_quat
)
import random
import numpy as np
import torchvision.transforms as transforms
import torch
from flownet2 import flow_utils
from scipy import ndimage, misc
from numpy import linalg as LA
def get_data_loader(cf, no_flo = False):
    """Build the training and test ``DataLoader`` pair described by ``cf``.

    Both loaders take their batch size and worker count from ``cf["data"]``
    and use pinned memory; only the training loader shuffles.

    Arguments:
        cf: configuration mapping with a ``"data"`` section.
        no_flo: forwarded unchanged to ``get_dataset``.

    Returns:
        ``(trainloader, testloader)``.
    """
    data_cf = cf["data"]
    # Settings shared by both loaders.
    shared = dict(
        batch_size=data_cf["batch_size"],
        pin_memory=True,
        num_workers=data_cf["num_workers"],
    )
    train_set, test_set = get_dataset(cf, no_flo)
    trainloader = torch.utils.data.DataLoader(train_set, shuffle=True, **shared)
    testloader = torch.utils.data.DataLoader(test_set, shuffle=False, **shared)
    return trainloader, testloader
def get_dataset(cf, no_flo = False):
    """Create the training and test ``Dataset_Gyro`` objects described by ``cf``.

    The splits are looked up in ``<data_dir>/training`` and ``<data_dir>/test``;
    when one of those directories does not exist, ``data_dir`` itself is used
    for that split. ``sample_freq`` and ``time_train`` are multiplied by
    1000000 before being handed to the dataset (the config annotates them as
    ms, the dataset documents them as ns).

    Arguments:
        cf: configuration mapping with a ``"data"`` section.
        no_flo: forwarded to ``Dataset_Gyro``.

    Returns:
        ``(train_data, test_data)``.
    """
    data_cf = cf["data"]
    resize_ratio = data_cf["resize_ratio"]
    train_transform, test_transform = _data_transforms()

    def _resolve(split_name):
        # Prefer the dedicated split folder, otherwise fall back to data_dir.
        candidate = os.path.join(data_cf["data_dir"], split_name)
        if os.path.exists(candidate):
            return candidate
        return data_cf["data_dir"]

    def _make(split_path, split_transform):
        return Dataset_Gyro(
            split_path,
            sample_freq = data_cf["sample_freq"]*1000000,
            number_real = data_cf["number_real"],
            time_train = data_cf["time_train"]*1000000,
            transform = split_transform,
            resize_ratio = resize_ratio,
            no_flo = no_flo)

    train_path = _resolve("training")
    test_path = _resolve("test")
    train_data = _make(train_path, train_transform)
    test_data = _make(test_path, test_transform)
    return train_data, test_data
def get_inference_data_loader(cf, data_path, no_flo = False):
    """Wrap the inference dataset for ``data_path`` in a ``DataLoader``.

    The loader is fixed to batch size 1, no shuffling, pinned memory and a
    single worker.

    Arguments:
        cf: configuration mapping, forwarded to ``get_inference_dataset``.
        data_path: path forwarded to ``get_inference_dataset``.
        no_flo: forwarded to ``get_inference_dataset``.

    Returns:
        The ``torch.utils.data.DataLoader`` over the inference dataset.
    """
    return torch.utils.data.DataLoader(
        get_inference_dataset(cf, data_path, no_flo),
        batch_size=1,
        shuffle=False,
        pin_memory=True,
        num_workers=1,
    )
def get_inference_dataset(cf, data_path, no_flo = False):
    """Create a ``Dataset_Gyro`` in inference-only mode for ``data_path``.

    ``sample_freq`` and ``time_train`` from ``cf["data"]`` are multiplied by
    1000000 before being passed on, and the test transform from
    ``_data_transforms`` is used.

    Arguments:
        cf: configuration mapping with a ``"data"`` section.
        data_path: path handed to ``Dataset_Gyro`` as its ``path``.
        no_flo: forwarded to ``Dataset_Gyro``.

    Returns:
        The constructed ``Dataset_Gyro``.
    """
    data_cf = cf["data"]
    resize_ratio = data_cf["resize_ratio"]
    test_transform = _data_transforms()[1]
    return Dataset_Gyro(
        data_path,
        sample_freq = data_cf["sample_freq"]*1000000,
        number_real = data_cf["number_real"],
        time_train = data_cf["time_train"]*1000000,
        transform = test_transform,
        resize_ratio = resize_ratio,
        inference_only = True,
        no_flo = no_flo)
def _data_transforms():
    """Return the ``(train_transform, test_transform)`` pair.

    Both pipelines consist solely of ``transforms.ToTensor()``; they are
    returned as two separate ``Compose`` objects.
    """
    def _to_tensor_only():
        return transforms.Compose([transforms.ToTensor()])

    train_transform = _to_tensor_only()
    test_transform = _to_tensor_only()
    return train_transform, test_transform
class DVS_data():
    """Plain container for everything loaded from one video directory.

    All fields start out unset (``None``) except ``length``, which starts at 0;
    ``Dataset_Gyro.process_one_video`` fills them in.
    """

    def __init__(self):
        # Sensor / metadata tables.
        self.gyro = self.ois = self.frame = None
        # Number of usable frames.
        self.length = 0
        # Optical-flow file lists and the shape of a flow file.
        self.flo_path = self.flo_shape = self.flo_back_path = None
class Dataset_Gyro(Dataset):
def __init__(self, path, sample_freq = 33*1000000, number_real = 10, time_train = 2000*1000000, \
transform = None, inference_only = False, no_flo = False, resize_ratio = 1):
r"""
Arguments:
sample_freq: real quaternions [t-sample_freq*number_real, t+sample_freq*number_real] ns
number_real: real gyro num in half time_interval
time_train: time for a batch ns
"""
self.sample_freq = sample_freq
self.number_real = number_real
self.no_flo = no_flo
self.resize_ratio = resize_ratio
self.static_options = get_static()
self.inference_only = inference_only
self.ois_ratio = np.array([self.static_options["crop_window_width"] / self.static_options["width"], \
self.static_options["crop_window_height"] / self.static_options["height"]]) * 0.01
self.unit_size = 4
if inference_only:
self.length = 1
self.data = [self.process_one_video(path)]
self.number_train = self.data[0].length
return
self.time_train = time_train
self.number_train = time_train//self.sample_freq
self.data_name = sorted(os.listdir(path))
self.length = len(self.data_name)
self.data = []
for i in range(self.length):
self.data.append(self.process_one_video(os.path.join(path,self.data_name[i])))
def process_one_video(self, path):
dvs_data = DVS_data()
files = sorted(os.listdir(path))
print(path)
for f in files:
file_path = os.path.join(path,f)
if "gimbal" in file_path.lower():
continue
if "frame" in f and "txt" in f:
dvs_data.frame = LoadFrameData(file_path)
print("frame:", dvs_data.frame.shape, end=" ")
elif "gyro" in f:
dvs_data.gyro = LoadGyroData(file_path)
dvs_data.gyro = preprocess_gyro(dvs_data.gyro)
print("gyro:", dvs_data.gyro.shape, end=" ")
elif "ois" in f and "txt" in f:
dvs_data.ois = LoadOISData(file_path)
print("ois:", dvs_data.ois.shape, end=" ")
elif f == "flo":
dvs_data.flo_path, dvs_data.flo_shape = LoadFlow(file_path)
print("flo_path:", len(dvs_data.flo_path), end=" ")
print("flo_shape:", dvs_data.flo_shape, end=" ")
elif f == "flo_back":
dvs_data.flo_back_path, _ = LoadFlow(file_path)
print()
if dvs_data.flo_path is not None:
dvs_data.length = min(dvs_data.frame.shape[0] - 1, len(dvs_data.flo_path))
else:
dvs_data.length = dvs_data.frame.shape[0] - 1
return dvs_data
def generate_quaternions(self, dvs_data):
first_id = random.randint(0, dvs_data.length - self.number_train) + 1 # skip the first frame
sample_data = np.zeros((self.number_train, 2 * self.number_real + 1, self.unit_size), dtype=np.float32)
sample_ois = np.zeros((self.number_train, 2), dtype=np.float32)
sample_time = np.zeros((self.number_train+1), dtype=np.float32)
sample_time[0] = get_timestamp(dvs_data.frame, first_id - 1)
real_postion = np.zeros((self.number_train, 4), dtype=np.float32)
time_start = sample_time[0]
for i in range(self.number_train):
sample_time[i+1] = get_timestamp(dvs_data.frame, first_id + i)
real_postion[i] = GetGyroAtTimeStamp(dvs_data.gyro, sample_time[i+1] - self.sample_freq)
sample_ois[i] = self.get_ois_at_timestamp(dvs_data.ois, sample_time[i+1])
for j in range(-self.number_real, self.number_real+1):
index = j + self.number_real
time_stamp = sample_time[i+1] + self.sample_freq * j
sample_data[i, index] = self.get_data_at_timestamp(dvs_data.gyro, dvs_data.ois, time_stamp, real_postion[i])
sample_data = np.reshape(sample_data, (self.number_train, (2*self.number_real+1) * self.unit_size))
return sample_data, sample_time, first_id, real_postion, sample_ois
def load_flo(self, idx, first_id):
shape = self.data[idx].flo_shape
h, w = shape[0], shape[1]
flo = np.zeros((self.number_train, h, w, 2))
flo_back = np.zeros((self.number_train, h, w, 2))
for i in range(self.number_train):
frame_id = i + first_id
f = flow_utils.readFlow(self.data[idx].flo_path[frame_id-1]).astype(np.float32)
flo[i] = f
f_b = flow_utils.readFlow(self.data[idx].flo_back_path[frame_id-1]).astype(np.float32)
flo_back[i] = f_b
return flo, flo_back
def load_real_projections(self, idx, first_id):
real_projections = np.zeros((self.number_train + 1, self.static_options["num_grid_rows"], 3, 3))
for i in range(self.number_train + 1):
frame_id = i + first_id
metadata = GetMetadata(self.data[idx].frame, frame_id - 1)
real_projections[i] = np.array(GetProjections(self.static_options, metadata, self.data[idx].gyro, np.zeros(self.data[idx].ois.shape), no_shutter = True))
return real_projections
def __getitem__(self, idx):
inputs, times, first_id, real_postion, ois = self.generate_quaternions(self.data[idx])
real_projections = self.load_real_projections(idx, first_id)
if self.no_flo:
flo, flo_back = 0, 0
else:
flo, flo_back = self.load_flo(idx, first_id)
return inputs, times, flo, flo_back, real_projections, real_postion, ois, idx
def __len__(self):
return self.length
def get_virtual_data(self, virtual_queue, real_queue_idx, pre_times, cur_times, time_start, batch_size, number_virtual, quat_t_1):
# virtual_queue: [batch_size, num, 5 (timestamp, quats)]
# eular angle,
# deta R angular velocity [Q't-1, Q't-2]
# output virtual angular velocity, x, x*dtime => detaQt
virtual_data = np.zeros((batch_size, number_virtual, 4), dtype=np.float32)
vt_1 = np.zeros((batch_size, 4), dtype=np.float32)
quat_t_1 = quat_t_1.numpy()
for i in range(batch_size):
sample_time = cur_times[i]
for j in range(number_virtual):
time_stamp = sample_time - self.sample_freq * (number_virtual - j)
virtual_data[i, j] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, time_stamp, time_start[i], quat_t_1[i])
vt_1[i] = get_virtual_at_timestamp(virtual_queue[i], self.data[real_queue_idx[i]].gyro, pre_times[i], time_start[i], None)
virtual_data = np.reshape(virtual_data, (batch_size, number_virtual * 4))
return torch.tensor(virtual_data, dtype=torch.float), torch.tensor(vt_1, dtype=torch.float)
def update_virtual_queue(self, batch_size, virtual_queue, out, times):
virtual_data = np.zeros((batch_size, 5))
virtual_data[:,0] = times
virtual_data[:, 1:] = out
virtual_data = np.expand_dims(virtual_data, axis = 1)
if None in virtual_queue:
virtual_queue = virtual_data
else:
virtual_queue = np.concatenate((virtual_queue, virtual_data), axis = 1)
return virtual_queue
def random_init_virtual_queue(self, batch_size, real_postion, times):
    """Build a three-entry virtual queue seeded with a randomly perturbed pose.

    The three entries are time-stamped ``2.1``, ``1.1`` and ``0.1`` sample
    periods (``self.sample_freq``) before ``times``.  For every batch element
    one perturbation quaternion is drawn, composed with ``real_postion[i]``
    and written to all three entries.

    Returns an array of shape ``(batch_size, 3, 5)`` laid out as
    ``(timestamp, quaternion)``.
    """
    queue = np.zeros((batch_size, 3, 5))
    # Slot 0 is the oldest entry, slot 2 the most recent one.
    for slot, periods_back in enumerate((2.1, 1.1, 0.1)):
        queue[:, slot, 0] = times - periods_back * self.sample_freq

    for b in range(batch_size):
        perturbation = np.random.uniform(low=-0.06, high= 0.06, size=4) # transfer to angle # 0.05
        perturbation[3] = 1
        perturbation = perturbation / LA.norm(perturbation)
        seeded = norm_quat(QuaternionProduct(real_postion[b], perturbation))
        # The same quaternion fills all three slots.
        for slot in (2, 1, 0):
            queue[b, slot, 1:] = seeded
    return queue
def get_data_at_timestamp(self, gyro_data, ois_data, time_stamp, quat_t_1):
    """Return the quaternion at ``time_stamp`` expressed relative to ``quat_t_1``.

    The result is ``QuaternionProduct(q_t, QuaternionReciprocal(quat_t_1))``
    where ``q_t`` is looked up in ``gyro_data``.  ``ois_data`` is accepted but
    not used here.
    """
    current = GetGyroAtTimeStamp(gyro_data, time_stamp)
    inverse_reference = QuaternionReciprocal(quat_t_1)
    return QuaternionProduct(current, inverse_reference)
def get_ois_at_timestamp(self, ois_data, time_stamp):
    """Look up the OIS reading at ``time_stamp`` and divide it by ``self.ois_ratio``."""
    raw_ois = FindOISAtTimeStamp(ois_data, time_stamp)
    return np.array(raw_ois) / self.ois_ratio
def get_timestamp(frame_data, idx):
    """Return the timestamp of frame ``idx`` advanced by half of its ``rs_time_ns``.

    The value is ``metadata["timestamp_ns"] + metadata["rs_time_ns"] * 0.5``
    where ``metadata`` is what ``GetMetadata(frame_data, idx)`` returns.
    NOTE(review): ``rs_time_ns`` is presumably the rolling-shutter readout
    duration, which would make this the mid-exposure time -- confirm.
    """
    # The former ``sample_time = frame_data[idx, 0]`` local was never read.
    metadata = GetMetadata(frame_data, idx)
    return metadata["timestamp_ns"] + metadata["rs_time_ns"] * 0.5
def preprocess_gyro(gyro, extend = 200):
    """Prepend ``extend`` mirrored samples to a gyro track.

    Row ``k`` (``1 <= k <= extend``) of ``gyro`` is reflected about the first
    sample's timestamp: its time becomes ``t0 - (t_k - t0)``, columns 1..3 are
    negated and column 4 is copied.  The reflected rows are placed in front of
    the original data in reverse order, so timestamps keep increasing whenever
    the input timestamps do.

    Returns a new array with ``extend + len(gyro)`` rows and 5 columns.
    """
    time_start = gyro[0,0]
    mirrored = np.zeros((extend, 5))
    for src in range(1, extend + 1):
        dst = extend - src  # source row 1 lands just before the real data
        mirrored[dst, 0] = time_start - (gyro[src, 0] - time_start)
        mirrored[dst, 4] = gyro[src, 4]
        mirrored[dst, 1:4] = -gyro[src, 1:4]
    return np.concatenate((mirrored, gyro), axis = 0)
def LoadFlow(path):
    """List the flow files under ``path`` and report the shape of the first one.

    Returns ``(file_paths, shape)`` where ``file_paths`` are the directory
    entries in sorted order, each joined onto ``path``, and ``shape`` is the
    shape of the array ``flow_utils.readFlow`` returns for the first entry.
    """
    file_path = [os.path.join(path, entry) for entry in sorted(os.listdir(path))]
    first_shape = flow_utils.readFlow(file_path[0]).shape
    return file_path, first_shape
def get_virtual_at_timestamp(virtual_queue, real_queue, time_stamp, time_start, quat_t_1 = None, sample_freq = None):
    """Return the pose at ``time_stamp``, preferring the virtual queue over the real one.

    The virtual queue is consulted first (when it is not ``None``); if it is
    absent or its lookup yields ``None`` the real gyro queue is used instead.
    When ``quat_t_1`` is given, the result is expressed relative to it as
    ``QuaternionProduct(q_t, QuaternionReciprocal(quat_t_1))``; otherwise the
    looked-up quaternion is returned unchanged.  ``time_start`` and
    ``sample_freq`` are accepted but not used here.
    """
    quat_t = None
    if virtual_queue is not None:
        quat_t = train_GetGyroAtTimeStamp(virtual_queue, time_stamp)
    if quat_t is None:
        # Fall back to the measured gyro track.
        quat_t = GetGyroAtTimeStamp(real_queue, time_stamp)

    if quat_t_1 is None:
        return quat_t
    return QuaternionProduct(quat_t, QuaternionReciprocal(quat_t_1))
================================================
FILE: dvs/flownet2/LICENSE
================================================
Copyright 2017 NVIDIA CORPORATION
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: dvs/flownet2/README.md
================================================
# flownet2-pytorch
Pytorch implementation of [FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks](https://arxiv.org/abs/1612.01925).
Multiple GPU training is supported, and the code provides examples for training or inference on [MPI-Sintel](http://sintel.is.tue.mpg.de/) clean and final datasets. The same commands can be used for training or inference with other datasets. See below for more detail.
Inference using fp16 (half-precision) is also supported.
For more help, type
python main.py --help
## Network architectures
Below are the different flownet neural network architectures that are provided.
A batchnorm version for each network is also available.
- **FlowNet2S**
- **FlowNet2C**
- **FlowNet2CS**
- **FlowNet2CSS**
- **FlowNet2SD**
- **FlowNet2**
## Custom layers
`FlowNet2` or `FlowNet2C*` architectures rely on custom layers `Resample2d` or `Correlation`.
PyTorch implementations of these layers with CUDA kernels are available at [./networks](./networks).
Note : Currently, half precision kernels are not available for these layers.
## Data Loaders
Dataloaders for FlyingChairs, FlyingThings, ChairsSDHom and ImagesFromFolder are available in [datasets.py](./datasets.py).
## Loss Functions
L1 and L2 losses with multi-scale support are available in [losses.py](./losses.py).
## Installation
# get flownet2-pytorch source
git clone https://github.com/NVIDIA/flownet2-pytorch.git
cd flownet2-pytorch
# install custom layers
bash install.sh
### Python requirements
Currently, the code supports python 3
* numpy
* PyTorch ( == 0.4.1, for <= 0.4.0 see branch [python36-PyTorch0.4](https://github.com/NVIDIA/flownet2-pytorch/tree/python36-PyTorch0.4))
* scipy
* scikit-image
* tensorboardX
* colorama, tqdm, setproctitle
## Converted Caffe Pre-trained Models
We've included caffe pre-trained models. Should you use these pre-trained weights, please adhere to the [license agreements](https://drive.google.com/file/d/1TVv0BnNFh3rpHZvD-easMb9jYrPE2Eqd/view?usp=sharing).
* [FlowNet2](https://drive.google.com/file/d/1hF8vS6YeHkx3j2pfCeQqqZGwA_PJq_Da/view?usp=sharing)[620MB]
* [FlowNet2-C](https://drive.google.com/file/d/1BFT6b7KgKJC8rA59RmOVAXRM_S7aSfKE/view?usp=sharing)[149MB]
* [FlowNet2-CS](https://drive.google.com/file/d/1iBJ1_o7PloaINpa8m7u_7TsLCX0Dt_jS/view?usp=sharing)[297MB]
* [FlowNet2-CSS](https://drive.google.com/file/d/157zuzVf4YMN6ABAQgZc8rRmR5cgWzSu8/view?usp=sharing)[445MB]
* [FlowNet2-CSS-ft-sd](https://drive.google.com/file/d/1R5xafCIzJCXc8ia4TGfC65irmTNiMg6u/view?usp=sharing)[445MB]
* [FlowNet2-S](https://drive.google.com/file/d/1V61dZjFomwlynwlYklJHC-TLfdFom3Lg/view?usp=sharing)[148MB]
* [FlowNet2-SD](https://drive.google.com/file/d/1QW03eyYG_vD-dT-Mx4wopYvtPu_msTKn/view?usp=sharing)[173MB]
## Inference
# Example on MPISintel Clean
python main.py --inference --model FlowNet2 --save_flow --inference_dataset MpiSintelClean \
--inference_dataset_root /path/to/mpi-sintel/clean/dataset \
--resume /path/to/checkpoints
## Training and validation
# Example on MPISintel Final and Clean, with L1Loss on FlowNet2 model
python main.py --batch_size 8 --model FlowNet2 --loss=L1Loss --optimizer=Adam --optimizer_lr=1e-4 \
--training_dataset MpiSintelFinal --training_dataset_root /path/to/mpi-sintel/final/dataset \
--validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
# Example on MPISintel Final and Clean, with MultiScale loss on FlowNet2C model
python main.py --batch_size 8 --model FlowNet2C --optimizer=Adam --optimizer_lr=1e-4 --loss=MultiScale --loss_norm=L1 \
--loss_numScales=5 --loss_startScale=4 --optimizer_lr=1e-4 --crop_size 384 512 \
--training_dataset FlyingChairs --training_dataset_root /path/to/flying-chairs/dataset \
--validation_dataset MpiSintelClean --validation_dataset_root /path/to/mpi-sintel/clean/dataset
## Results on MPI-Sintel
[Predicted flows on MPI-Sintel (video)](https://www.youtube.com/watch?v=HtBmabY8aeU "Predicted flows on MPI-Sintel")
## Reference
If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper:
````
@InProceedings{IMKDB17,
author = "E. Ilg and N. Mayer and T. Saikia and M. Keuper and A. Dosovitskiy and T. Brox",
title = "FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks",
booktitle = "IEEE Conference on Computer Vision and Pattern Recognition (CVPR)",
month = "Jul",
year = "2017",
url = "http://lmb.informatik.uni-freiburg.de//Publications/2017/IMKDB17"
}
````
```
@misc{flownet2-pytorch,
author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro},
title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks},
year = {2017},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}}
}
```
## Related Optical Flow Work from Nvidia
Code (in Caffe and Pytorch): [PWC-Net](https://github.com/NVlabs/PWC-Net)
Paper : [PWC-Net: CNNs for Optical Flow Using Pyramid, Warping, and Cost Volume](https://arxiv.org/abs/1709.02371).
## Acknowledgments
Parts of this code were derived, as noted in the code, from [ClementPinard/FlowNetPytorch](https://github.com/ClementPinard/FlowNetPytorch).
================================================
FILE: dvs/flownet2/__init__.py
================================================
from .utils import flow_utils, tools
================================================
FILE: dvs/flownet2/convert.py
================================================
#!/usr/bin/env python2.7
import caffe
from caffe.proto import caffe_pb2
import sys, os
import torch
import torch.nn as nn
import argparse, tempfile
import numpy as np
# Convert a Caffe FlowNet2-family model into a flownet2-pytorch checkpoint.
# Positional arguments: the Caffe weights, the prototxt template whose
# $PLACEHOLDER$ fields are filled in below, and the flownet2-pytorch checkout
# (used both as an import path and as the output directory).
parser = argparse.ArgumentParser()
parser.add_argument('caffe_model', help='input model in hdf5 or caffemodel format')
parser.add_argument('prototxt_template',help='prototxt template')
parser.add_argument('flownet2_pytorch', help='path to flownet2-pytorch')

args = parser.parse_args()

# Extra attributes set on args before it is handed to the model constructors.
args.rgb_max = 255
args.fp16 = False
args.grads = {}

# load models
sys.path.append(args.flownet2_pytorch)

import models
from utils.param_utils import *

# One row per supported model:
#   (marker searched for in the caffe model path,
#    model class name in ``models``,
#    checkpoint file stem,
#    parse steps as (sub-network attribute or None for the whole model,
#                    parser function, keyword arguments))
# Rows are tried in order, matching the original if/elif chain.
_CSS_STEPS = (
    ('flownetc', parse_flownetc, {}),
    ('flownets_1', parse_flownets, {'param_prefix': 'net2_'}),
    ('flownets_2', parse_flownets, {'param_prefix': 'net3_'}),
)
_CONVERSIONS = (
    ('FlowNet2/', 'FlowNet2', 'FlowNet2', _CSS_STEPS + (
        ('flownets_d', parse_flownetsd, {'param_prefix': 'netsd_'}),
        ('flownetfusion', parse_flownetfusion, {'param_prefix': 'fuse_'}),
    )),
    ('FlowNet2-C/', 'FlowNet2C', 'FlowNet2-C', (
        (None, parse_flownetc, {}),
    )),
    ('FlowNet2-CS/', 'FlowNet2CS', 'FlowNet2-CS', _CSS_STEPS[:2]),
    ('FlowNet2-CSS/', 'FlowNet2CSS', 'FlowNet2-CSS', _CSS_STEPS),
    ('FlowNet2-CSS-ft-sd/', 'FlowNet2CSS', 'FlowNet2-CSS-ft-sd', _CSS_STEPS),
    ('FlowNet2-S/', 'FlowNet2S', 'FlowNet2-S', (
        (None, parse_flownetsonly, {'param_prefix': ''}),
    )),
    ('FlowNet2-SD/', 'FlowNet2SD', 'FlowNet2-SD', (
        (None, parse_flownetsd, {'param_prefix': ''}),
    )),
)

# Resolve the model type before the expensive Caffe load so that an
# unsupported path fails fast and with a non-zero exit status.
conversion = None
for candidate in _CONVERSIONS:
    if candidate[0] in args.caffe_model:
        conversion = candidate
        break
if conversion is None:
    sys.exit('model type could not be determined from input caffe model %s'%(args.caffe_model))
_, model_class_name, checkpoint_stem, parse_steps = conversion

width = 256
height = 256
keys = {'TARGET_WIDTH': width,
        'TARGET_HEIGHT': height,
        'ADAPTED_WIDTH':width,
        'ADAPTED_HEIGHT':height,
        'SCALE_WIDTH':1.,
        'SCALE_HEIGHT':1.,}

# Read the template as plain text: newer NumPy releases reject
# ``np.loadtxt(..., delimiter='\n')``, and loadtxt also drops '#' lines.
with open(args.prototxt_template) as template_file:
    template = template_file.read()
for k in keys:
    template = template.replace('$%s$'%(k),str(keys[k]))

# Caffe needs the filled-in prototxt on disk; the temporary file is removed
# as soon as the network has been loaded.
with tempfile.NamedTemporaryFile(mode='w', delete=True) as prototxt:
    prototxt.write(template)
    prototxt.flush()
    net = caffe.Net(prototxt.name, args.caffe_model, caffe.TEST)

# Copy every layer's weight and bias blobs out of the Caffe net.
weights = {}
biases = {}

for k, v in list(net.params.items()):
    weights[k] = np.array(v[0].data).reshape(v[0].data.shape)
    biases[k] = np.array(v[1].data).reshape(v[1].data.shape)
    print((k, weights[k].shape, biases[k].shape))

# Build the PyTorch model and load the Caffe parameters sub-network by
# sub-network.
model = getattr(models, model_class_name)(args)
for attribute, parse, parse_kwargs in parse_steps:
    target = model if attribute is None else getattr(model, attribute)
    parse(target.modules(), weights, biases, **parse_kwargs)

state = {'epoch': 0,
         'state_dict': model.state_dict(),
         'best_EPE': 1e10}
torch.save(state, os.path.join(args.flownet2_pytorch, checkpoint_stem + '_checkpoint.pth.tar'))

print(("done converting ", args.caffe_model))
================================================
FILE: dvs/flownet2/datasets.py
================================================
import torch
import torch.utils.data as data
import os, math, random
from os.path import *
import numpy as np
from glob import glob
import utils.frame_utils as frame_utils
from imageio import imread
class StaticRandomCrop(object):
    """Fixed-size crop whose top-left corner is drawn once, at construction.

    Every call on the same instance cuts the same window, so several arrays
    (for example an image pair and its flow) stay aligned.
    """

    def __init__(self, image_size, crop_size):
        self.th, self.tw = crop_size
        height, width = image_size
        # Row offset is drawn first, then the column offset.
        self.h1 = random.randint(0, height - self.th)
        self.w1 = random.randint(0, width - self.tw)

    def __call__(self, img):
        rows = slice(self.h1, self.h1 + self.th)
        cols = slice(self.w1, self.w1 + self.tw)
        return img[rows, cols, :]
class StaticCenterCrop(object):
    """Crop a window of ``crop_size`` centred in an image of ``image_size``."""

    def __init__(self, image_size, crop_size):
        self.th, self.tw = crop_size
        self.h, self.w = image_size

    def __call__(self, img):
        top = (self.h - self.th) // 2
        bottom = (self.h + self.th) // 2
        left = (self.w - self.tw) // 2
        right = (self.w + self.tw) // 2
        return img[top:bottom, left:right, :]
class Padding(object):
    """Place an image in the top-left corner of a zero-filled 3-channel canvas.

    ``image_size`` is the (height, width) of the incoming image and
    ``pad_size`` the (height, width) of the canvas that is returned.
    """

    def __init__(self, image_size, pad_size):
        self.th, self.tw = pad_size
        self.h, self.w = image_size

    def __call__(self, img):
        canvas = np.zeros((self.th, self.tw, 3))
        # Only the top-left image-sized region is written; the rest stays zero.
        canvas[0:self.h, 0:self.w, :] = img
        return canvas
class MpiSintel(data.Dataset):
    """Image pairs with flow files laid out as ``<root>/<dstype>`` and ``<root>/flow``.

    Each item is ``([images], [flow])``: ``images`` stacks the two frames of a
    pair and ``flow`` is the matching flow field, both as float32 tensors.
    The dataset reports ``size * replicates`` items and wraps the index.
    """

    def __init__(self, args, is_cropped = False, root = '', dstype = 'clean', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        flow_root = join(root, 'flow')
        image_root = join(root, dstype)

        self.flow_list = []
        self.image_list = []

        for flow_path in sorted(glob(join(flow_root, '*/*.flo'))):
            if 'test' in flow_path:
                continue

            # The last 8 characters of the relative path are a 4-digit frame
            # number followed by the ".flo" extension.
            relative = flow_path[len(flow_root)+1:]
            prefix = relative[:-8]
            frame_no = int(relative[-8:-4])

            first = join(image_root, prefix + "%04d"%(frame_no+0) + '.png')
            second = join(image_root, prefix + "%04d"%(frame_no+1) + '.png')

            # Keep the sample only when both frames and the flow file exist.
            if not (isfile(first) and isfile(second) and isfile(flow_path)):
                continue

            self.image_list.append([first, second])
            self.flow_list.append(flow_path)

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # Fall back to the largest multiple of 64 that fits the frame when the
        # requested size is negative or the frame is not 64-aligned.  The list
        # is updated in place, so ``args.inference_size`` changes with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
            self.render_size[1] = ( (self.frame_size[1])//64 ) * 64

        args.inference_size = self.render_size

        assert (len(self.image_list) == len(self.flow_list))

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)
        flow = frame_utils.read_gen(self.flow_list[index])

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else StaticCenterCrop(image_size, self.render_size))

        # The same cropper instance is applied to both frames and the flow.
        images = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        images = np.array(images).transpose(3,0,1,2)
        flow = flow.transpose(2,0,1)

        images = torch.from_numpy(images.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))

        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class MpiSintelClean(MpiSintel):
    """``MpiSintel`` bound to the ``'clean'`` image folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'clean', replicates = replicates)
class MpiSintelFinal(MpiSintel):
    """``MpiSintel`` bound to the ``'final'`` image folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'final', replicates = replicates)
class FlyingChairs(data.Dataset):
    """Image pairs (``*.ppm``) and flow files (``*.flo``) stored flat in ``root``.

    After sorting, images ``2*i`` and ``2*i + 1`` form the pair for flow
    ``i``.  Each item is ``([images], [flow])`` as float32 tensors.
    """

    def __init__(self, args, is_cropped, root = '/path/to/FlyingChairs_release/data', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        images = sorted( glob( join(root, '*.ppm') ) )

        self.flow_list = sorted( glob( join(root, '*.flo') ) )

        assert (len(images)//2 == len(self.flow_list))

        # Consecutive sorted images are paired up, one pair per flow file.
        self.image_list = [[images[2*i], images[2*i + 1]]
                           for i in range(len(self.flow_list))]

        assert len(self.image_list) == len(self.flow_list)

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # Fall back to the largest multiple of 64 that fits the frame when the
        # requested size is negative or the frame is not 64-aligned.  The list
        # is updated in place, so ``args.inference_size`` changes with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
            self.render_size[1] = ( (self.frame_size[1])//64 ) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)
        flow = frame_utils.read_gen(self.flow_list[index])

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else StaticCenterCrop(image_size, self.render_size))

        # The same cropper instance is applied to both frames and the flow.
        images = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        images = np.array(images).transpose(3,0,1,2)
        flow = flow.transpose(2,0,1)

        images = torch.from_numpy(images.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))

        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class FlyingThings(data.Dataset):
    """Image pairs and ``into_future`` flow files from the ``TRAIN`` split under ``root``.

    Image folders (``<root>/<dstype>/TRAIN/*/*/{left,right}``) and flow
    folders (``<root>/optical_flow_flo_format/TRAIN/*/*/into_future/{left,right}``)
    are sorted and matched position by position.  Each item is
    ``([images], [flow])`` as float32 tensors.
    """

    def __init__(self, args, is_cropped, root = '/path/to/flyingthings3d', dstype = 'frames_cleanpass', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        scene_dirs = sorted(glob(join(root, dstype, 'TRAIN/*/*')))
        image_dirs = sorted([join(f, 'left') for f in scene_dirs] + [join(f, 'right') for f in scene_dirs])

        flow_scene_dirs = sorted(glob(join(root, 'optical_flow_flo_format/TRAIN/*/*')))
        flow_dirs = sorted([join(f, 'into_future/left') for f in flow_scene_dirs] + [join(f, 'into_future/right') for f in flow_scene_dirs])

        assert (len(image_dirs) == len(flow_dirs))

        self.image_list = []
        self.flow_list = []

        for idir, fdir in zip(image_dirs, flow_dirs):
            images = sorted( glob(join(idir, '*.png')) )
            flows = sorted( glob(join(fdir, '*.flo')) )
            # Flow i pairs image i with image i + 1.
            for i, flow_path in enumerate(flows):
                self.image_list.append([images[i], images[i+1]])
                self.flow_list.append(flow_path)

        assert len(self.image_list) == len(self.flow_list)

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # Fall back to the largest multiple of 64 that fits the frame when the
        # requested size is negative or the frame is not 64-aligned.  The list
        # is updated in place, so ``args.inference_size`` changes with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
            self.render_size[1] = ( (self.frame_size[1])//64 ) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)
        flow = frame_utils.read_gen(self.flow_list[index])

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else StaticCenterCrop(image_size, self.render_size))

        # The same cropper instance is applied to both frames and the flow.
        images = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        images = np.array(images).transpose(3,0,1,2)
        flow = flow.transpose(2,0,1)

        images = torch.from_numpy(images.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))

        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class FlyingThingsClean(FlyingThings):
    """``FlyingThings`` bound to the ``'frames_cleanpass'`` image folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'frames_cleanpass', replicates = replicates)
class FlyingThingsFinal(FlyingThings):
    """``FlyingThings`` bound to the ``'frames_finalpass'`` image folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'frames_finalpass', replicates = replicates)
class ChairsSDHom(data.Dataset):
    """Image pairs from ``t0``/``t1`` with flow from ``flow`` under ``<root>/<dstype>``.

    Sorted ``t0`` image ``i``, ``t1`` image ``i`` and flow ``i`` form one
    sample.  The flow array is reversed along its first axis before cropping.
    Each item is ``([images], [flow])`` as float32 tensors.
    """

    def __init__(self, args, is_cropped, root = '/path/to/chairssdhom/data', dstype = 'train', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        image1 = sorted( glob( join(root, dstype, 't0/*.png') ) )
        image2 = sorted( glob( join(root, dstype, 't1/*.png') ) )
        self.flow_list = sorted( glob( join(root, dstype, 'flow/*.flo') ) )

        assert (len(image1) == len(self.flow_list))

        # One [t0, t1] pair per flow file, matched by sorted position.
        self.image_list = [[image1[i], image2[i]]
                           for i in range(len(self.flow_list))]

        assert len(self.image_list) == len(self.flow_list)

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # Fall back to the largest multiple of 64 that fits the frame when the
        # requested size is negative or the frame is not 64-aligned.  The list
        # is updated in place, so ``args.inference_size`` changes with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
            self.render_size[1] = ( (self.frame_size[1])//64 ) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)

        # Reverse the first axis of the flow array as read from disk.
        flow = frame_utils.read_gen(self.flow_list[index])[::-1,:,:]

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else StaticCenterCrop(image_size, self.render_size))

        # The same cropper instance is applied to both frames and the flow.
        images = [cropper(img1), cropper(img2)]
        flow = cropper(flow)

        images = np.array(images).transpose(3,0,1,2)
        flow = flow.transpose(2,0,1)

        images = torch.from_numpy(images.astype(np.float32))
        flow = torch.from_numpy(flow.astype(np.float32))

        return [images], [flow]

    def __len__(self):
        return self.size * self.replicates
class ChairsSDHomTrain(ChairsSDHom):
    """``ChairsSDHom`` bound to the ``'train'`` sub-folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'train', replicates = replicates)
class ChairsSDHomTest(ChairsSDHom):
    """``ChairsSDHom`` bound to the ``'test'`` sub-folder."""

    def __init__(self, args, is_cropped = False, root = '', replicates = 1):
        super().__init__(args, is_cropped = is_cropped, root = root,
                         dstype = 'test', replicates = replicates)
class ImagesFromFolder(data.Dataset):
    """Consecutive image pairs from a flat folder that has no flow files.

    Sorted image ``i`` is paired with image ``i + 1``.  Each item is
    ``([images], [zeros])`` where the second element is an all-zero tensor
    with 2 channels standing in for the missing flow.
    """

    def __init__(self, args, is_cropped, root = '/path/to/frames/only/folder', iext = 'png', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        images = sorted( glob( join(root, '*.' + iext) ) )
        self.image_list = [[images[i], images[i+1]]
                           for i in range(len(images)-1)]

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # Fall back to the largest multiple of 64 that fits the frame when the
        # requested size is negative or the frame is not 64-aligned.  The list
        # is updated in place, so ``args.inference_size`` changes with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( (self.frame_size[0])//64 ) * 64
            self.render_size[1] = ( (self.frame_size[1])//64 ) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else StaticCenterCrop(image_size, self.render_size))

        images = [cropper(img1), cropper(img2)]
        images = np.array(images).transpose(3,0,1,2)
        images = torch.from_numpy(images.astype(np.float32))

        # Placeholder target: first dim of ``images``, 2 channels, then the
        # last two dims of ``images``.
        placeholder = torch.zeros(images.size()[0:1] + (2,) + images.size()[-2:])
        return [images], [placeholder]

    def __len__(self):
        return self.size * self.replicates
class Google(data.Dataset):
    """Consecutive ``*.png`` pairs from ``<root>/<dstype>``, zero-padded instead of cropped.

    When the requested size is negative or the frame is not 64-aligned, the
    render size is rounded *up* to the next multiple of 64, and (unless
    ``is_cropped`` is set) frames are placed on a canvas of that size with
    ``Padding``.  Each item is the one-element list ``[images]``.
    """

    def __init__(self, args, is_cropped = False, root = '', dstype = 'frames', replicates = 1):
        self.args = args
        self.is_cropped = is_cropped
        self.crop_size = args.crop_size
        self.render_size = args.inference_size
        self.replicates = replicates

        image_root = join(root, dstype)
        file_list = sorted(glob(join(image_root, '*.png')))

        self.image_list = []
        for current, following in zip(file_list, file_list[1:]):
            img1 = join(current)
            img2 = join(following)
            # Skip pairs where either entry is not a regular file.
            if not (isfile(img1) and isfile(img2)):
                continue
            self.image_list.append([img1, img2])

        self.size = len(self.image_list)

        self.frame_size = frame_utils.read_gen(self.image_list[0][0]).shape

        # The list is updated in place, so ``args.inference_size`` changes
        # with it.
        needs_default = ((self.render_size[0] < 0) or (self.render_size[1] < 0)
                         or (self.frame_size[0]%64) or (self.frame_size[1]%64))
        if needs_default:
            self.render_size[0] = ( math.ceil(self.frame_size[0]/64) ) * 64
            self.render_size[1] = ( math.ceil(self.frame_size[1]/64) ) * 64

        args.inference_size = self.render_size

    def __getitem__(self, index):
        index = index % self.size

        first_path, second_path = self.image_list[index][0], self.image_list[index][1]
        img1 = frame_utils.read_gen(first_path)
        img2 = frame_utils.read_gen(second_path)

        image_size = img1.shape[:2]
        cropper = (StaticRandomCrop(image_size, self.crop_size) if self.is_cropped
                   else Padding(image_size, self.render_size))

        images = [cropper(img1), cropper(img2)]
        images = np.array(images).transpose(3,0,1,2)
        images = torch.from_numpy(images.astype(np.float32))

        return [images]

    def __len__(self):
        return self.size * self.replicates
'''
import argparse
import sys, os
import importlib
from scipy.misc import imsave
import numpy as np
import datasets
reload(datasets)
parser = argparse.ArgumentParser()
args = parser.parse_args()
args.inference_size = [1080, 1920]
args.crop_size = [384, 512]
args.effective_batch_size = 1
index = 500
v_dataset = datasets.MpiSintelClean(args, True, root='../MPI-Sintel/flow/training')
a, b = v_dataset[index]
im1 = a[0].numpy()[:,0,:,:].transpose(1,2,0)
im2 = a[0].numpy()[:,1,:,:].transpose(1,2,0)
imsave('./img1.png', im1)
imsave('./img2.png', im2)
flow_utils.writeFlow('./flow.flo', b[0].numpy().transpose(1,2,0))
'''
================================================
FILE: dvs/flownet2/install.sh
================================================
#!/bin/bash
# Build and install the three custom layer packages found under ./networks.
#
# Abort on the first failing command: without this, a failed `cd` would let
# the `rm -rf` below run in whatever directory the script happened to be in.
set -e

for package in correlation_package resample2d_package channelnorm_package; do
    # Each package is handled in a subshell so the working directory of the
    # script itself never changes.
    (
        cd "./networks/${package}"
        # Remove artefacts of earlier builds before reinstalling.
        rm -rf -- *_cuda.egg-info build dist __pycache__
        python3 setup.py install --user
    )
done
================================================
FILE: dvs/flownet2/losses.py
================================================
'''
Portions of this code copyright 2017, Clement Pinard
'''
# freda (todo) : adversarial loss
import torch
import torch.nn as nn
import math
def EPE(input_flow, target_flow):
    """Return the mean L2 norm, taken over dimension 1, of ``target_flow - input_flow``."""
    difference = target_flow - input_flow
    per_position = torch.norm(difference, p=2, dim=1)
    return per_position.mean()
class L1(nn.Module):
    """Mean absolute difference between ``output`` and ``target``."""

    def __init__(self):
        super(L1, self).__init__()

    def forward(self, output, target):
        return (output - target).abs().mean()
class L2(nn.Module):
    """Mean of the L2 norm, taken over dimension 1, of ``output - target``."""

    def __init__(self):
        super(L2, self).__init__()

    def forward(self, output, target):
        residual = output - target
        return torch.norm(residual, p=2, dim=1).mean()
class L1Loss(nn.Module):
    """Loss module returning ``[L1, EPE]`` for a prediction/target pair.

    ``loss_labels`` names the two returned values in the same order.
    """

    def __init__(self, args):
        super(L1Loss, self).__init__()
        self.args = args
        self.loss = L1()
        self.loss_labels = ['L1', 'EPE']

    def forward(self, output, target):
        # The training loss comes first, the end-point error second.
        return [self.loss(output, target), EPE(output, target)]
class L2Loss(nn.Module):
    """Loss module returning ``[L2, EPE]`` for a prediction/target pair.

    ``loss_labels`` names the two returned values in the same order.
    """

    def __init__(self, args):
        super(L2Loss, self).__init__()
        self.args = args
        self.loss = L2()
        self.loss_labels = ['L2', 'EPE']

    def forward(self, output, target):
        # The training loss comes first, the end-point error second.
        return [self.loss(output, target), EPE(output, target)]
class MultiScale(nn.Module):
    """Weighted multi-resolution loss returning ``[loss, EPE]``.

    Args:
        args: stored on the instance as ``self.args``.
        startScale: pooling factor used for the first (finest) output.
        numScales: number of resolutions; scale ``s`` pools the target by
            ``startScale * 2**s`` and is weighted by ``l_weight / 2**s``.
        l_weight: weight of the first scale.
        norm: ``'L1'`` selects the ``L1`` loss, anything else ``L2``.

    ``forward`` accepts either a tuple of per-scale outputs (the target is
    then multiplied by ``self.div_flow`` and average-pooled to each scale) or
    a single tensor (compared with the target directly, unweighted).
    """

    def __init__(self, args, startScale = 4, numScales = 5, l_weight= 0.32, norm= 'L1'):
        super(MultiScale,self).__init__()

        self.startScale = startScale
        self.numScales = numScales
        self.loss_weights = torch.FloatTensor([(l_weight / 2 ** scale) for scale in range(self.numScales)])
        self.args = args
        self.l_type = norm
        self.div_flow = 0.05
        assert(len(self.loss_weights) == self.numScales)

        self.loss = L1() if self.l_type == 'L1' else L2()

        self.multiScales = [nn.AvgPool2d(self.startScale * (2**scale), self.startScale * (2**scale)) for scale in range(self.numScales)]
        # A plain list of labels, like L1Loss/L2Loss.  The former trailing
        # comma wrapped this list in a one-element tuple.
        self.loss_labels = ['MultiScale-'+self.l_type, 'EPE']

    def forward(self, output, target):
        lossvalue = 0
        epevalue = 0

        if isinstance(output, tuple):
            # Per-scale outputs are compared against the scaled, pooled target.
            target = self.div_flow * target
            for i, output_ in enumerate(output):
                target_ = self.multiScales[i](target)
                epevalue += self.loss_weights[i]*EPE(output_, target_)
                lossvalue += self.loss_weights[i]*self.loss(output_, target_)
            return [lossvalue, epevalue]

        # Single full-resolution output: no pooling, no weighting.
        epevalue += EPE(output, target)
        lossvalue += self.loss(output, target)
        return [lossvalue, epevalue]
================================================
FILE: dvs/flownet2/main.py
================================================
#!/usr/bin/env python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.autograd import Variable
from tensorboardX import SummaryWriter
import argparse, os, sys, subprocess
import colorama
import numpy as np
from tqdm import tqdm
from glob import glob
from os.path import *
import models, datasets
from utils import flow_utils, tools
import time
# Reusable function for inference
def inference(args, epoch, data_path, data_loader, model, offset=0):
    """Compute forward and backward flow for every batch and cache it on disk.

    For batch ``i`` the forward flow goes to ``<data_path>/flo/%06d.flo`` and
    the backward flow (the two frames swapped along dim 2) to
    ``<data_path>/flo_back/%06d.flo``.  A flow whose file already exists is
    not recomputed.

    Args:
        args: Parsed options.  Reads ``save_flow``, ``render_validation``
            (optional), ``inference_visualize``, ``cuda`` and
            ``inference_n_batches``; the latter is rewritten to ``np.inf``
            when negative, exactly as before.
        epoch: Unused; kept for interface compatibility.
        data_path: Directory that receives the ``flo*`` sub-folders.
        data_loader: Loader whose batches carry the frame tensor at index 0
            and whose dataset exposes ``frame_size``.
        model: Network called on the frame tensor.
        offset: Row position of the progress bar.

    Returns:
        None.
    """
    model.eval()

    # ``render_validation`` is not registered by the argument parser visible
    # in this file, so a missing attribute is treated as "off" instead of
    # raising AttributeError when --save_flow is not given.
    save_outputs = bool(args.save_flow or getattr(args, 'render_validation', False))

    # The paths are always needed for the "already computed?" checks below,
    # so they are defined unconditionally; only directory creation depends on
    # the flags.
    flow_folder = "{}/flo".format(data_path)
    flow_back_folder = "{}/flo_back".format(data_path)
    flow_vis_folder = "{}/flo_vis".format(data_path)
    flow_back_vis_folder = "{}/flo_back_vis".format(data_path)

    wanted_folders = []
    if save_outputs:
        wanted_folders += [flow_folder, flow_back_folder]
    if args.inference_visualize:
        wanted_folders += [flow_vis_folder, flow_back_vis_folder]
    for folder in wanted_folders:
        if not os.path.exists(folder):
            os.makedirs(folder)

    args.inference_n_batches = np.inf if args.inference_n_batches < 0 else args.inference_n_batches

    progress = tqdm(data_loader, ncols=100, total=np.minimum(len(data_loader), args.inference_n_batches), desc='Inferencing ',
        leave=True, position=offset)

    for batch_idx, data in enumerate(progress):
        data = data[0]
        # Swap the two frames along dim 2 to obtain the backward pair.
        data_back = torch.cat((data[:,:,1:,:,:], data[:,:,:1,:,:]), dim = 2)

        # Fixed: ``data_forward`` used to be assigned only on the CUDA path,
        # leaving it unbound on CPU.  The former ``Variable`` wrapping is
        # dropped; it is a no-op on the PyTorch versions that provide
        # ``torch.no_grad`` / ``non_blocking`` used here.
        data_forward = data
        if args.cuda:
            data_forward = data_forward.cuda(non_blocking=True)
            data_back = data_back.cuda(non_blocking=True)

        flo_path = join(flow_folder, '%06d.flo'%(batch_idx))
        flo_back_path = join(flow_back_folder, '%06d.flo'%(batch_idx))
        frame_size = data_loader.dataset.frame_size

        if not os.path.exists(flo_path):
            _infer_and_store(args, model, data_forward, frame_size,
                             flo_path, flow_vis_folder, save_outputs)
        if not os.path.exists(flo_back_path):
            _infer_and_store(args, model, data_back, frame_size,
                             flo_back_path, flow_back_vis_folder, save_outputs)

        # No manual ``progress.update(1)`` here: iterating the tqdm wrapper
        # already advances the bar, so the extra call counted each batch twice.
        if batch_idx == (args.inference_n_batches - 1):
            break

    progress.close()


def _infer_and_store(args, model, batch, frame_size, flo_path, vis_folder, save_outputs):
    """Predict flow for ``batch``, crop it, and optionally write/visualise it."""
    with torch.no_grad():
        # Crop the prediction to the first frame_size[0] x frame_size[1] entries
        # of its last two dimensions.
        output = model(batch)[:,:,:frame_size[0], :frame_size[1]]
    if not save_outputs:
        return
    # Only the first sample of the batch is written, channels moved last.
    _pflow = output[0].data.cpu().numpy().transpose(1, 2, 0)
    flow_utils.writeFlow(flo_path, _pflow)
    if args.inference_visualize:
        flow_utils.visulize_flow_file(flo_path, vis_folder)
if __name__ == '__main__':
    # Script entry point: parse the command line, build one inference
    # DataLoader per sub-directory of the dataset root, build the model
    # (optionally loading a checkpoint), then run ``inference`` on each loader.
    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
    parser.add_argument('--fp16_scale', type=float, default=1024., help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
    parser.add_argument('--start_epoch', type=int, default=1)
    parser.add_argument('--batch_size', '-b', type=int, default=8, help="Batch size")
    parser.add_argument('--crop_size', type=int, nargs='+', default = [256, 256], help="Spatial dimension to crop training samples for training")
    parser.add_argument("--rgb_max", type=float, default = 255.)
    parser.add_argument('--number_workers', '-nw', '--num_workers', type=int, default=8)
    parser.add_argument('--number_gpus', '-ng', type=int, default=-1, help='number of GPUs to use')
    parser.add_argument('--no_cuda', action='store_true')
    parser.add_argument('--save', '-s', default='./Google', type=str, help='directory for saving')
    parser.add_argument('--inference', action='store_true')
    parser.add_argument('--inference_visualize', action='store_true',
        help="visualize the optical flow during inference")
    parser.add_argument('--inference_size', type=int, nargs='+', default = [-1,-1], help='spatial size divisible by 64. default (-1,-1) - largest possible valid size would be used')
    parser.add_argument('--inference_batch_size', type=int, default=1)
    parser.add_argument('--inference_n_batches', type=int, default=-1)
    parser.add_argument('--save_flow', action='store_true', help='save predicted flows to file')
    parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    parser.add_argument('--log_frequency', '--summ_iter', type=int, default=1, help="Log every n batches")
    # Register "--model" / "--inference_dataset" plus their per-class options
    # via the project helper (exact option names are produced by ``tools``).
    tools.add_arguments_for_module(parser, models, argument_for_class='model', default='FlowNet2')
    tools.add_arguments_for_module(parser, datasets, argument_for_class='inference_dataset', default='Google',
        skip_params=['is_cropped'],
        parameter_defaults={'root': './Google/train',
        'replicates': 1})
    # Run relative to this file's directory so relative default paths resolve.
    main_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(main_dir)
    # Parse the official arguments
    with tools.TimerBlock("Parsing Arguments") as block:
        args = parser.parse_args()
        # A negative GPU count means "use every visible CUDA device".
        if args.number_gpus < 0 : args.number_gpus = torch.cuda.device_count()
        # Get argument defaults (hack): parse a dummy command line that only
        # contains a throw-away flag, so every other option takes its default.
        parser.add_argument('--IGNORE', action='store_true')
        defaults = vars(parser.parse_args(['--IGNORE']))
        # Print all arguments, color the non-defaults
        for argument, value in sorted(vars(args).items()):
            reset = colorama.Style.RESET_ALL
            color = reset if value == defaults[argument] else colorama.Fore.MAGENTA
            block.log('{}{}: {}{}'.format(color, argument, value, reset))
        # Resolve the class objects named on the command line.
        args.model_class = tools.module_to_dict(models)[args.model]
        args.inference_dataset_class = tools.module_to_dict(datasets)[args.inference_dataset]
        args.cuda = not args.no_cuda and torch.cuda.is_available()
        # args.current_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).rstrip()
        args.log_file = join(args.save, 'args.txt')
        # dict to collect activation gradients (for training debug purpose)
        args.grads = {}
        # Only a single pass is run by the epoch loop at the bottom.
        args.total_epochs = 1
        args.inference_dir = "{}/inference".format(args.save)
    print('Source Code')
    # print((' Current Git Hash: {}\n'.format(args.current_hash)))
    # Dynamically load the dataset class with parameters passed in via "--argument_[param]=[value]" arguments
    with tools.TimerBlock("Initializing Datasets") as block:
        # Per-GPU sizes are scaled by the number of GPUs.
        # NOTE(review): with zero GPUs these products are 0 - confirm the
        # CPU-only path is not expected to work.
        args.effective_batch_size = args.batch_size * args.number_gpus
        args.effective_inference_batch_size = args.inference_batch_size * args.number_gpus
        args.effective_number_workers = args.number_workers * args.number_gpus
        gpuargs = {'num_workers': args.effective_number_workers,
            'pin_memory': True,
            'drop_last' : True} if args.cuda else {}
        # Inference loaders reuse the GPU loader options but with the
        # unscaled worker count.
        inf_gpuargs = gpuargs.copy()
        inf_gpuargs['num_workers'] = args.number_workers
        block.log('Inference Dataset: {}'.format(args.inference_dataset))
        # Every entry directly under the dataset root gets its own loader.
        dataset_root = args.inference_dataset_root
        data_name = sorted(os.listdir(dataset_root))
        block.log(data_name)
        inference_loaders = {}
        for i in range(len(data_name)):
            dataset_path = os.path.join(dataset_root, data_name[i])
            # The dataset class reads its root from ``args``, so it is
            # overwritten before each construction.
            args.inference_dataset_root = dataset_path
            inference_dataset = args.inference_dataset_class(args, False, **tools.kwargs_from_args(args, 'inference_dataset'))
            inference_loaders[dataset_path] = DataLoader(inference_dataset, batch_size=args.effective_inference_batch_size, shuffle=False, **inf_gpuargs)
            block.log('Inference Input: {}'.format(' '.join([str([d for d in x.size()]) for x in inference_dataset[0][0]])))
    # Dynamically load model and loss class with parameters passed in via "--model_[param]=[value]" or "--loss_[param]=[value]" arguments
    with tools.TimerBlock("Building {} model".format(args.model)) as block:
        class Model(nn.Module):
            # Thin wrapper holding the selected network as ``self.model``;
            # the checkpoint loading below reaches it via ``model.module.model``.
            def __init__(self, args):
                super(Model, self).__init__()
                kwargs = tools.kwargs_from_args(args, 'model')
                self.model = args.model_class(args, **kwargs)
            def forward(self, data):
                output = self.model(data)
                return output
        model = Model(args)
        block.log('Effective Batch Size: {}'.format(args.effective_batch_size))
        block.log('Number of parameters: {}'.format(sum([p.data.nelement() if p.requires_grad else 0 for p in model.parameters()])))
        if args.cuda and args.number_gpus > 0:
            block.log('Initializing CUDA')
            model = model.cuda()
            block.log('Parallelizing')
            model = nn.parallel.DataParallel(model, device_ids=list(range(args.number_gpus)))
        # Load weights if needed, otherwise randomly initialize
        if args.resume and os.path.isfile(args.resume):
            block.log("Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # NOTE(review): ``model.module`` only exists when the model was
            # wrapped in DataParallel by the CUDA branch above.
            model.module.model.load_state_dict(checkpoint['state_dict'])
            block.log("Loaded checkpoint '{}' (at epoch {})".format(args.resume, checkpoint['epoch']))
        elif args.resume and args.inference:
            # A checkpoint was requested for inference but does not exist.
            block.log("No checkpoint found at '{}'".format(args.resume))
            quit()
        else:
            block.log("Random initialization")
        block.log("Initializing save directory: {}".format(args.save))
        if not os.path.exists(args.save):
            os.makedirs(args.save)
    # Log all arguments to file
    for argument, value in sorted(vars(args).items()):
        block.log2file(args.log_file, '{}: {}'.format(argument, value))
    for data_path in inference_loaders:
        # Primary epoch loop
        progress = tqdm(list(range(args.start_epoch, args.total_epochs + 1)), miniters=1, ncols=100, desc='Overall Progress', leave=True, position=0)
        offset = 1
        for epoch in progress:
            # ``inference`` returns None; ``stats`` is not used afterwards.
            stats = inference(args=args, epoch=epoch - 1, data_path = data_path, data_loader=inference_loaders[data_path], model=model, offset=offset)
            offset += 1
        print("\n")
================================================
FILE: dvs/flownet2/models.py
================================================
import torch
import torch.nn as nn
from torch.nn import init
import math
import numpy as np
try:
from networks.resample2d_package.resample2d import Resample2d
from networks.channelnorm_package.channelnorm import ChannelNorm
from networks import FlowNetC
from networks import FlowNetS
from networks import FlowNetSD
from networks import FlowNetFusion
from networks.submodules import *
except:
from .networks.resample2d_package.resample2d import Resample2d
from .networks.channelnorm_package.channelnorm import ChannelNorm
from .networks import FlowNetC
from .networks import FlowNetS
from .networks import FlowNetSD
from .networks import FlowNetFusion
from .networks.submodules import *
'Parameter count = 162,518,834'
class FlowNet2(nn.Module):
    """Stacked FlowNet2: FlowNetC -> FlowNetS -> FlowNetS, plus FlowNetSD, fused.

    Args:
        args: Namespace providing ``rgb_max`` and ``fp16`` (also forwarded to
            the sub-networks).
        batchNorm: Forwarded to every sub-network.
        div_flow: Scale applied to the sub-network flow outputs (see
            ``forward``).
    """

    def __init__(self, args, batchNorm=False, div_flow = 20.):
        super(FlowNet2,self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample1 = self._build_resample(args.fp16)

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        self.resample2 = self._build_resample(args.fp16)

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)

        # Block (FlowNetSD)
        self.flownets_d = FlowNetSD.FlowNetSD(args, batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')
        self.upsample4 = nn.Upsample(scale_factor=4, mode='nearest')
        self.resample3 = self._build_resample(args.fp16)
        self.resample4 = self._build_resample(args.fp16)

        # Block (FlowNetFusion)
        self.flownetfusion = FlowNetFusion.FlowNetFusion(args, batchNorm=self.batchNorm)

        # Re-initialise every (transposed) convolution of the whole stack:
        # uniform bias (when present) followed by Xavier-uniform weights.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

    @staticmethod
    def _build_resample(fp16):
        """Return a Resample2d layer, wrapped in fp32/fp16 casts when ``fp16``."""
        if fp16:
            return nn.Sequential(
                tofp32(),
                Resample2d(),
                tofp16())
        return Resample2d()

    def init_deconv_bilinear(self, weight):
        """Fill ``weight`` in place with a bilinear kernel on its channel diagonal.

        Args:
            weight: 4-D tensor whose last two dimensions are the kernel
                height and width.  Every entry is zeroed, then
                ``weight[i, i]`` is set to the kernel for each
                ``i < min(weight.size(0), weight.size(1))``.

        Returns:
            None.
        """
        f_shape = weight.size()
        heigh, width = f_shape[-2], f_shape[-1]
        f = np.ceil(width/2.0)
        c = (2 * f - 1 - f % 2) / (2.0 * f)
        # Fixed: the kernel used to be filled as ``bilinear[x, y]`` with x
        # running over the width, which indexes out of bounds for non-square
        # kernels.  Rows now correspond to y and columns to x; square kernels
        # get exactly the same values as before.
        col_profile = 1 - np.abs(np.arange(width) / f - c)
        row_profile = 1 - np.abs(np.arange(heigh) / f - c)
        bilinear = np.outer(row_profile, col_profile)
        min_dim = min(f_shape[0], f_shape[1])
        weight.data.fill_(0.)
        for i in range(min_dim):
            weight.data[i,i,:,:] = torch.from_numpy(bilinear)
        return

    def forward(self, inputs):
        """Return the fused flow for a batch of frame pairs.

        Args:
            inputs: 5-D tensor; dim 2 holds the two frames (index 0 and 1).
                The slicing below treats each frame as having 3 channels.
        """
        # Subtract the per-sample, per-channel mean taken over all remaining
        # dimensions, then scale by rgb_max.
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]
        x = torch.cat((x1,x2), dim = 1)

        # flownetc
        flownetc_flow2 = self.flownetc(x)[0]
        flownetc_flow = self.upsample1(flownetc_flow2*self.div_flow)

        # warp img1 to img0; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample1(x[:,3:,:,:], flownetc_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat1 = torch.cat((x, resampled_img1, flownetc_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets1
        flownets1_flow2 = self.flownets_1(concat1)[0]
        flownets1_flow = self.upsample2(flownets1_flow2*self.div_flow)

        # warp img1 to img0 using flownets1; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample2(x[:,3:,:,:], flownets1_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat2 = torch.cat((x, resampled_img1, flownets1_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets2
        flownets2_flow2 = self.flownets_2(concat2)[0]
        flownets2_flow = self.upsample4(flownets2_flow2 * self.div_flow)
        norm_flownets2_flow = self.channelnorm(flownets2_flow)

        diff_flownets2_flow = self.resample4(x[:,3:,:,:], flownets2_flow)
        diff_flownets2_img1 = self.channelnorm((x[:,:3,:,:]-diff_flownets2_flow))

        # flownetsd
        # NOTE(review): this branch divides by div_flow where the other
        # branches multiply; kept exactly as it was.
        flownetsd_flow2 = self.flownets_d(x)[0]
        flownetsd_flow = self.upsample3(flownetsd_flow2 / self.div_flow)
        norm_flownetsd_flow = self.channelnorm(flownetsd_flow)

        diff_flownetsd_flow = self.resample3(x[:,3:,:,:], flownetsd_flow)
        diff_flownetsd_img1 = self.channelnorm((x[:,:3,:,:]-diff_flownetsd_flow))

        # concat img0, flownetsd, flownets2, norm_flownetsd, norm_flownets2, diff_flownetsd_img1, diff_flownets2_img1
        concat3 = torch.cat((x[:,:3,:,:], flownetsd_flow, flownets2_flow, norm_flownetsd_flow, norm_flownets2_flow, diff_flownetsd_img1, diff_flownets2_img1), dim=1)
        flownetfusion_flow = self.flownetfusion(concat3)

        return flownetfusion_flow
class FlowNet2C(FlowNetC.FlowNetC):
    """Stand-alone FlowNetC that normalises raw frame pairs itself.

    Args:
        args: Namespace providing ``rgb_max`` (also forwarded to the base).
        batchNorm: Forwarded to the base network.
        div_flow: Scale applied to the finest flow in evaluation mode.
    """

    def __init__(self, args, batchNorm=False, div_flow=20):
        # Fixed: the ``div_flow`` argument used to be ignored because a
        # hard-coded 20 was passed to the base class.  The default is 20, so
        # existing callers are unaffected.
        super(FlowNet2C,self).__init__(args, batchNorm=batchNorm, div_flow=div_flow)
        self.rgb_max = args.rgb_max

    def forward(self, inputs):
        """Return five flow scales when training, else the upsampled finest flow.

        Args:
            inputs: 5-D tensor; dim 2 holds the two frames (index 0 and 1).
        """
        # Subtract the per-sample, per-channel mean, then scale by rgb_max.
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]

        # FlownetC top input stream
        out_conv1a = self.conv1(x1)
        out_conv2a = self.conv2(out_conv1a)
        out_conv3a = self.conv3(out_conv2a)

        # FlownetC bottom input stream (same layers as the top stream)
        out_conv1b = self.conv1(x2)
        out_conv2b = self.conv2(out_conv1b)
        out_conv3b = self.conv3(out_conv2b)

        # Merge streams
        out_corr = self.corr(out_conv3a, out_conv3b)
        out_corr = self.corr_activation(out_corr)

        # Redirect top input stream and concatenate
        out_conv_redir = self.conv_redir(out_conv3a)
        in_conv3_1 = torch.cat((out_conv_redir, out_corr), 1)

        # Merged conv layers
        out_conv3_1 = self.conv3_1(in_conv3_1)
        out_conv4 = self.conv4_1(self.conv4(out_conv3_1))
        out_conv5 = self.conv5_1(self.conv5(out_conv4))
        out_conv6 = self.conv6_1(self.conv6(out_conv5))

        # Decoder: each level concatenates the matching encoder features, the
        # deconvolved features from below and the upsampled coarser flow.
        flow6 = self.predict_flow6(out_conv6)
        flow6_up = self.upsampled_flow6_to_5(flow6)
        out_deconv5 = self.deconv5(out_conv6)
        concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)

        flow5 = self.predict_flow5(concat5)
        flow5_up = self.upsampled_flow5_to_4(flow5)
        out_deconv4 = self.deconv4(concat5)
        concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)

        flow4 = self.predict_flow4(concat4)
        flow4_up = self.upsampled_flow4_to_3(flow4)
        out_deconv3 = self.deconv3(concat4)
        concat3 = torch.cat((out_conv3_1,out_deconv3,flow4_up),1)

        flow3 = self.predict_flow3(concat3)
        flow3_up = self.upsampled_flow3_to_2(flow3)
        out_deconv2 = self.deconv2(concat3)
        concat2 = torch.cat((out_conv2a,out_deconv2,flow3_up),1)

        flow2 = self.predict_flow2(concat2)

        if self.training:
            return flow2,flow3,flow4,flow5,flow6
        else:
            return self.upsample1(flow2*self.div_flow)
class FlowNet2S(FlowNetS.FlowNetS):
    """Stand-alone FlowNetS (6 input channels) that normalises raw frame pairs.

    In training mode ``forward`` yields the five flow scales; otherwise it
    yields the finest flow multiplied by ``div_flow`` and passed through
    ``upsample1``.
    """

    def __init__(self, args, batchNorm=False, div_flow=20):
        super(FlowNet2S,self).__init__(args, input_channels = 6, batchNorm=batchNorm)
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max

    def forward(self, inputs):
        # Per-sample, per-channel mean over all remaining dimensions.
        lead = inputs.size()[:2]
        rgb_mean = inputs.contiguous().view(lead+(-1,)).mean(dim=-1).view(lead + (1,1,1,))
        normed = (inputs - rgb_mean) / self.rgb_max
        # Stack the two frames (dim 2) along the channel dimension.
        x = torch.cat( (normed[:,:,0,:,:], normed[:,:,1,:,:]), dim = 1)

        # Encoder
        feat2 = self.conv2(self.conv1(x))
        feat3 = self.conv3_1(self.conv3(feat2))
        feat4 = self.conv4_1(self.conv4(feat3))
        feat5 = self.conv5_1(self.conv5(feat4))
        feat6 = self.conv6_1(self.conv6(feat5))

        # Decoder, coarse to fine
        flow6 = self.predict_flow6(feat6)
        up6 = self.upsampled_flow6_to_5(flow6)
        cat5 = torch.cat((feat5, self.deconv5(feat6), up6), 1)

        flow5 = self.predict_flow5(cat5)
        up5 = self.upsampled_flow5_to_4(flow5)
        cat4 = torch.cat((feat4, self.deconv4(cat5), up5), 1)

        flow4 = self.predict_flow4(cat4)
        up4 = self.upsampled_flow4_to_3(flow4)
        cat3 = torch.cat((feat3, self.deconv3(cat4), up4), 1)

        flow3 = self.predict_flow3(cat3)
        up3 = self.upsampled_flow3_to_2(flow3)
        cat2 = torch.cat((feat2, self.deconv2(cat3), up3), 1)

        flow2 = self.predict_flow2(cat2)

        if not self.training:
            return self.upsample1(flow2*self.div_flow)
        return flow2,flow3,flow4,flow5,flow6
class FlowNet2SD(FlowNetSD.FlowNetSD):
    """Stand-alone FlowNetSD that normalises raw frame pairs itself.

    In training mode ``forward`` yields the five flow scales; otherwise it
    yields the finest flow multiplied by ``div_flow`` and passed through
    ``upsample1``.
    """

    def __init__(self, args, batchNorm=False, div_flow=20):
        super(FlowNet2SD,self).__init__(args, batchNorm=batchNorm)
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max

    def forward(self, inputs):
        # Per-sample, per-channel mean over all remaining dimensions.
        lead = inputs.size()[:2]
        rgb_mean = inputs.contiguous().view(lead+(-1,)).mean(dim=-1).view(lead + (1,1,1,))
        normed = (inputs - rgb_mean) / self.rgb_max
        # Stack the two frames (dim 2) along the channel dimension.
        x = torch.cat( (normed[:,:,0,:,:], normed[:,:,1,:,:]), dim = 1)

        # Encoder
        feat0 = self.conv0(x)
        feat1 = self.conv1_1(self.conv1(feat0))
        feat2 = self.conv2_1(self.conv2(feat1))
        feat3 = self.conv3_1(self.conv3(feat2))
        feat4 = self.conv4_1(self.conv4(feat3))
        feat5 = self.conv5_1(self.conv5(feat4))
        feat6 = self.conv6_1(self.conv6(feat5))

        # Decoder, coarse to fine; from level 5 on the flow is predicted from
        # an intermediate convolution of the concatenated features.
        flow6 = self.predict_flow6(feat6)
        up6 = self.upsampled_flow6_to_5(flow6)
        cat5 = torch.cat((feat5, self.deconv5(feat6), up6), 1)
        flow5 = self.predict_flow5(self.inter_conv5(cat5))

        up5 = self.upsampled_flow5_to_4(flow5)
        cat4 = torch.cat((feat4, self.deconv4(cat5), up5), 1)
        flow4 = self.predict_flow4(self.inter_conv4(cat4))

        up4 = self.upsampled_flow4_to_3(flow4)
        cat3 = torch.cat((feat3, self.deconv3(cat4), up4), 1)
        flow3 = self.predict_flow3(self.inter_conv3(cat3))

        up3 = self.upsampled_flow3_to_2(flow3)
        cat2 = torch.cat((feat2, self.deconv2(cat3), up3), 1)
        flow2 = self.predict_flow2(self.inter_conv2(cat2))

        if not self.training:
            return self.upsample1(flow2*self.div_flow)
        return flow2,flow3,flow4,flow5,flow6
class FlowNet2CS(nn.Module):
    """Two-stage stack: FlowNetC followed by one FlowNetS refinement.

    Args:
        args: Namespace providing ``rgb_max`` and ``fp16`` (also forwarded to
            the sub-networks).
        batchNorm: Forwarded to both sub-networks.
        div_flow: Scale applied to the sub-network flow outputs.
    """

    def __init__(self, args, batchNorm=False, div_flow = 20.):
        super(FlowNet2CS,self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        if args.fp16:
            # Resample2d is run in fp32 and its result cast back to fp16.
            self.resample1 = nn.Sequential(
                tofp32(),
                Resample2d(),
                tofp16())
        else:
            self.resample1 = Resample2d()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')

        # Fixed: uses the in-place ``uniform_`` / ``xavier_uniform_``
        # initialisers, matching FlowNet2, instead of the deprecated
        # non-underscore aliases.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

    def forward(self, inputs):
        """Return the FlowNetS-refined flow for a batch of frame pairs.

        Args:
            inputs: 5-D tensor; dim 2 holds the two frames (index 0 and 1).
                The slicing below treats each frame as having 3 channels.
        """
        # Subtract the per-sample, per-channel mean, then scale by rgb_max.
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]
        x = torch.cat((x1,x2), dim = 1)

        # flownetc
        flownetc_flow2 = self.flownetc(x)[0]
        flownetc_flow = self.upsample1(flownetc_flow2*self.div_flow)

        # warp img1 to img0; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample1(x[:,3:,:,:], flownetc_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat1 = torch.cat((x, resampled_img1, flownetc_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets1
        flownets1_flow2 = self.flownets_1(concat1)[0]
        flownets1_flow = self.upsample2(flownets1_flow2*self.div_flow)

        return flownets1_flow
class FlowNet2CSS(nn.Module):
    """Three-stage stack: FlowNetC followed by two FlowNetS refinements.

    Args:
        args: Namespace providing ``rgb_max`` and ``fp16`` (also forwarded to
            the sub-networks).
        batchNorm: Forwarded to every sub-network.
        div_flow: Scale applied to the sub-network flow outputs.
    """

    def __init__(self, args, batchNorm=False, div_flow = 20.):
        super(FlowNet2CSS,self).__init__()
        self.batchNorm = batchNorm
        self.div_flow = div_flow
        self.rgb_max = args.rgb_max
        self.args = args

        self.channelnorm = ChannelNorm()

        # First Block (FlowNetC)
        self.flownetc = FlowNetC.FlowNetC(args, batchNorm=self.batchNorm)
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

        if args.fp16:
            # Resample2d is run in fp32 and its result cast back to fp16.
            self.resample1 = nn.Sequential(
                tofp32(),
                Resample2d(),
                tofp16())
        else:
            self.resample1 = Resample2d()

        # Block (FlowNetS1)
        self.flownets_1 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample2 = nn.Upsample(scale_factor=4, mode='bilinear')
        if args.fp16:
            self.resample2 = nn.Sequential(
                tofp32(),
                Resample2d(),
                tofp16())
        else:
            self.resample2 = Resample2d()

        # Block (FlowNetS2)
        self.flownets_2 = FlowNetS.FlowNetS(args, batchNorm=self.batchNorm)
        self.upsample3 = nn.Upsample(scale_factor=4, mode='nearest')

        # Fixed: uses the in-place ``uniform_`` / ``xavier_uniform_``
        # initialisers, matching FlowNet2, instead of the deprecated
        # non-underscore aliases.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                if m.bias is not None:
                    init.uniform_(m.bias)
                init.xavier_uniform_(m.weight)

    def forward(self, inputs):
        """Return the twice-refined flow for a batch of frame pairs.

        Args:
            inputs: 5-D tensor; dim 2 holds the two frames (index 0 and 1).
                The slicing below treats each frame as having 3 channels.
        """
        # Subtract the per-sample, per-channel mean, then scale by rgb_max.
        rgb_mean = inputs.contiguous().view(inputs.size()[:2]+(-1,)).mean(dim=-1).view(inputs.size()[:2] + (1,1,1,))

        x = (inputs - rgb_mean) / self.rgb_max
        x1 = x[:,:,0,:,:]
        x2 = x[:,:,1,:,:]
        x = torch.cat((x1,x2), dim = 1)

        # flownetc
        flownetc_flow2 = self.flownetc(x)[0]
        flownetc_flow = self.upsample1(flownetc_flow2*self.div_flow)

        # warp img1 to img0; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample1(x[:,3:,:,:], flownetc_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat1 = torch.cat((x, resampled_img1, flownetc_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets1
        flownets1_flow2 = self.flownets_1(concat1)[0]
        flownets1_flow = self.upsample2(flownets1_flow2*self.div_flow)

        # warp img1 to img0 using flownets1; magnitude of diff between img0 and warped_img1
        resampled_img1 = self.resample2(x[:,3:,:,:], flownets1_flow)
        diff_img0 = x[:,:3,:,:] - resampled_img1
        norm_diff_img0 = self.channelnorm(diff_img0)

        # concat img0, img1, img1->img0, flow, diff-mag
        concat2 = torch.cat((x, resampled_img1, flownets1_flow/self.div_flow, norm_diff_img0), dim=1)

        # flownets2
        flownets2_flow2 = self.flownets_2(concat2)[0]
        flownets2_flow = self.upsample3(flownets2_flow2 * self.div_flow)

        return flownets2_flow
================================================
FILE: dvs/flownet2/networks/FlowNetC.py
================================================
import torch
import torch.nn as nn
from torch.nn import init
import math
import numpy as np
from .correlation_package.correlation import Correlation
from .submodules import *
'Parameter count , 39,175,298 '
class FlowNetC(nn.Module):
    """Correlation-based flow network.

    The first three convolutions are applied to each frame separately with
    shared layers; the two feature maps are combined by a correlation layer
    and decoded coarse-to-fine into flow predictions.
    """

    def __init__(self,args, batchNorm=True, div_flow = 20):
        super(FlowNetC,self).__init__()

        self.batchNorm = batchNorm
        self.div_flow = div_flow
        bn = self.batchNorm

        # Per-frame feature extractor (layers shared by both frames)
        self.conv1 = conv(bn, 3, 64, kernel_size=7, stride=2)
        self.conv2 = conv(bn, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(bn, 128, 256, kernel_size=5, stride=2)
        self.conv_redir = conv(bn, 256, 32, kernel_size=1, stride=1)

        # Correlation layer; under fp16 it is run in fp32 and cast back.
        correlation = Correlation(pad_size=20, kernel_size=1, max_displacement=20, stride1=1, stride2=2, corr_multiply=1)
        if args.fp16:
            self.corr = nn.Sequential(
                tofp32(),
                correlation,
                tofp16())
        else:
            self.corr = correlation

        self.corr_activation = nn.LeakyReLU(0.1,inplace=True)

        # Joint encoder
        self.conv3_1 = conv(bn, 473, 256)
        self.conv4 = conv(bn, 256, 512, stride=2)
        self.conv4_1 = conv(bn, 512, 512)
        self.conv5 = conv(bn, 512, 512, stride=2)
        self.conv5_1 = conv(bn, 512, 512)
        self.conv6 = conv(bn, 512, 1024, stride=2)
        self.conv6_1 = conv(bn,1024, 1024)

        # Decoder
        self.deconv5 = deconv(1024,512)
        self.deconv4 = deconv(1026,256)
        self.deconv3 = deconv(770,128)
        self.deconv2 = deconv(386,64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=True)

        # Uniform bias (when present) and Xavier-uniform weights for every
        # convolution and transposed convolution registered so far.
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                continue
            if layer.bias is not None:
                init.uniform_(layer.bias)
            init.xavier_uniform_(layer.weight)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    def forward(self, x):
        """Return ``(flow2, ..., flow6)`` when training, else ``(flow2,)``.

        Args:
            x: Tensor whose dim 1 holds the first frame in channels 0-2 and
                the second frame in the channels from 3 on.
        """
        frame_a = x[:,0:3,:,:]
        frame_b = x[:,3::,:,:]

        # Top input stream
        a2 = self.conv2(self.conv1(frame_a))
        a3 = self.conv3(a2)

        # Bottom input stream
        b3 = self.conv3(self.conv2(self.conv1(frame_b)))

        # Merge streams
        corr = self.corr_activation(self.corr(a3, b3))

        # Redirect top input stream and concatenate
        merged = torch.cat((self.conv_redir(a3), corr), 1)

        # Merged conv layers
        feat3 = self.conv3_1(merged)
        feat4 = self.conv4_1(self.conv4(feat3))
        feat5 = self.conv5_1(self.conv5(feat4))
        feat6 = self.conv6_1(self.conv6(feat5))

        # Decoder, coarse to fine
        flow6 = self.predict_flow6(feat6)
        up6 = self.upsampled_flow6_to_5(flow6)
        cat5 = torch.cat((feat5, self.deconv5(feat6), up6), 1)

        flow5 = self.predict_flow5(cat5)
        up5 = self.upsampled_flow5_to_4(flow5)
        cat4 = torch.cat((feat4, self.deconv4(cat5), up5), 1)

        flow4 = self.predict_flow4(cat4)
        up4 = self.upsampled_flow4_to_3(flow4)
        cat3 = torch.cat((feat3, self.deconv3(cat4), up4), 1)

        flow3 = self.predict_flow3(cat3)
        up3 = self.upsampled_flow3_to_2(flow3)
        cat2 = torch.cat((a2, self.deconv2(cat3), up3), 1)

        flow2 = self.predict_flow2(cat2)

        if not self.training:
            return (flow2,)
        return flow2,flow3,flow4,flow5,flow6
================================================
FILE: dvs/flownet2/networks/FlowNetFusion.py
================================================
import torch
import torch.nn as nn
from torch.nn import init
import math
import numpy as np
from .submodules import *
'Parameter count = 581,226'
class FlowNetFusion(nn.Module):
    """Small encoder/decoder mapping an 11-channel input to one flow field."""

    def __init__(self,args, batchNorm=True):
        super(FlowNetFusion,self).__init__()

        self.batchNorm = batchNorm
        bn = self.batchNorm

        # Encoder
        self.conv0 = conv(bn, 11, 64)
        self.conv1 = conv(bn, 64, 64, stride=2)
        self.conv1_1 = conv(bn, 64, 128)
        self.conv2 = conv(bn, 128, 128, stride=2)
        self.conv2_1 = conv(bn, 128, 128)

        # Decoder
        self.deconv1 = deconv(128,32)
        self.deconv0 = deconv(162,16)

        self.inter_conv1 = i_conv(bn, 162, 32)
        self.inter_conv0 = i_conv(bn, 82, 16)

        self.predict_flow2 = predict_flow(128)
        self.predict_flow1 = predict_flow(32)
        self.predict_flow0 = predict_flow(16)

        self.upsampled_flow2_to_1 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
        self.upsampled_flow1_to_0 = nn.ConvTranspose2d(2, 2, 4, 2, 1)

        # Uniform bias (when present) and Xavier-uniform weights for every
        # convolution and transposed convolution.
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                continue
            if layer.bias is not None:
                init.uniform_(layer.bias)
            init.xavier_uniform_(layer.weight)

    def forward(self, x):
        """Return the finest flow prediction (``flow0``) for ``x``."""
        # Encoder
        feat0 = self.conv0(x)
        feat1 = self.conv1_1(self.conv1(feat0))
        feat2 = self.conv2_1(self.conv2(feat1))

        # Decoder, coarse to fine
        flow2 = self.predict_flow2(feat2)
        up2 = self.upsampled_flow2_to_1(flow2)
        cat1 = torch.cat((feat1, self.deconv1(feat2), up2), 1)
        flow1 = self.predict_flow1(self.inter_conv1(cat1))

        up1 = self.upsampled_flow1_to_0(flow1)
        cat0 = torch.cat((feat0, self.deconv0(cat1), up1), 1)
        return self.predict_flow0(self.inter_conv0(cat0))
================================================
FILE: dvs/flownet2/networks/FlowNetS.py
================================================
'''
Portions of this code copyright 2017, Clement Pinard
'''
import torch
import torch.nn as nn
from torch.nn import init
import math
import numpy as np
from .submodules import *
'Parameter count : 38,676,504 '
class FlowNetS(nn.Module):
    """Plain encoder/decoder flow network operating on a stacked input."""

    def __init__(self, args, input_channels = 12, batchNorm=True):
        super(FlowNetS,self).__init__()

        self.batchNorm = batchNorm
        bn = self.batchNorm

        # Encoder
        self.conv1 = conv(bn, input_channels, 64, kernel_size=7, stride=2)
        self.conv2 = conv(bn, 64, 128, kernel_size=5, stride=2)
        self.conv3 = conv(bn, 128, 256, kernel_size=5, stride=2)
        self.conv3_1 = conv(bn, 256, 256)
        self.conv4 = conv(bn, 256, 512, stride=2)
        self.conv4_1 = conv(bn, 512, 512)
        self.conv5 = conv(bn, 512, 512, stride=2)
        self.conv5_1 = conv(bn, 512, 512)
        self.conv6 = conv(bn, 512, 1024, stride=2)
        self.conv6_1 = conv(bn,1024, 1024)

        # Decoder
        self.deconv5 = deconv(1024,512)
        self.deconv4 = deconv(1026,256)
        self.deconv3 = deconv(770,128)
        self.deconv2 = deconv(386,64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)

        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)

        # Uniform bias (when present) and Xavier-uniform weights for every
        # convolution and transposed convolution registered so far.
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                continue
            if layer.bias is not None:
                init.uniform_(layer.bias)
            init.xavier_uniform_(layer.weight)

        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    def forward(self, x):
        """Return ``(flow2, ..., flow6)`` when training, else ``(flow2,)``."""
        # Encoder
        feat2 = self.conv2(self.conv1(x))
        feat3 = self.conv3_1(self.conv3(feat2))
        feat4 = self.conv4_1(self.conv4(feat3))
        feat5 = self.conv5_1(self.conv5(feat4))
        feat6 = self.conv6_1(self.conv6(feat5))

        # Decoder, coarse to fine
        flow6 = self.predict_flow6(feat6)
        up6 = self.upsampled_flow6_to_5(flow6)
        cat5 = torch.cat((feat5, self.deconv5(feat6), up6), 1)

        flow5 = self.predict_flow5(cat5)
        up5 = self.upsampled_flow5_to_4(flow5)
        cat4 = torch.cat((feat4, self.deconv4(cat5), up5), 1)

        flow4 = self.predict_flow4(cat4)
        up4 = self.upsampled_flow4_to_3(flow4)
        cat3 = torch.cat((feat3, self.deconv3(cat4), up4), 1)

        flow3 = self.predict_flow3(cat3)
        up3 = self.upsampled_flow3_to_2(flow3)
        cat2 = torch.cat((feat2, self.deconv2(cat3), up3), 1)

        flow2 = self.predict_flow2(cat2)

        if not self.training:
            return (flow2,)
        return flow2,flow3,flow4,flow5,flow6
================================================
FILE: dvs/flownet2/networks/FlowNetSD.py
================================================
import torch
import torch.nn as nn
from torch.nn import init
import math
import numpy as np
from .submodules import *
'Parameter count = 45,371,666'
class FlowNetSD(nn.Module):
    """Encoder/decoder optical-flow network (the "SD" variant).

    Compared with a plain encoder/decoder, every decoder scale passes its
    concatenated features through an extra ``i_conv`` block before the flow
    predictor. The input is expected to have 6 channels (first ``conv`` block
    is built with 6 input channels).

    ``conv``, ``deconv``, ``i_conv`` and ``predict_flow`` are imported from
    ``.submodules``.

    Args:
        args: accepted for interface compatibility; not read by this class.
        batchNorm: forwarded as the first argument of every ``conv`` and
            ``i_conv`` block.
    """

    def __init__(self, args, batchNorm=True):
        super().__init__()

        self.batchNorm = batchNorm
        use_bn = self.batchNorm

        # Encoder.
        self.conv0 = conv(use_bn, 6, 64)
        self.conv1 = conv(use_bn, 64, 64, stride=2)
        self.conv1_1 = conv(use_bn, 64, 128)
        self.conv2 = conv(use_bn, 128, 128, stride=2)
        self.conv2_1 = conv(use_bn, 128, 128)
        self.conv3 = conv(use_bn, 128, 256, stride=2)
        self.conv3_1 = conv(use_bn, 256, 256)
        self.conv4 = conv(use_bn, 256, 512, stride=2)
        self.conv4_1 = conv(use_bn, 512, 512)
        self.conv5 = conv(use_bn, 512, 512, stride=2)
        self.conv5_1 = conv(use_bn, 512, 512)
        self.conv6 = conv(use_bn, 512, 1024, stride=2)
        self.conv6_1 = conv(use_bn, 1024, 1024)

        # Decoder upsampling blocks.
        self.deconv5 = deconv(1024, 512)
        self.deconv4 = deconv(1026, 256)
        self.deconv3 = deconv(770, 128)
        self.deconv2 = deconv(386, 64)

        # Intermediate convolutions applied to each concatenation.
        self.inter_conv5 = i_conv(use_bn, 1026, 512)
        self.inter_conv4 = i_conv(use_bn, 770, 256)
        self.inter_conv3 = i_conv(use_bn, 386, 128)
        self.inter_conv2 = i_conv(use_bn, 194, 64)

        # One flow predictor per scale.
        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(512)
        self.predict_flow4 = predict_flow(256)
        self.predict_flow3 = predict_flow(128)
        self.predict_flow2 = predict_flow(64)

        # Learned 2x upsampling of the 2-channel flow between scales.
        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1)

        # Re-initialise every (transposed) convolution registered so far:
        # biases with init.uniform_ defaults, weights with Xavier-uniform.
        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
                continue
            if layer.bias is not None:
                init.uniform_(layer.bias)
            init.xavier_uniform_(layer.weight)
            # init_deconv_bilinear(m.weight)

        # Not used by forward() below; kept as a public attribute.
        self.upsample1 = nn.Upsample(scale_factor=4, mode='bilinear')

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            ``(flow2, flow3, flow4, flow5, flow6)`` while ``self.training`` is
            true, otherwise the 1-tuple ``(flow2,)``.
        """
        # Encoder pass.
        feat0 = self.conv0(x)
        feat1 = self.conv1_1(self.conv1(feat0))
        feat2 = self.conv2_1(self.conv2(feat1))
        feat3 = self.conv3_1(self.conv3(feat2))
        feat4 = self.conv4_1(self.conv4(feat3))
        feat5 = self.conv5_1(self.conv5(feat4))
        feat6 = self.conv6_1(self.conv6(feat5))

        # Coarsest flow estimate.
        flow6 = self.predict_flow6(feat6)

        # Scale 5.
        skip5 = torch.cat(
            (feat5, self.deconv5(feat6), self.upsampled_flow6_to_5(flow6)), 1)
        flow5 = self.predict_flow5(self.inter_conv5(skip5))

        # Scale 4.
        skip4 = torch.cat(
            (feat4, self.deconv4(skip5), self.upsampled_flow5_to_4(flow5)), 1)
        flow4 = self.predict_flow4(self.inter_conv4(skip4))

        # Scale 3.
        skip3 = torch.cat(
            (feat3, self.deconv3(skip4), self.upsampled_flow4_to_3(flow4)), 1)
        flow3 = self.predict_flow3(self.inter_conv3(skip3))

        # Scale 2 (finest prediction).
        skip2 = torch.cat(
            (feat2, self.deconv2(skip3), self.upsampled_flow3_to_2(flow3)), 1)
        flow2 = self.predict_flow2(self.inter_conv2(skip2))

        if not self.training:
            return (flow2,)
        return flow2, flow3, flow4, flow5, flow6
================================================
FILE: dvs/flownet2/networks/__init__.py
================================================
================================================
FILE: dvs/flownet2/networks/channelnorm_package/__init__.py
================================================
================================================
FILE: dvs/flownet2/networks/channelnorm_package/channelnorm.py
================================================
from torch.autograd import Function, Variable
from torch.nn.modules.module import Module
import channelnorm_cuda
class ChannelNormFunction(Function):
    """Autograd function backed by the compiled ``channelnorm_cuda`` extension.

    ``forward`` reduces the channel dimension of a 4-D tensor to a single
    channel; ``backward`` asks the extension for the matching input gradient.
    """

    @staticmethod
    def forward(ctx, input1, norm_deg=2):
        """Compute the per-pixel channel norm of ``input1``.

        Args:
            ctx: autograd context.
            input1: contiguous 4-D tensor ``(b, c, h, w)``.
            norm_deg: forwarded to the extension and stored for ``backward``.

        Returns:
            Tensor of shape ``(b, 1, h, w)`` with the same type as ``input1``.

        Raises:
            AssertionError: if ``input1`` is not contiguous.
        """
        assert input1.is_contiguous()
        b, _, h, w = input1.size()
        output = input1.new(b, 1, h, w).zero_()

        channelnorm_cuda.forward(input1, output, norm_deg)
        ctx.save_for_backward(input1, output)
        ctx.norm_deg = norm_deg

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input1`` (and ``None`` for ``norm_deg``)."""
        input1, output = ctx.saved_tensors

        # The extension kernel addresses grad_output as a dense buffer, so a
        # strided/expanded incoming gradient must be materialised first.
        # contiguous() is a no-op when the tensor is already dense.
        grad_output = grad_output.contiguous()

        grad_input1 = input1.new_zeros(input1.size())

        channelnorm_cuda.backward(input1, output, grad_output.data,
                                  grad_input1.data, ctx.norm_deg)

        return grad_input1, None
class ChannelNorm(Module):
    """``nn.Module`` wrapper around :class:`ChannelNormFunction`.

    Args:
        norm_deg: value passed as the second argument of
            ``ChannelNormFunction.apply`` on every call.
    """

    def __init__(self, norm_deg=2):
        super().__init__()
        self.norm_deg = norm_deg

    def forward(self, input1):
        """Apply the channel-norm function to ``input1``."""
        degree = self.norm_deg
        return ChannelNormFunction.apply(input1, degree)
================================================
FILE: dvs/flownet2/networks/channelnorm_package/channelnorm_cuda.cc
================================================
#include
#include
#include "channelnorm_kernel.cuh"
// Host entry point for the channel-norm forward pass (exposed to Python as
// "forward" by the PYBIND11_MODULE block in this file).
// Passes input1, output and norm_deg straight to channelnorm_kernel_forward.
// Always returns 1; no launch status is reported from here.
int channelnorm_cuda_forward(
at::Tensor& input1,
at::Tensor& output,
int norm_deg) {
channelnorm_kernel_forward(input1, output, norm_deg);
return 1;
}
// Host entry point for the channel-norm backward pass (exposed to Python as
// "backward" by the PYBIND11_MODULE block in this file).
// Passes all tensors and norm_deg straight to channelnorm_kernel_backward,
// which writes the result into gradInput1.
// Always returns 1; no launch status is reported from here.
int channelnorm_cuda_backward(
at::Tensor& input1,
at::Tensor& output,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
int norm_deg) {
channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg);
return 1;
}
// Python bindings: registers the two host wrappers above as the module-level
// functions "forward" and "backward" of the extension.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)");
m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)");
}
================================================
FILE: dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cu
================================================
#include
#include
#include
#include "channelnorm_kernel.cuh"
#define CUDA_NUM_THREADS 512
#define DIM0(TENSOR) ((TENSOR).x)
#define DIM1(TENSOR) ((TENSOR).y)
#define DIM2(TENSOR) ((TENSOR).z)
#define DIM3(TENSOR) ((TENSOR).w)
#define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
using at::Half;
// Forward kernel: one thread per output element (threads with index >= n exit).
// For its (b, y, x) location each thread sums val*val over all input channels
// and stores the square root in output[index].
//
// - Both tensors are addressed as dense buffers computed from the *_size
//   values; input1_stride and output_stride are received but not read.
// - norm_deg is received but not read: the result is always sqrt(sum(val^2)).
// - Accumulation is done in float regardless of scalar_t.
//
// NOTE(review): the template parameter list and the static_cast target types
// are missing in this copy of the file (angle-bracket contents appear to have
// been stripped) - restore them from upstream before compiling.
template
__global__ void kernel_channelnorm_update_output(
const int n,
const scalar_t* __restrict__ input1,
const long4 input1_size,
const long4 input1_stride,
scalar_t* __restrict__ output,
const long4 output_size,
const long4 output_stride,
int norm_deg) {
// Global linear thread index == linear index into output.
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= n) {
return;
}
int dim_b = DIM0(output_size);
int dim_c = DIM1(output_size);
int dim_h = DIM2(output_size);
int dim_w = DIM3(output_size);
int dim_chw = dim_c * dim_h * dim_w;
// Decompose the linear output index into batch / row / column.
int b = ( index / dim_chw ) % dim_b;
int y = ( index / dim_w ) % dim_h;
int x = ( index ) % dim_w;
int i1dim_c = DIM1(input1_size);
int i1dim_h = DIM2(input1_size);
int i1dim_w = DIM3(input1_size);
int i1dim_chw = i1dim_c * i1dim_h * i1dim_w;
int i1dim_hw = i1dim_h * i1dim_w;
float result = 0.0;
// Sum of squares over the channel axis at (b, y, x).
for (int c = 0; c < i1dim_c; ++c) {
int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x;
scalar_t val = input1[i1Index];
result += static_cast(val * val);
}
result = sqrt(result);
output[index] = static_cast(result);
}
// Backward kernel: one thread per gradInput element (threads with index >= n
// exit). Each thread writes
//     gradInput[index] = gradOutput[o] * input1[index] / (output[o] + 1e-9)
// where o is the (b, y, x) position of this element in the output/gradOutput
// buffers. The 1e-9 term guards against division by zero.
//
// - All buffers are addressed densely; the *_stride arguments are not read.
// - outIndex uses dim_hw elements per batch item, i.e. it assumes output and
//   gradOutput hold one channel per batch item.
// - norm_deg is received but not read.
//
// NOTE(review): the template parameter list and the static_cast target types
// are missing in this copy of the file (angle-bracket contents appear to have
// been stripped) - restore them from upstream before compiling.
template
__global__ void kernel_channelnorm_backward_input1(
const int n,
const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride,
const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride,
int norm_deg) {
// Global linear thread index == linear index into gradInput / input1.
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= n) {
return;
}
float val = 0.0;
int dim_b = DIM0(gradInput_size);
int dim_c = DIM1(gradInput_size);
int dim_h = DIM2(gradInput_size);
int dim_w = DIM3(gradInput_size);
int dim_chw = dim_c * dim_h * dim_w;
int dim_hw = dim_h * dim_w;
// Batch / row / column of this element (the channel is not needed).
int b = ( index / dim_chw ) % dim_b;
int y = ( index / dim_w ) % dim_h;
int x = ( index ) % dim_w;
// Matching position in the single-channel output / gradOutput buffers.
int outIndex = b * dim_hw + y * dim_w + x;
val = static_cast(gradOutput[outIndex]) * static_cast(input1[index]) / (static_cast(output[outIndex])+1e-9);
gradInput[index] = static_cast(val);
}
// Host launcher for kernel_channelnorm_update_output.
// Packs the sizes and strides of the 4-D tensors into long4 values and
// launches one thread per output element: ceil(n / CUDA_NUM_THREADS) blocks of
// CUDA_NUM_THREADS threads on the current CUDA stream, dispatched on the
// scalar type of input1 (floating types and half).
// No error check follows the launch (see the TODO at the end).
// NOTE(review): the kernel template argument and the .data() element types
// are missing in this copy (angle-bracket contents appear stripped).
void channelnorm_kernel_forward(
at::Tensor& input1,
at::Tensor& output,
int norm_deg) {
const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
// Total number of output elements == number of worker threads needed.
int n = output.numel();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] {
kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
n,
input1.data(),
input1_size,
input1_stride,
output.data(),
output_size,
output_stride,
norm_deg);
}));
// TODO: ATen-equivalent check
// THCudaCheck(cudaGetLastError());
}
// Host launcher for kernel_channelnorm_backward_input1.
// Packs the sizes and strides of all four 4-D tensors into long4 values and
// launches one thread per gradInput1 element: ceil(n / CUDA_NUM_THREADS)
// blocks of CUDA_NUM_THREADS threads on the current CUDA stream, dispatched on
// the scalar type of input1 (floating types and half).
// No error check follows the launch (see the TODO at the end).
// NOTE(review): the kernel template argument and the .data() element types
// are missing in this copy (angle-bracket contents appear stripped).
void channelnorm_kernel_backward(
at::Tensor& input1,
at::Tensor& output,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
int norm_deg) {
const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
// Total number of input-gradient elements == number of worker threads needed.
int n = gradInput1.numel();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] {
kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
n,
input1.data(),
input1_size,
input1_stride,
output.data(),
output_size,
output_stride,
gradOutput.data(),
gradOutput_size,
gradOutput_stride,
gradInput1.data(),
gradInput1_size,
gradInput1_stride,
norm_deg
);
}));
// TODO: Add ATen-equivalent check
// THCudaCheck(cudaGetLastError());
}
================================================
FILE: dvs/flownet2/networks/channelnorm_package/channelnorm_kernel.cuh
================================================
#pragma once
#include
// Launches the channel-norm forward kernel: reads input1, writes output.
// norm_deg is passed through to the kernel.
void channelnorm_kernel_forward(
at::Tensor& input1,
at::Tensor& output,
int norm_deg);
// Launches the channel-norm backward kernel: reads input1, output and
// gradOutput, writes gradInput1. norm_deg is passed through to the kernel.
void channelnorm_kernel_backward(
at::Tensor& input1,
at::Tensor& output,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
int norm_deg);
================================================
FILE: dvs/flownet2/networks/channelnorm_package/setup.py
================================================
#!/usr/bin/env python3
import os
import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# Flags handed to the host C++ compiler.
cxx_args = ['-std=c++11']

# Flags handed to nvcc: one ``-gencode`` pair per targeted GPU architecture.
nvcc_args = [
    '-gencode', 'arch=compute_52,code=sm_52',
    '-gencode', 'arch=compute_60,code=sm_60',
    '-gencode', 'arch=compute_61,code=sm_61',
    '-gencode', 'arch=compute_70,code=sm_70',
    '-gencode', 'arch=compute_70,code=compute_70',
]

# The extension is built from the pybind wrapper plus the CUDA kernel source.
_sources = ['channelnorm_cuda.cc', 'channelnorm_kernel.cu']
_compile_args = {'cxx': cxx_args, 'nvcc': nvcc_args}
_extension = CUDAExtension(
    'channelnorm_cuda', _sources, extra_compile_args=_compile_args)

setup(
    name='channelnorm_cuda',
    ext_modules=[_extension],
    cmdclass={'build_ext': BuildExtension},
)
================================================
FILE: dvs/flownet2/networks/correlation_package/__init__.py
================================================
================================================
FILE: dvs/flownet2/networks/correlation_package/correlation.py
================================================
import torch
from torch.nn.modules.module import Module
from torch.autograd import Function
import correlation_cuda
class CorrelationFunction(Function):
    """Autograd function backed by the compiled ``correlation_cuda`` extension.

    The extension resizes and fills the scratch buffers (``rbot1``, ``rbot2``)
    and the result tensors itself, so they are created empty here.

    The kernels address ``input1``, ``input2`` and the incoming gradient as
    dense NCHW buffers, so all three are made contiguous before being handed
    to the extension (a no-op for tensors that are already dense).
    """

    @staticmethod
    def forward(ctx, input1, input2, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        """Compute the correlation volume of ``input1`` and ``input2``.

        Args:
            ctx: autograd context; receives the inputs and all hyper-parameters.
            input1, input2: 4-D feature tensors on the same CUDA device.
            pad_size, kernel_size, max_displacement, stride1, stride2,
            corr_multiply: forwarded unchanged to the extension.

        Returns:
            The output tensor allocated and filled by the extension.
        """
        ctx.save_for_backward(input1, input2)

        ctx.pad_size = pad_size
        ctx.kernel_size = kernel_size
        ctx.max_displacement = max_displacement
        ctx.stride1 = stride1
        ctx.stride2 = stride2
        ctx.corr_multiply = corr_multiply

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()
            output = input1.new()

            correlation_cuda.forward(input1.contiguous(), input2.contiguous(), rbot1, rbot2, output,
                ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``input1`` and ``input2``.

        The six hyper-parameters of ``forward`` are not differentiable, hence
        the trailing ``None`` entries.
        """
        input1, input2 = ctx.saved_tensors

        # Autograd may deliver a strided/expanded gradient; the kernel needs
        # a dense one.
        grad_output = grad_output.contiguous()

        with torch.cuda.device_of(input1):
            rbot1 = input1.new()
            rbot2 = input2.new()

            grad_input1 = input1.new()
            grad_input2 = input2.new()

            correlation_cuda.backward(input1.contiguous(), input2.contiguous(), rbot1, rbot2, grad_output, grad_input1, grad_input2,
                ctx.pad_size, ctx.kernel_size, ctx.max_displacement, ctx.stride1, ctx.stride2, ctx.corr_multiply)

        return grad_input1, grad_input2, None, None, None, None, None, None
class Correlation(Module):
    """``nn.Module`` wrapper around :class:`CorrelationFunction`.

    The constructor stores the six correlation hyper-parameters; ``forward``
    passes them, in order, after the two inputs to
    ``CorrelationFunction.apply``.
    """

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super().__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply

    def forward(self, input1, input2):
        """Return the correlation of ``input1`` with ``input2``."""
        settings = (
            self.pad_size,
            self.kernel_size,
            self.max_displacement,
            self.stride1,
            self.stride2,
            self.corr_multiply,
        )
        return CorrelationFunction.apply(input1, input2, *settings)
================================================
FILE: dvs/flownet2/networks/correlation_package/correlation_cuda.cc
================================================
#include
#include
#include
#include
#include
#include
#include "correlation_cuda_kernel.cuh"
// Host entry point for the correlation forward pass (exposed to Python as
// "forward" by the PYBIND11_MODULE block in this file).
//
// Derives the padded input size and the output shape from the hyper-parameters,
// resizes and zero-fills the scratch tensors rInput1/rInput2 (laid out as
// batch x paddedHeight x paddedWidth x channels) and the output tensor
// (batch x nOutputChannels x outputHeight x outputWidth), then calls
// correlation_forward_cuda_kernel on the current CUDA stream.
//
// Returns 1 on success; raises via AT_ERROR if the kernel wrapper reports
// failure.
// NOTE(review): the static_cast target types on the outputHeight/outputwidth
// lines are missing in this copy (angle-bracket contents appear stripped).
int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply)
{
int batchSize = input1.size(0);
int nInputChannels = input1.size(1);
int inputHeight = input1.size(2);
int inputWidth = input1.size(3);
int kernel_radius = (kernel_size - 1) / 2;
int border_radius = kernel_radius + max_displacement;
int paddedInputHeight = inputHeight + 2 * pad_size;
int paddedInputWidth = inputWidth + 2 * pad_size;
// One output channel per (dy, dx) displacement on a grid of step stride2.
int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
// Output positions: padded extent minus the border on both sides, sampled
// every stride1 pixels (rounded up).
int outputHeight = ceil(static_cast(paddedInputHeight - 2 * border_radius) / static_cast(stride1));
int outputwidth = ceil(static_cast(paddedInputWidth - 2 * border_radius) / static_cast(stride1));
rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
// Zero fill so that the padding border of the scratch buffers reads as 0.
rInput1.fill_(0);
rInput2.fill_(0);
output.fill_(0);
int success = correlation_forward_cuda_kernel(
output,
output.size(0),
output.size(1),
output.size(2),
output.size(3),
output.stride(0),
output.stride(1),
output.stride(2),
output.stride(3),
input1,
input1.size(1),
input1.size(2),
input1.size(3),
input1.stride(0),
input1.stride(1),
input1.stride(2),
input1.stride(3),
input2,
input2.size(1),
input2.stride(0),
input2.stride(1),
input2.stride(2),
input2.stride(3),
rInput1,
rInput2,
pad_size,
kernel_size,
max_displacement,
stride1,
stride2,
corr_type_multiply,
at::cuda::getCurrentCUDAStream()
//at::globalContext().getCurrentCUDAStream()
);
//check for errors
if (!success) {
AT_ERROR("CUDA call failed");
}
return 1;
}
// Host entry point for the correlation backward pass (exposed to Python as
// "backward" by the PYBIND11_MODULE block in this file).
//
// Resizes and zero-fills the scratch tensors rInput1/rInput2 (batch x
// paddedHeight x paddedWidth x channels) and the gradient tensors
// gradInput1/gradInput2 (same shape as input1), then calls
// correlation_backward_cuda_kernel on the current CUDA stream.
//
// Returns 1 on success; raises via AT_ERROR if the kernel wrapper reports
// failure.
int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
at::Tensor& gradInput1, at::Tensor& gradInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply)
{
int batchSize = input1.size(0);
int nInputChannels = input1.size(1);
int paddedInputHeight = input1.size(2)+ 2 * pad_size;
int paddedInputWidth = input1.size(3)+ 2 * pad_size;
int height = input1.size(2);
int width = input1.size(3);
rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
gradInput1.resize_({batchSize, nInputChannels, height, width});
gradInput2.resize_({batchSize, nInputChannels, height, width});
// The backward kernels return early for some positions and rely on the
// gradients being pre-zeroed.
rInput1.fill_(0);
rInput2.fill_(0);
gradInput1.fill_(0);
gradInput2.fill_(0);
int success = correlation_backward_cuda_kernel(gradOutput,
gradOutput.size(0),
gradOutput.size(1),
gradOutput.size(2),
gradOutput.size(3),
gradOutput.stride(0),
gradOutput.stride(1),
gradOutput.stride(2),
gradOutput.stride(3),
input1,
input1.size(1),
input1.size(2),
input1.size(3),
input1.stride(0),
input1.stride(1),
input1.stride(2),
input1.stride(3),
input2,
input2.stride(0),
input2.stride(1),
input2.stride(2),
input2.stride(3),
gradInput1,
gradInput1.stride(0),
gradInput1.stride(1),
gradInput1.stride(2),
gradInput1.stride(3),
gradInput2,
gradInput2.size(1),
gradInput2.stride(0),
gradInput2.stride(1),
gradInput2.stride(2),
gradInput2.stride(3),
rInput1,
rInput2,
pad_size,
kernel_size,
max_displacement,
stride1,
stride2,
corr_type_multiply,
at::cuda::getCurrentCUDAStream()
//at::globalContext().getCurrentCUDAStream()
);
if (!success) {
AT_ERROR("CUDA call failed");
}
return 1;
}
// Python bindings: registers the two host wrappers above as the module-level
// functions "forward" and "backward" of the extension.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
}
================================================
FILE: dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cu
================================================
#include
#include "correlation_cuda_kernel.cuh"
#define CUDA_NUM_THREADS 1024
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
#include
#include
#include
#include
using at::Half;
// Sums val across the lanes of a warp with shuffle-down steps of 16, 8, 4, 2
// and 1 lanes. After the loop, lane 0 holds the total of all 32 lanes; the
// values left in other lanes are partial sums.
// Uses FULL_MASK, i.e. all 32 lanes are expected to take part in the call.
// NOTE(review): the template parameter list is missing in this copy
// (angle-bracket contents appear stripped).
template
__forceinline__ __device__ scalar_t warpReduceSum(scalar_t val) {
for (int offset = 16; offset > 0; offset /= 2)
val += __shfl_down_sync(FULL_MASK, val, offset);
return val;
}
// Sums val across all threads of a block in two stages: every warp reduces its
// own values, lane 0 of each warp stores the partial sum in shared memory, and
// after a barrier the first warp reduces those partial sums.
// The final total is valid in thread 0 of the block.
// The shared buffer has 32 slots, so at most 32 warps per block are supported.
// NOTE(review): the shared buffer is read after a single __syncthreads() and
// there is no barrier before it is written again on a subsequent call -
// confirm callers synchronise between consecutive calls.
// NOTE(review): the template parameter list is missing in this copy
// (angle-bracket contents appear stripped).
template
__forceinline__ __device__ scalar_t blockReduceSum(scalar_t val) {
static __shared__ scalar_t shared[32];
// Lane within the warp and warp index within the block.
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
// Only the first (blockDim.x / warpSize) threads pick up a partial sum.
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
// Copies a dense (n, c, y, x) input into the padded scratch buffer rinput,
// which is indexed as (n, y + pad_size, x + pad_size, c), i.e. with the
// channel as the fastest-varying axis and a pad_size border on each spatial
// side. The border itself is not written here.
// Block (blockIdx.x, blockIdx.y, blockIdx.z) handles pixel (n, y, x); its
// threads walk the channels in steps of THREADS_PER_BLOCK.
// NOTE(review): the channel loop strides by THREADS_PER_BLOCK rather than
// blockDim.x, so it presumably expects exactly THREADS_PER_BLOCK threads per
// block - confirm against the launch configuration.
// NOTE(review): the template parameter list is missing in this copy
// (angle-bracket contents appear stripped).
template
__global__ void channels_first(const scalar_t* __restrict__ input, scalar_t* rinput, int channels, int height, int width, int pad_size)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = blockIdx.x;
int y = blockIdx.y;
int x = blockIdx.z;
int ch_off = threadIdx.x;
scalar_t value;
// Element counts per batch item / per channel in the source layout.
int dimcyx = channels * height * width;
int dimyx = height * width;
// Padded extents and element counts in the destination layout.
int p_dimx = (width + 2 * pad_size);
int p_dimy = (height + 2 * pad_size);
int p_dimyxc = channels * p_dimy * p_dimx;
int p_dimxc = p_dimx * channels;
for (int c = ch_off; c < channels; c += THREADS_PER_BLOCK) {
value = input[n * dimcyx + c * dimyx + y * width + x];
rinput[n * p_dimyxc + (y + pad_size) * p_dimxc + (x + pad_size) * channels + c] = value;
}
}
// Correlation forward kernel.
// Block (blockIdx.x, blockIdx.y, blockIdx.z) computes every output channel at
// batch item n and output position (blockIdx.y, blockIdx.z). For each
// displacement (tj, ti) on a grid of step stride2 it accumulates the product
// of rInput1 around (y1, x1) and rInput2 around (y2, x2) over the
// kernel_size x kernel_size window and all channels, reduces the per-thread
// partial sums across the block and lets thread 0 store the result divided by
// nelems = kernel_size * kernel_size * nInputChannels.
//
// rInput1/rInput2 are read in the padded channel-last layout
// (n, y, x, c); output is written as dense (n, channel, y, x).
// NOTE(review): the template parameter list and the static_cast target types
// are missing in this copy (angle-bracket contents appear stripped).
template
__global__ void correlation_forward(scalar_t* __restrict__ output, const int nOutputChannels,
const int outputHeight, const int outputWidth, const scalar_t* __restrict__ rInput1,
const int nInputChannels, const int inputHeight, const int inputWidth,
const scalar_t* __restrict__ rInput2, const int pad_size, const int kernel_size,
const int max_displacement, const int stride1, const int stride2) {
int32_t pInputWidth = inputWidth + 2 * pad_size;
int32_t pInputHeight = inputHeight + 2 * pad_size;
int32_t kernel_rad = (kernel_size - 1) / 2;
int32_t displacement_rad = max_displacement / stride2;
int32_t displacement_size = 2 * displacement_rad + 1;
int32_t n = blockIdx.x;
// Reference position in padded coordinates for this output location.
int32_t y1 = blockIdx.y * stride1 + max_displacement;
int32_t x1 = blockIdx.z * stride1 + max_displacement;
int32_t c = threadIdx.x;
// Element counts of the padded channel-last scratch layout.
int32_t pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int32_t pdimxc = pInputWidth * nInputChannels;
int32_t pdimc = nInputChannels;
// Element counts of the output layout.
int32_t tdimcyx = nOutputChannels * outputHeight * outputWidth;
int32_t tdimyx = outputHeight * outputWidth;
int32_t tdimx = outputWidth;
// Normalisation factor: number of products summed per output value.
int32_t nelems = kernel_size * kernel_size * pdimc;
// element-wise product along channel axis
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
// Displaced position in the second feature map.
int x2 = x1 + ti * stride2;
int y2 = y1 + tj * stride2;
float acc0 = 0.0f;
for (int j = -kernel_rad; j <= kernel_rad; ++j) {
for (int i = -kernel_rad; i <= kernel_rad; ++i) {
// THREADS_PER_BLOCK
#pragma unroll
// Each thread handles channels c, c + blockDim.x, ...
for (int ch = c; ch < pdimc; ch += blockDim.x) {
int indx1 = n * pdimyxc + (y1 + j) * pdimxc
+ (x1 + i) * pdimc + ch;
int indx2 = n * pdimyxc + (y2 + j) * pdimxc
+ (x2 + i) * pdimc + ch;
acc0 += static_cast(rInput1[indx1] * rInput2[indx2]);
}
}
}
// Combine the per-thread partial sums; a single-warp block can skip the
// shared-memory stage.
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
// Output channel index of displacement (tj, ti), row-major.
int tc = (tj + displacement_rad) * displacement_size
+ (ti + displacement_rad);
const int tindx = n * tdimcyx + tc * tdimyx + blockIdx.y * tdimx
+ blockIdx.z;
output[tindx] = static_cast(acc0 / nelems);
}
}
}
}
// Correlation backward kernel for the first input, for one batch item (item).
// Block (blockIdx.x, blockIdx.y, blockIdx.z) computes the gradient of one
// input element: row blockIdx.x * stride1, column blockIdx.y * stride1,
// channel blockIdx.z. Threads split the output channels (displacements) in
// steps of THREADS_PER_BLOCK; each accumulates gradOutput * rInput2 into its
// own prod_sum slot, and thread 0 adds the slots up and stores the total
// divided by nelems.
//
// Blocks whose window lies entirely outside the output return without
// writing, so gradInput1 must be zero-filled by the caller. The early returns
// depend only on blockIdx, so a whole block exits together before the barrier.
// prod_sum is indexed by threadIdx.x and has THREADS_PER_BLOCK slots, so the
// block must not be launched with more threads than that.
// NOTE(review): xmin/ymin/xmax/ymax use integer division, which truncates
// toward zero for negative numerators - confirm this is intended when
// stride1 > 1.
// NOTE(review): the template parameter list is missing in this copy
// (angle-bracket contents appear stripped).
template
__global__ void correlation_backward_input1(int item, scalar_t* gradInput1, int nInputChannels, int inputHeight, int inputWidth,
const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth,
const scalar_t* __restrict__ rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = item;
// Position in padded coordinates.
int y = blockIdx.x * stride1 + pad_size;
int x = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
// Candidate range of output positions for this input element.
int xmin = (x - kernel_rad - max_displacement) / stride1;
int ymin = (y - kernel_rad - max_displacement) / stride1;
int xmax = (x + kernel_rad - max_displacement) / stride1;
int ymax = (y + kernel_rad - max_displacement) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) {
// assumes gradInput1 is pre-allocated and zero filled
return;
}
if (xmin > xmax || ymin > ymax) {
// assumes gradInput1 is pre-allocated and zero filled
return;
}
// Clamp the range to the valid output extent.
xmin = max(0,xmin);
xmax = min(outputWidth-1,xmax);
ymin = max(0,ymin);
ymax = min(outputHeight-1,ymax);
int pInputWidth = inputWidth + 2 * pad_size;
int pInputHeight = inputHeight + 2 * pad_size;
// Element counts: padded scratch layout (p*), gradOutput layout (t*),
// gradInput layout (o*).
int pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int pdimxc = pInputWidth * nInputChannels;
int pdimc = nInputChannels;
int tdimcyx = nOutputChannels * outputHeight * outputWidth;
int tdimyx = outputHeight * outputWidth;
int tdimx = outputWidth;
int odimcyx = nInputChannels * inputHeight* inputWidth;
int odimyx = inputHeight * inputWidth;
int odimx = inputWidth;
scalar_t nelems = kernel_size * kernel_size * nInputChannels;
// One partial sum per thread.
__shared__ scalar_t prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) {
// Displacement encoded by output channel tc.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int indx2 = n * pdimyxc + (y + j2)* pdimxc + (x + i2) * pdimc + c;
scalar_t val2 = rInput2[indx2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i;
prod_sum[tch_off] += gradOutput[tindx] * val2;
}
}
}
__syncthreads();
// Thread 0 adds the per-thread partial sums sequentially and writes the result.
if(tch_off == 0) {
scalar_t reduce_sum = 0;
for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) {
reduce_sum += prod_sum[idx];
}
const int indx1 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size);
gradInput1[indx1] = reduce_sum / nelems;
}
}
// Correlation backward kernel for the second input, for one batch item (item).
// Block (blockIdx.x, blockIdx.y, blockIdx.z) computes the gradient of one
// input element: row blockIdx.x * stride1, column blockIdx.y * stride1,
// channel blockIdx.z. Threads split the output channels (displacements) in
// steps of THREADS_PER_BLOCK. Unlike the input1 kernel, the range of
// contributing output positions depends on the displacement, so it is
// recomputed per output channel and out-of-range channels are skipped.
// Each thread accumulates gradOutput * rInput1 into its own prod_sum slot, and
// thread 0 adds the slots up and stores the total divided by nelems.
//
// prod_sum is indexed by threadIdx.x and has THREADS_PER_BLOCK slots, so the
// block must not be launched with more threads than that.
// NOTE(review): xmin/ymin/xmax/ymax use integer division, which truncates
// toward zero for negative numerators - confirm this is intended when
// stride1 > 1.
// NOTE(review): the template parameter list is missing in this copy
// (angle-bracket contents appear stripped).
template
__global__ void correlation_backward_input2(int item, scalar_t* gradInput2, int nInputChannels, int inputHeight, int inputWidth,
const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth,
const scalar_t* __restrict__ rInput1,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = item;
// Position in padded coordinates.
int y = blockIdx.x * stride1 + pad_size;
int x = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int pInputWidth = inputWidth + 2 * pad_size;
int pInputHeight = inputHeight + 2 * pad_size;
// Element counts: padded scratch layout (p*), gradOutput layout (t*),
// gradInput layout (o*).
int pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int pdimxc = pInputWidth * nInputChannels;
int pdimc = nInputChannels;
int tdimcyx = nOutputChannels * outputHeight * outputWidth;
int tdimyx = outputHeight * outputWidth;
int tdimx = outputWidth;
int odimcyx = nInputChannels * inputHeight* inputWidth;
int odimyx = inputHeight * inputWidth;
int odimx = inputWidth;
scalar_t nelems = kernel_size * kernel_size * nInputChannels;
// One partial sum per thread.
__shared__ scalar_t prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) {
// Displacement encoded by output channel tc.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
// Candidate range of output positions for this displacement.
int xmin = (x - kernel_rad - max_displacement - i2) / stride1;
int ymin = (y - kernel_rad - max_displacement - j2) / stride1;
int xmax = (x + kernel_rad - max_displacement - i2) / stride1;
int ymax = (y + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) {
// assumes gradInput2 is pre-allocated and zero filled
continue;
}
if (xmin > xmax || ymin > ymax) {
// assumes gradInput2 is pre-allocated and zero filled
continue;
}
// Clamp the range to the valid output extent.
xmin = max(0,xmin);
xmax = min(outputWidth-1,xmax);
ymin = max(0,ymin);
ymax = min(outputHeight-1,ymax);
// Element of the first feature map paired with (y, x) under this displacement.
int indx1 = n * pdimyxc + (y - j2)* pdimxc + (x - i2) * pdimc + c;
scalar_t val1 = rInput1[indx1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i;
prod_sum[tch_off] += gradOutput[tindx] * val1;
}
}
}
__syncthreads();
// Thread 0 adds the per-thread partial sums sequentially and writes the result.
if(tch_off == 0) {
scalar_t reduce_sum = 0;
for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) {
reduce_sum += prod_sum[idx];
}
const int indx2 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size);
gradInput2[indx2] = reduce_sum / nelems;
}
}
// Host launcher for the correlation forward pass.
// 1. Runs channels_first on input1 and input2 to fill the padded channel-last
//    scratch tensors rInput1 and rInput2.
// 2. Runs correlation_forward to fill output.
// Returns 1 on success; if cudaGetLastError() reports an error it prints the
// message and returns 0.
//
// Only ob, oc, oh, ow, ic, ih and iw are read from the scalar shape
// arguments; the stride arguments (os*, is*, gs*), gc and corr_type_multiply
// are accepted but not used in this function.
// NOTE(review): the kernel launch configurations (<<<...>>>), template
// arguments and .data() element types are missing in this copy
// (angle-bracket contents appear stripped); the dim3/stream variables below
// are presumably what they contained - confirm against upstream.
int correlation_forward_cuda_kernel(at::Tensor& output,
int ob,
int oc,
int oh,
int ow,
int osb,
int osc,
int osh,
int osw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gc,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream)
{
int batchSize = ob;
int nInputChannels = ic;
int inputWidth = iw;
int inputHeight = ih;
int nOutputChannels = oc;
int outputWidth = ow;
int outputHeight = oh;
// Layout conversion: one block per input pixel.
dim3 blocks_grid(batchSize, inputHeight, inputWidth);
dim3 threads_block(THREADS_PER_BLOCK);
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channels_first_fwd_1", ([&] {
channels_first<<>>(
input1.data(), rInput1.data(), nInputChannels, inputHeight, inputWidth, pad_size);
}));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "channels_first_fwd_2", ([&] {
channels_first<<>> (
input2.data(), rInput2.data(), nInputChannels, inputHeight, inputWidth, pad_size);
}));
// Correlation: one block per output position.
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(batchSize, outputHeight, outputWidth);
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "correlation_forward", ([&] {
correlation_forward<<>>
(output.data(), nOutputChannels, outputHeight, outputWidth,
rInput1.data(), nInputChannels, inputHeight, inputWidth,
rInput2.data(),
pad_size,
kernel_size,
max_displacement,
stride1,
stride2);
}));
cudaError_t err = cudaGetLastError();
// check for errors
if (err != cudaSuccess) {
printf("error in correlation_forward_cuda_kernel: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
// Host launcher for the correlation backward pass.
// 1. Runs channels_first on input1 and input2 to fill the padded channel-last
//    scratch tensors rInput1 and rInput2.
// 2. For every batch item, runs correlation_backward_input1 to fill
//    gradInput1.
// 3. For every batch item, runs correlation_backward_input2 to fill
//    gradInput2.
// Returns 1 on success; if cudaGetLastError() reports an error it prints the
// message and returns 0.
//
// Only gob, goc, goh, gow, ic, ih and iw are read from the scalar shape
// arguments; the stride arguments, ggc and corr_type_multiply are accepted but
// not used in this function.
// NOTE(review): every dispatch below is labelled "lltm_forward_cuda", which
// does not match the kernels being launched; the label looks like a leftover
// and is presumably only used in dispatch error messages.
// NOTE(review): the kernel launch configurations (<<<...>>>), template
// arguments and .data() element types are missing in this copy
// (angle-bracket contents appear stripped); the dim3/stream variables below
// are presumably what they contained - confirm against upstream.
int correlation_backward_cuda_kernel(
at::Tensor& gradOutput,
int gob,
int goc,
int goh,
int gow,
int gosb,
int gosc,
int gosh,
int gosw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& gradInput1,
int gisb,
int gisc,
int gish,
int gisw,
at::Tensor& gradInput2,
int ggc,
int ggsb,
int ggsc,
int ggsh,
int ggsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream)
{
int batchSize = gob;
int num = batchSize;
int nInputChannels = ic;
int inputWidth = iw;
int inputHeight = ih;
int nOutputChannels = goc;
int outputWidth = gow;
int outputHeight = goh;
// Layout conversion: one block per input pixel.
dim3 blocks_grid(batchSize, inputHeight, inputWidth);
dim3 threads_block(THREADS_PER_BLOCK);
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "lltm_forward_cuda", ([&] {
channels_first<<>>(
input1.data(),
rInput1.data(),
nInputChannels,
inputHeight,
inputWidth,
pad_size
);
}));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] {
channels_first<<>>(
input2.data(),
rInput2.data(),
nInputChannels,
inputHeight,
inputWidth,
pad_size
);
}));
// Gradient kernels: one block per input element (row, column, channel),
// launched separately for each batch item.
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(inputHeight, inputWidth, nInputChannels);
for (int n = 0; n < num; ++n) {
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "lltm_forward_cuda", ([&] {
correlation_backward_input1<<>> (
n, gradInput1.data(), nInputChannels, inputHeight, inputWidth,
gradOutput.data(), nOutputChannels, outputHeight, outputWidth,
rInput2.data(),
pad_size,
kernel_size,
max_displacement,
stride1,
stride2);
}));
}
for(int n = 0; n < batchSize; n++) {
AT_DISPATCH_FLOATING_TYPES_AND_HALF(rInput1.type(), "lltm_forward_cuda", ([&] {
correlation_backward_input2<<>>(
n, gradInput2.data(), nInputChannels, inputHeight, inputWidth,
gradOutput.data(), nOutputChannels, outputHeight, outputWidth,
rInput1.data(),
pad_size,
kernel_size,
max_displacement,
stride1,
stride2);
}));
}
// check for errors
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("error in correlation_backward_cuda_kernel: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
================================================
FILE: dvs/flownet2/networks/correlation_package/correlation_cuda_kernel.cuh
================================================
#pragma once
#include
#include
#include
// Declaration of the host-side entry point for the correlation forward pass.
// NOTE(review): by analogy with the backward entry point, the o*/i* integers
// look like the sizes (b, c, h, w) of `output`/`input1` and the *s* integers
// like the matching strides; the g* values presumably describe `input2` —
// confirm against the definition and the caller.
// NOTE(review): rInput1/rInput2 are presumably scratch tensors filled by the
// implementation — confirm.
int correlation_forward_cuda_kernel(at::Tensor& output,
int ob,
int oc,
int oh,
int ow,
int osb,
int osc,
int osh,
int osw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gc,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream);
// Declaration of the host-side entry point for the correlation backward pass.
// The definition in correlation_cuda_kernel.cu uses gob as the batch size,
// goc/goh/gow as the gradOutput channel/height/width and ic/ih/iw as the input
// channel/height/width; it fills rInput1/rInput2 from input1/input2 and then
// launches one gradient kernel per batch item for each of gradInput1 and
// gradInput2. It returns 0 when cudaGetLastError() reports an error and 1
// otherwise.
// NOTE(review): the remaining integers (gos*, is*, gs*, gis*, gg*) look like
// tensor strides and are not read by the visible definition — confirm.
int correlation_backward_cuda_kernel(
at::Tensor& gradOutput,
int gob,
int goc,
int goh,
int gow,
int gosb,
int gosc,
int gosh,
int gosw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& gradInput1,
int gisb,
int gisc,
int gish,
int gisw,
at::Tensor& gradInput2,
int ggc,
int ggsb,
int ggsc,
int ggsh,
int ggsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream);
================================================
FILE: dvs/flownet2/networks/correlation_package/setup.py
================================================
#!/usr/bin/env python3
import os
import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# Flags handed to the host C++ compiler.
cxx_args = ['-std=c++11']

# Flags handed to nvcc: one '-gencode' pair per sm_* target, followed by a
# final pair that requests code=compute_70.
nvcc_args = []
for arch in ('50', '52', '60', '61', '70'):
    nvcc_args += ['-gencode', 'arch=compute_{0},code=sm_{0}'.format(arch)]
nvcc_args += ['-gencode', 'arch=compute_70,code=compute_70']

# The extension is built from the binding source and the kernel source.
correlation_extension = CUDAExtension(
    'correlation_cuda',
    ['correlation_cuda.cc', 'correlation_cuda_kernel.cu'],
    extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args},
)

setup(
    name='correlation_cuda',
    ext_modules=[correlation_extension],
    cmdclass={'build_ext': BuildExtension},
)
================================================
FILE: dvs/flownet2/networks/resample2d_package/__init__.py
================================================
================================================
FILE: dvs/flownet2/networks/resample2d_package/resample2d.py
================================================
from torch.nn.modules.module import Module
from torch.autograd import Function, Variable
import resample2d_cuda
class Resample2dFunction(Function):
    """Autograd function that delegates to the ``resample2d_cuda`` extension."""

    @staticmethod
    def forward(ctx, input1, input2, kernel_size=1, bilinear= True):
        """Call ``resample2d_cuda.forward`` on a freshly zeroed output tensor.

        The output takes its channel count from ``input1`` and its batch,
        height and width from ``input2``. Both inputs must be contiguous.
        The inputs and the two options are stored on ``ctx`` for backward.
        """
        assert input1.is_contiguous()
        assert input2.is_contiguous()

        ctx.save_for_backward(input1, input2)
        ctx.kernel_size = kernel_size
        ctx.bilinear = bilinear

        _, channels, _, _ = input1.size()
        batch, _, height, width = input2.size()
        resampled = input1.new(batch, channels, height, width).zero_()

        resample2d_cuda.forward(input1, input2, resampled, kernel_size, bilinear)
        return resampled

    @staticmethod
    def backward(ctx, grad_output):
        """Call ``resample2d_cuda.backward`` on zeroed gradient buffers.

        Returns the gradients for ``input1`` and ``input2``; the two
        non-tensor arguments of ``forward`` get ``None``.
        """
        grad_output = grad_output.contiguous()
        assert grad_output.is_contiguous()

        source, flow = ctx.saved_tensors

        # Both buffers are allocated from ``source`` (i.e. ``input1``), so
        # they share its type even for the ``input2`` gradient.
        grad_source = Variable(source.new(source.size()).zero_())
        grad_flow = Variable(source.new(flow.size()).zero_())

        resample2d_cuda.backward(
            source, flow, grad_output.data,
            grad_source.data, grad_flow.data,
            ctx.kernel_size, ctx.bilinear,
        )
        return grad_source, grad_flow, None, None
class Resample2d(Module):
    """Module wrapper around ``Resample2dFunction``.

    Args:
        kernel_size: forwarded unchanged to ``Resample2dFunction``.
        bilinear: forwarded unchanged to ``Resample2dFunction``.
    """

    def __init__(self, kernel_size=1, bilinear = True):
        super(Resample2d, self).__init__()
        self.kernel_size = kernel_size
        self.bilinear = bilinear

    def forward(self, input1, input2):
        """Apply ``Resample2dFunction`` to ``input1`` and ``input2``.

        ``Resample2dFunction.forward`` asserts that *both* inputs are
        contiguous, so both are made contiguous here. ``contiguous()``
        returns the tensor itself when it already is, so well-formed inputs
        pay nothing extra, while strided inputs no longer trip the assertion.
        """
        input1_c = input1.contiguous()
        input2_c = input2.contiguous()
        return Resample2dFunction.apply(input1_c, input2_c, self.kernel_size, self.bilinear)
================================================
FILE: dvs/flownet2/networks/resample2d_package/resample2d_cuda.cc
================================================
#include
#include
#include "resample2d_kernel.cuh"
// Binding-side wrapper for the forward pass: passes every argument straight
// to resample2d_kernel_forward and always returns 1 (no error is reported
// through the return value).
int resample2d_cuda_forward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& output,
int kernel_size, bool bilinear) {
resample2d_kernel_forward(input1, input2, output, kernel_size, bilinear);
return 1;
}
// Binding-side wrapper for the backward pass: passes every argument straight
// to resample2d_kernel_backward, which writes into gradInput1 and gradInput2,
// and always returns 1 (no error is reported through the return value).
int resample2d_cuda_backward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
at::Tensor& gradInput2,
int kernel_size, bool bilinear) {
resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size, bilinear);
return 1;
}
// Python bindings: exposes the two wrappers above as `forward` and `backward`
// on the extension module named by TORCH_EXTENSION_NAME.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
}
================================================
FILE: dvs/flownet2/networks/resample2d_package/resample2d_kernel.cu
================================================
#include
#include
#include
// Threads per block used by every kernel launch in this file.
#define CUDA_NUM_THREADS 512
// NOTE(review): THREADS_PER_BLOCK is not referenced by the code visible in
// this file — confirm whether it is still needed.
#define THREADS_PER_BLOCK 64
// Component accessors for a 4-component size/stride value (x, y, z, w).
#define DIM0(TENSOR) ((TENSOR).x)
#define DIM1(TENSOR) ((TENSOR).y)
#define DIM2(TENSOR) ((TENSOR).z)
#define DIM3(TENSOR) ((TENSOR).w)
// Strided 4-D element access: expects a variable named `<TENSOR>_stride` in
// scope next to the pointer `<TENSOR>`.
#define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))])
// Forward resampling kernel. Each thread handles one flat output index
// (threads with index >= n exit immediately). The index is decomposed into
// (b, c, y, x) using output_size; a displacement (dx, dy) is read from
// channels 0 and 1 of input2 at (b, y, x), and input1 is sampled at
// (x + dx, y + dy).
// NOTE(review): the template parameter list and the static_cast target types
// appear to be missing in this copy of the file — confirm against the
// original source.
template
__global__ void kernel_resample2d_update_output(const int n,
const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, int kernel_size, bool bilinear) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= n) {
return;
}
scalar_t val = 0.0f;
int dim_b = DIM0(output_size);
int dim_c = DIM1(output_size);
int dim_h = DIM2(output_size);
int dim_w = DIM3(output_size);
int dim_chw = dim_c * dim_h * dim_w;
int dim_hw = dim_h * dim_w;
int b = ( index / dim_chw ) % dim_b;
int c = ( index / dim_hw ) % dim_c;
int y = ( index / dim_w ) % dim_h;
int x = ( index ) % dim_w;
scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
scalar_t xf = static_cast(x) + dx;
scalar_t yf = static_cast(y) + dy;
// Fractional parts of the sample position: the bilinear blend weights.
scalar_t alpha = xf - floor(xf); // alpha
scalar_t beta = yf - floor(yf); // beta
if (bilinear) {
// The four neighbouring pixels, clamped to [0, dim - 1] of the output size.
int xL = max(min( int (floor(xf)), dim_w-1), 0);
int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
int yT = max(min( int (floor(yf)), dim_h-1), 0);
int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
// NOTE(review): fy/fx are added after clamping, so kernel_size > 1 can
// index beyond the clamped range — confirm this is intended.
for (int fy = 0; fy < kernel_size; fy += 1) {
for (int fx = 0; fx < kernel_size; fx += 1) {
val += static_cast((1. - alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xL + fx));
val += static_cast((alpha)*(1. - beta) * DIM3_INDEX(input1, b, c, yT + fy, xR + fx));
val += static_cast((1. - alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xL + fx));
val += static_cast((alpha)*(beta) * DIM3_INDEX(input1, b, c, yB + fy, xR + fx));
}
}
// Written through the flat index rather than output_stride.
output[index] = val;
}
else {
// Nearest-neighbour path: round to the closest pixel and clamp.
int xN = max(min( int (floor(xf + 0.5)), dim_w - 1), 0);
int yN = max(min( int (floor(yf + 0.5)), dim_h - 1), 0);
output[index] = static_cast ( DIM3_INDEX(input1, b, c, yN, xN) );
}
}
// Gradient with respect to input1. Each thread handles one flat gradOutput
// index (threads with index >= n exit immediately), decomposes it into
// (b, c, y, x) using gradOutput_size, and atomically adds the four
// bilinear-weighted shares of gradOutput(b, c, y, x) into gradInput at the
// neighbouring source pixels.
// NOTE(review): the `bilinear` flag is not consulted here; the bilinear
// weights are always used — confirm this is intended.
// NOTE(review): the template parameter list and the static_cast target types
// appear to be missing in this copy of the file — confirm against the
// original source.
template
__global__ void kernel_resample2d_backward_input1(
const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= n) {
return;
}
int dim_b = DIM0(gradOutput_size);
int dim_c = DIM1(gradOutput_size);
int dim_h = DIM2(gradOutput_size);
int dim_w = DIM3(gradOutput_size);
int dim_chw = dim_c * dim_h * dim_w;
int dim_hw = dim_h * dim_w;
int b = ( index / dim_chw ) % dim_b;
int c = ( index / dim_hw ) % dim_c;
int y = ( index / dim_w ) % dim_h;
int x = ( index ) % dim_w;
scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
scalar_t xf = static_cast(x) + dx;
scalar_t yf = static_cast(y) + dy;
// NOTE(review): int() truncates toward zero whereas the neighbour indices
// below use floor(); the two differ for negative xf/yf. In that range both
// neighbours clamp to index 0, so the weights land on the same pixel.
scalar_t alpha = xf - int(xf); // alpha
scalar_t beta = yf - int(yf); // beta
// Neighbours are clamped against the input1 size, not the gradOutput size.
int idim_h = DIM2(input1_size);
int idim_w = DIM3(input1_size);
int xL = max(min( int (floor(xf)), idim_w-1), 0);
int xR = max(min( int (floor(xf)+1), idim_w -1), 0);
int yT = max(min( int (floor(yf)), idim_h-1), 0);
int yB = max(min( int (floor(yf)+1), idim_h-1), 0);
for (int fy = 0; fy < kernel_size; fy += 1) {
for (int fx = 0; fx < kernel_size; fx += 1) {
// atomicAdd because several threads can target the same gradInput element.
atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xL + fx)), (1-alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
atomicAdd(&DIM3_INDEX(gradInput, b, c, (yT + fy), (xR + fx)), (alpha)*(1-beta) * DIM3_INDEX(gradOutput, b, c, y, x));
atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xL + fx)), (1-alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
atomicAdd(&DIM3_INDEX(gradInput, b, c, (yB + fy), (xR + fx)), (alpha)*(beta) * DIM3_INDEX(gradOutput, b, c, y, x));
}
}
}
// Gradient with respect to input2 (the displacement field). Each thread
// handles one flat gradInput index (threads with index >= n exit
// immediately), decomposes it into (b, c, y, x) using gradInput_size, and
// accumulates, over all gradOutput channels, gradOutput(b, ch, y, x) times a
// finite difference of input1 around the sample position.
// Odd c: difference between rows yB and yT, blended horizontally by gamma.
// Even c: difference between columns xR and xL, blended vertically by gamma.
// NOTE(review): the `bilinear` flag is not consulted here — confirm.
// NOTE(review): the template parameter list and the static_cast target types
// appear to be missing in this copy of the file — confirm against the
// original source.
template
__global__ void kernel_resample2d_backward_input2(
const int n, const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride,
const scalar_t* __restrict__ input2, const long4 input2_size, const long4 input2_stride,
const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride,
scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, int kernel_size, bool bilinear) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= n) {
return;
}
scalar_t output = 0.0;
int kernel_rad = (kernel_size - 1)/2;
int dim_b = DIM0(gradInput_size);
int dim_c = DIM1(gradInput_size);
int dim_h = DIM2(gradInput_size);
int dim_w = DIM3(gradInput_size);
int dim_chw = dim_c * dim_h * dim_w;
int dim_hw = dim_h * dim_w;
int b = ( index / dim_chw ) % dim_b;
int c = ( index / dim_hw ) % dim_c;
int y = ( index / dim_w ) % dim_h;
int x = ( index ) % dim_w;
int odim_c = DIM1(gradOutput_size);
scalar_t dx = DIM3_INDEX(input2, b, 0, y, x);
scalar_t dy = DIM3_INDEX(input2, b, 1, y, x);
scalar_t xf = static_cast(x) + dx;
scalar_t yf = static_cast(y) + dy;
// Neighbours clamped to [0, dim - 1] of the gradInput size.
int xL = max(min( int (floor(xf)), dim_w-1), 0);
int xR = max(min( int (floor(xf)+1), dim_w -1), 0);
int yT = max(min( int (floor(yf)), dim_h-1), 0);
int yB = max(min( int (floor(yf)+1), dim_h-1), 0);
if (c % 2) {
// gamma is one minus the horizontal fractional part; it is declared float
// rather than scalar_t.
float gamma = 1 - (xf - floor(xf)); // alpha
for (int i = 0; i <= 2*kernel_rad; ++i) {
for (int j = 0; j <= 2*kernel_rad; ++j) {
for (int ch = 0; ch < odim_c; ++ch) {
output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
}
}
}
}
else {
// Here gamma is one minus the vertical fractional part.
float gamma = 1 - (yf - floor(yf)); // alpha
for (int i = 0; i <= 2*kernel_rad; ++i) {
for (int j = 0; j <= 2*kernel_rad; ++j) {
for (int ch = 0; ch < odim_c; ++ch) {
output += (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xR + i));
output -= (gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yT + j), (xL + i));
output += (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xR + i));
output -= (1-gamma) * DIM3_INDEX(gradOutput, b, ch, y, x) * DIM3_INDEX(input1, b, ch, (yB + j), (xL + i));
}
}
}
}
// Written through the flat index rather than gradInput_stride.
gradInput[index] = output;
}
// Host launcher for the forward pass. Packs the size and stride of each
// tensor into long4 values and launches kernel_resample2d_update_output with
// one thread per output element: ceil(n / CUDA_NUM_THREADS) blocks of
// CUDA_NUM_THREADS threads on the current CUDA stream.
// No launch error is checked here (see the TODO at the end).
// NOTE(review): the template arguments on the kernel name and on the data()
// accessors appear to be missing in this copy of the file — confirm against
// the original source.
void resample2d_kernel_forward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& output,
int kernel_size,
bool bilinear) {
// Total number of output elements, i.e. the number of threads doing work.
int n = output.numel();
const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3));
const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3));
// TODO: when atomicAdd gets resolved, change to AT_DISPATCH_FLOATING_TYPES_AND_HALF
// AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_forward_kernel", ([&] {
kernel_resample2d_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
n,
input1.data(),
input1_size,
input1_stride,
input2.data(),
input2_size,
input2_stride,
output.data(),
output_size,
output_stride,
kernel_size,
bilinear);
// }));
// TODO: ATen-equivalent check
// THCudaCheck(cudaGetLastError());
}
// Host launcher for the backward pass. Launches two kernels on the current
// CUDA stream:
//   1. kernel_resample2d_backward_input1, one thread per gradOutput element,
//      accumulating into gradInput1;
//   2. kernel_resample2d_backward_input2, one thread per gradInput2 element,
//      writing gradInput2.
// Kernel 1 only adds into gradInput1, so the caller must pass it zeroed.
// No launch error is checked here (see the TODO at the end).
// NOTE(review): the template arguments on the kernel names and on the data()
// accessors appear to be missing in this copy of the file — confirm against
// the original source.
void resample2d_kernel_backward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
at::Tensor& gradInput2,
int kernel_size,
bool bilinear) {
// Thread count for the first launch: one per gradOutput element.
int n = gradOutput.numel();
const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3));
const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3));
const long4 input2_size = make_long4(input2.size(0), input2.size(1), input2.size(2), input2.size(3));
const long4 input2_stride = make_long4(input2.stride(0), input2.stride(1), input2.stride(2), input2.stride(3));
const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), gradOutput.size(3));
const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3));
const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3));
const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3));
// AT_DISPATCH_FLOATING_TYPES(input1.type(), "resample_backward_input1", ([&] {
kernel_resample2d_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
n,
input1.data(),
input1_size,
input1_stride,
input2.data(),
input2_size,
input2_stride,
gradOutput.data(),
gradOutput_size,
gradOutput_stride,
gradInput1.data(),
gradInput1_size,
gradInput1_stride,
kernel_size,
bilinear
);
// }));
const long4 gradInput2_size = make_long4(gradInput2.size(0), gradInput2.size(1), gradInput2.size(2), gradInput2.size(3));
const long4 gradInput2_stride = make_long4(gradInput2.stride(0), gradInput2.stride(1), gradInput2.stride(2), gradInput2.stride(3));
// Thread count for the second launch: one per gradInput2 element.
n = gradInput2.numel();
// AT_DISPATCH_FLOATING_TYPES(gradInput2.type(), "resample_backward_input2", ([&] {
kernel_resample2d_backward_input2<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>(
//at::globalContext().getCurrentCUDAStream() >>>(
n,
input1.data(),
input1_size,
input1_stride,
input2.data(),
input2_size,
input2_stride,
gradOutput.data(),
gradOutput_size,
gradOutput_stride,
gradInput2.data(),
gradInput2_size,
gradInput2_stride,
kernel_size,
bilinear
);
// }));
// TODO: Use the ATen equivalent to get last error
// THCudaCheck(cudaGetLastError());
}
================================================
FILE: dvs/flownet2/networks/resample2d_package/resample2d_kernel.cuh
================================================
#pragma once
#include
// Launches the forward resampling kernel (defined in resample2d_kernel.cu):
// fills `output` by sampling `input1` at positions displaced by `input2`.
void resample2d_kernel_forward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& output,
int kernel_size,
bool bilinear);
// Launches the two backward kernels (defined in resample2d_kernel.cu):
// accumulates into `gradInput1` and writes `gradInput2` from `gradOutput`.
void resample2d_kernel_backward(
at::Tensor& input1,
at::Tensor& input2,
at::Tensor& gradOutput,
at::Tensor& gradInput1,
at::Tensor& gradInput2,
int kernel_size,
bool bilinear);
================================================
FILE: dvs/flownet2/networks/resample2d_package/setup.py
================================================
#!/usr/bin/env python3
import os
import torch
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# Flags handed to the host C++ compiler.
cxx_args = ['-std=c++11']

# Flags handed to nvcc: one '-gencode' pair per sm_* target, followed by a
# final pair that requests code=compute_70.
nvcc_args = []
for arch in ('50', '52', '60', '61', '70'):
    nvcc_args += ['-gencode', 'arch=compute_{0},code=sm_{0}'.format(arch)]
nvcc_args += ['-gencode', 'arch=compute_70,code=compute_70']

# The extension is built from the binding source and the kernel source.
resample2d_extension = CUDAExtension(
    'resample2d_cuda',
    ['resample2d_cuda.cc', 'resample2d_kernel.cu'],
    extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args},
)

setup(
    name='resample2d_cuda',
    ext_modules=[resample2d_extension],
    cmdclass={'build_ext': BuildExtension},
)
================================================
FILE: dvs/flownet2/networks/submodules.py
================================================
# freda (todo) :
import torch.nn as nn
import torch
import numpy as np
def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
    """Build a Conv2d -> [BatchNorm2d] -> LeakyReLU(0.1) block.

    The convolution is padded with ``(kernel_size - 1) // 2``. When
    ``batchNorm`` is truthy the convolution has no bias and is followed by
    batch normalisation; otherwise it carries a bias and no normalisation.
    """
    layers = [
        nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            bias=not batchNorm,
        )
    ]
    if batchNorm:
        layers.append(nn.BatchNorm2d(out_planes))
    layers.append(nn.LeakyReLU(0.1, inplace=True))
    return nn.Sequential(*layers)
def i_conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, bias = True):
    """Build a Conv2d -> [BatchNorm2d] block with no activation.

    The convolution is padded with ``(kernel_size - 1) // 2`` and uses the
    given ``bias`` flag; batch normalisation is appended only when
    ``batchNorm`` is truthy.
    """
    layers = [
        nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            bias=bias,
        )
    ]
    if batchNorm:
        layers.append(nn.BatchNorm2d(out_planes))
    return nn.Sequential(*layers)
def predict_flow(in_planes):
    """Return a 3x3, stride-1, padding-1 biased convolution with 2 outputs."""
    flow_channels = 2
    return nn.Conv2d(
        in_planes,
        flow_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=True,
    )
def deconv(in_planes, out_planes):
    """Build a ConvTranspose2d(kernel 4, stride 2, padding 1) + LeakyReLU(0.1)."""
    upsample = nn.ConvTranspose2d(
        in_planes,
        out_planes,
        kernel_size=4,
        stride=2,
        padding=1,
        bias=True,
    )
    activation = nn.LeakyReLU(0.1, inplace=True)
    return nn.Sequential(upsample, activation)
class tofp16(nn.Module):
    """Parameter-free module that casts its input with ``.half()``."""

    def forward(self, input):
        half_precision = input.half()
        return half_precision
class tofp32(nn.Module):
    """Parameter-free module that casts its input with ``.float()``."""

    def forward(self, input):
        single_precision = input.float()
        return single_precision
def init_deconv_bilinear(weight):
    """Fill every (height, width) slice of ``weight`` with a bilinear kernel.

    The kernel is the outer product of two triangular profiles whose factor
    ``f`` and centre ``c`` are both derived from the kernel *width*. The
    tensor is overwritten in place through ``weight.data``.

    The previous version indexed its ``[height, width]`` array as ``[x, y]``,
    which only works for square kernels; rows are now indexed by ``y`` and
    columns by ``x``. Square kernels get exactly the same values as before.

    Args:
        weight: tensor whose last two dimensions are the kernel height and
            width; the kernel is broadcast over all leading dimensions.
    """
    f_shape = weight.size()
    heigh, width = f_shape[-2], f_shape[-1]
    f = np.ceil(width / 2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)

    # One-dimensional triangular profiles along each axis.
    row_profile = 1 - np.abs(np.arange(heigh) / f - c)
    col_profile = 1 - np.abs(np.arange(width) / f - c)
    bilinear = np.outer(row_profile, col_profile)

    weight.data.fill_(0.)
    # Broadcast the 2-D kernel over every leading index of the weight.
    weight.data[...] = torch.from_numpy(bilinear)
def save_grad(grads, name):
    """Return a one-argument hook that stores its argument in ``grads[name]``."""

    def hook(grad):
        # Stores only; the hook itself returns None.
        grads.__setitem__(name, grad)

    return hook
'''
def save_grad(grads, name):
def hook(grad):
grads[name] = grad
return hook
import torch
from channelnorm_package.modules.channelnorm import ChannelNorm
model = ChannelNorm().cuda()
grads = {}
a = 100*torch.autograd.Variable(torch.randn((1,3,5,5)).cuda(), requires_grad=True)
a.register_hook(save_grad(grads, 'a'))
b = model(a)
y = torch.mean(b)
y.backward()
'''
================================================
FILE: dvs/flownet2/run.sh
================================================
#!/bin/bash
# Runs main.py in inference mode with the FlowNet2 model on the "Google"
# inference dataset rooted at ./../video, resuming from
# ./FlowNet2_checkpoint.pth.tar, with --save_flow and --inference_visualize.
python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
--inference_dataset_root ./../video \
--resume ./FlowNet2_checkpoint.pth.tar \
--inference_visualize
================================================
FILE: dvs/flownet2/run_release.sh
================================================
#!/bin/bash
# Runs main.py in inference mode with the FlowNet2 model twice, with
# --save_flow and --inference_visualize, resuming from
# ./FlowNet2_checkpoint.pth.tar each time.
# First pass: dataset root ./../dataset_release/test
python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
--inference_dataset_root ./../dataset_release/test \
--resume ./FlowNet2_checkpoint.pth.tar \
--inference_visualize
# Second pass: dataset root ./../dataset_release/training
python main.py --inference --model FlowNet2 --save_flow --inference_dataset Google \
--inference_dataset_root ./../dataset_release/training \
--resume ./FlowNet2_checkpoint.pth.tar \
--inference_visualize
================================================
FILE: dvs/flownet2/utils/__init__.py
================================================
================================================
FILE: dvs/flownet2/utils/flow_utils.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import os.path
# Magic number written as the first float32 of a .flo file by writeFlow;
# readFlow checks for the same value.
TAG_CHAR = np.array([202021.25], np.float32)
def readFlow(fn):
    """ Read .flo file in Middlebury format.

    The file holds a float32 magic number (202021.25), then int32 width and
    height, then ``2 * width * height`` float32 values.

    Returns an array of shape ``(height, width, 2)``, or ``None`` (after
    printing a message) when the magic number is missing or wrong or the
    header is truncated. The payload is shaped with ``np.resize``, so a short
    payload is padded by repetition rather than rejected.
    """
    # Code adapted from:
    # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
    # WARNING: this will work on little-endian architectures (eg Intel x86) only!
    with open(fn, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)
        # An empty file yields an empty array; test its size explicitly
        # instead of relying on the truth value of an array comparison.
        if magic.size != 1 or magic[0] != 202021.25:
            print('Magic number incorrect. Invalid .flo file')
            return None
        dims = np.fromfile(f, np.int32, count=2)
        if dims.size != 2:
            print('Truncated header. Invalid .flo file')
            return None
        # Index the elements out rather than calling int() on 1-element arrays.
        w = int(dims[0])
        h = int(dims[1])
        data = np.fromfile(f, np.float32, count=2 * w * h)
        # Reshape data into 3D array (columns, rows, bands)
        # The reshape here is for visualization, the original code is (w,h,2)
        return np.resize(data, (h, w, 2))
def writeFlow(filename,uv,v=None):
    """ Write optical flow to file.
    If v is None, uv is assumed to contain both u and v channels,
    stacked in depth.
    Original code by Deqing Sun, adapted from Daniel Scharstein.

    The file is written as: TAG_CHAR, int32 width, int32 height, then the
    float32 flow values with u and v interleaved along each row.
    """
    nBands = 2
    if v is None:
        assert(uv.ndim == 3)
        assert(uv.shape[2] == 2)
        u = uv[:,:,0]
        v = uv[:,:,1]
    else:
        u = uv
    assert(u.shape == v.shape)
    height,width = u.shape
    # arrange into matrix form: even columns hold u, odd columns hold v
    tmp = np.zeros((height, width*nBands))
    tmp[:, 0::2] = u
    tmp[:, 1::2] = v
    # The context manager closes the file even if one of the writes raises.
    with open(filename,'wb') as f:
        # write the header
        f.write(TAG_CHAR)
        np.array(width).astype(np.int32).tofile(f)
        np.array(height).astype(np.int32).tofile(f)
        tmp.astype(np.float32).tofile(f)
# ref: https://github.com/sampepose/flownet2-tf/
# blob/18f87081db44939414fc4a48834f9e0da3e69f4c/src/flowlib.py#L240
def visulize_flow_file(flow_filename, save_dir=None):
    """Render a .flo file as a colour image and optionally save it.

    When ``save_dir`` is truthy the image is written there as
    ``<name>-vis.png``, where ``<name>`` is the file name of
    ``flow_filename`` without its extension. Nothing is done when the flow
    file cannot be read (``readFlow`` returns ``None``).
    """
    flow_data = readFlow(flow_filename)
    if flow_data is None:
        # readFlow has already reported the problem; there is nothing to draw.
        return
    img = flow2img(flow_data)
    # plt.imshow(img)
    # plt.show()
    if save_dir:
        stem = os.path.splitext(os.path.basename(flow_filename))[0]
        plt.imsave(os.path.join(save_dir, "%s-vis.png" % stem), img)
def flow2img(flow_data):
    """
    convert optical flow into color image

    Flow vectors with a component above 1e7 in magnitude are treated as
    unknown and drawn black. The remaining vectors are scaled by the largest
    magnitude before colouring. The input array is not modified.

    :param flow_data: array indexed as [row, column, band], band 0 = u, band 1 = v
    :return: color image as a uint8 array with 3 channels
    """
    UNKNOW_FLOW_THRESHOLD = 1e7
    # Work on copies so the caller's flow array is left untouched.
    u = flow_data[:, :, 0].copy()
    v = flow_data[:, :, 1].copy()

    idx_unknown = (abs(u) > UNKNOW_FLOW_THRESHOLD) | (abs(v) > UNKNOW_FLOW_THRESHOLD)
    u[idx_unknown] = 0
    v[idx_unknown] = 0

    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))

    # eps belongs in the denominator: it keeps an all-zero flow field from
    # dividing by zero.
    scale = maxrad + np.finfo(float).eps
    u = u / scale
    v = v / scale

    img = compute_color(u, v)
    idx = np.repeat(idx_unknown[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0
    return np.uint8(img)
def compute_color(u, v):
    """
    compute optical flow color map

    Positions where either component is NaN are zeroed in ``u`` and ``v``
    (in place) and come out black in the result.

    :param u: horizontal optical flow
    :param v: vertical optical flow
    :return: array of shape (height, width, 3) with values in 0..255
    """
    rows, cols = u.shape
    canvas = np.zeros((rows, cols, 3))

    invalid = np.isnan(u) | np.isnan(v)
    u[invalid] = 0
    v[invalid] = 0

    wheel = make_color_wheel()
    n_hues = np.size(wheel, 0)

    magnitude = np.sqrt(u ** 2 + v ** 2)
    angle = np.arctan2(-v, -u) / np.pi

    # Fractional 1-based position on the wheel and its two neighbouring hues.
    position = (angle + 1) / 2 * (n_hues - 1) + 1
    lower = np.floor(position).astype(int)
    upper = lower + 1
    upper[upper == n_hues + 1] = 1
    blend = position - lower

    inside = magnitude <= 1
    outside = np.logical_not(inside)
    keep = 1 - invalid

    for channel in range(0, np.size(wheel, 1)):
        hues = wheel[:, channel]
        shade_low = hues[lower - 1] / 255
        shade_high = hues[upper - 1] / 255
        shade = (1 - blend) * shade_low + blend * shade_high

        # Magnitudes up to 1 blend towards white; larger ones are dimmed.
        shade[inside] = 1 - magnitude[inside] * (1 - shade[inside])
        shade[outside] *= 0.75

        canvas[:, :, channel] = np.uint8(np.floor(255 * shade * keep))

    return canvas
def make_color_wheel():
    """
    Generate color wheel according Middlebury color code

    The wheel is built from six consecutive segments. In each segment one
    RGB channel is held at 255 while another ramps up from 0 or down from 255.

    :return: Color wheel, an array of shape (55, 3)
    """
    # (length, channel held at 255, ramping channel, ramp rises?)
    segments = (
        (15, 0, 1, True),    # RY
        (6, 1, 0, False),    # YG
        (4, 1, 2, True),     # GC
        (11, 2, 1, False),   # CB
        (13, 2, 0, True),    # BM
        (6, 0, 2, False),    # MR
    )

    ncols = sum(length for length, _, _, _ in segments)
    colorwheel = np.zeros([ncols, 3])

    start = 0
    for length, full_channel, ramp_channel, rising in segments:
        stop = start + length
        ramp = np.transpose(np.floor(255 * np.arange(0, length) / length))
        colorwheel[start:stop, full_channel] = 255
        colorwheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp
        start = stop

    return colorwheel
================================================
FILE: dvs/flownet2/utils/frame_utils.py
================================================
import numpy as np
from os.path import *
from imageio import imread
from . import flow_utils
def read_gen(file_name):
    """Load a file according to its extension.

    * ``.png`` / ``.jpeg`` / ``.ppm`` / ``.jpg``: read with ``imread``; if the
      image has more than three channels only the first three are returned.
    * ``.bin`` / ``.raw``: loaded with ``np.load``.
    * ``.flo``: read with ``flow_utils.readFlow`` and cast to float32.

    Any other extension returns an empty list.
    """
    ext = splitext(file_name)[-1]
    if ext in ('.png', '.jpeg', '.ppm', '.jpg'):
        im = imread(file_name)
        # A 2-D (single-channel) image has no channel axis to inspect, so
        # check the rank before looking at shape[2].
        if im.ndim == 3 and im.shape[2] > 3:
            return im[:, :, :3]
        return im
    if ext in ('.bin', '.raw'):
        return np.load(file_name)
    if ext == '.flo':
        return flow_utils.readFlow(file_name).astype(np.float32)
    return []
================================================
FILE: dvs/flownet2/utils/param_utils.py
================================================
import torch
import torch.nn as nn
import numpy as np
def parse_flownetc(modules, weights, biases):
    """Copy named weight/bias arrays into the convolution layers of ``modules``.

    Every ``nn.Conv2d`` / ``nn.ConvTranspose2d`` found while iterating
    ``modules`` consumes the next key of the fixed list below; other module
    types are skipped. The ``conv1`` weights are flipped along the input
    channel axis before being copied. Layers built without a bias are left
    without one, matching the other parsers in this file.

    Args:
        modules: iterable of modules, in the same order as the key list.
        weights: mapping from key to a NumPy weight array.
        biases: mapping from key to a NumPy bias array.
    """
    keys = [
        'conv1',
        'conv2',
        'conv3',
        'conv_redir',
        'conv3_1',
        'conv4',
        'conv4_1',
        'conv5',
        'conv5_1',
        'conv6',
        'conv6_1',
        'deconv5',
        'deconv4',
        'deconv3',
        'deconv2',
        'Convolution1',
        'Convolution2',
        'Convolution3',
        'Convolution4',
        'Convolution5',
        'upsample_flow6to5',
        'upsample_flow5to4',
        'upsample_flow4to3',
        'upsample_flow3to2',
    ]
    i = 0
    for m in modules:
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            weight = weights[keys[i]].copy()
            bias = biases[keys[i]].copy()
            if keys[i] == 'conv1':
                # Reverse the input-channel order of the first layer.
                m.weight.data[:, :, :, :] = torch.from_numpy(np.flip(weight, axis=1).copy())
            else:
                m.weight.data[:, :, :, :] = torch.from_numpy(weight)
            # Convolutions created with bias=False have no tensor to fill.
            if m.bias is not None:
                m.bias.data[:] = torch.from_numpy(bias)
            i = i + 1
    return
def parse_flownets(modules, weights, biases, param_prefix='net2_'):
    """Copy named weight/bias arrays into the convolution layers of ``modules``.

    Layer names are prefixed with ``param_prefix`` (twice for the
    ``upsample_*`` layers). Every ``nn.Conv2d`` / ``nn.ConvTranspose2d``
    consumes the next key in order. For the first layer the input channels
    0-2, 3-5 and 6-8 are each reversed and the remaining channels copied
    as they are. Biases are copied only where the layer has one.
    """
    layer_names = [
        'conv1',
        'conv2',
        'conv3',
        'conv3_1',
        'conv4',
        'conv4_1',
        'conv5',
        'conv5_1',
        'conv6',
        'conv6_1',
        'deconv5',
        'deconv4',
        'deconv3',
        'deconv2',
        'predict_conv6',
        'predict_conv5',
        'predict_conv4',
        'predict_conv3',
        'predict_conv2',
        'upsample_flow6to5',
        'upsample_flow5to4',
        'upsample_flow4to3',
        'upsample_flow3to2',
    ]
    keys = [
        (param_prefix + param_prefix if 'upsample' in layer else param_prefix) + layer
        for layer in layer_names
    ]
    first_key = param_prefix + 'conv1'

    position = 0
    for module in modules:
        if not isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
            continue
        key = keys[position]
        position += 1

        weight = weights[key].copy()
        bias = biases[key].copy()

        if key == first_key:
            for lo in (0, 3, 6):
                hi = lo + 3
                flipped = np.flip(weight[:, lo:hi, :, :], axis=1).copy()
                module.weight.data[:, lo:hi, :, :] = torch.from_numpy(flipped)
            module.weight.data[:, 9:, :, :] = torch.from_numpy(weight[:, 9:, :, :].copy())
        else:
            module.weight.data[:, :, :, :] = torch.from_numpy(weight)

        if module.bias is not None:
            module.bias.data[:] = torch.from_numpy(bias)
    return
def parse_flownetsonly(modules, weights, biases, param_prefix=''):
    """Copy named weight/bias arrays into the convolution layers of ``modules``.

    Layer names are prefixed with ``param_prefix`` (twice for the
    ``upsample_*`` layers). Every ``nn.Conv2d`` / ``nn.ConvTranspose2d``
    consumes the next key in order. For the first layer only input channels
    0-2 and 3-5 are written, each reversed. Biases are copied only where the
    layer has one.
    """
    layer_names = [
        'conv1',
        'conv2',
        'conv3',
        'conv3_1',
        'conv4',
        'conv4_1',
        'conv5',
        'conv5_1',
        'conv6',
        'conv6_1',
        'deconv5',
        'deconv4',
        'deconv3',
        'deconv2',
        'Convolution1',
        'Convolution2',
        'Convolution3',
        'Convolution4',
        'Convolution5',
        'upsample_flow6to5',
        'upsample_flow5to4',
        'upsample_flow4to3',
        'upsample_flow3to2',
    ]
    keys = [
        (param_prefix + param_prefix if 'upsample' in layer else param_prefix) + layer
        for layer in layer_names
    ]
    first_key = param_prefix + 'conv1'

    position = 0
    for module in modules:
        if not isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
            continue
        key = keys[position]
        position += 1

        weight = weights[key].copy()
        bias = biases[key].copy()

        if key == first_key:
            for lo in (0, 3):
                hi = lo + 3
                flipped = np.flip(weight[:, lo:hi, :, :], axis=1).copy()
                module.weight.data[:, lo:hi, :, :] = torch.from_numpy(flipped)
        else:
            module.weight.data[:, :, :, :] = torch.from_numpy(weight)

        if module.bias is not None:
            module.bias.data[:] = torch.from_numpy(bias)
    return
def parse_flownetsd(modules, weights, biases, param_prefix='netsd_'):
    """Copy named weight/bias arrays into the convolution layers of ``modules``.

    Every layer name is prefixed once with ``param_prefix``. Every
    ``nn.Conv2d`` / ``nn.ConvTranspose2d`` consumes the next key in order.
    For the first layer (``conv0``) only input channels 0-2 and 3-5 are
    written, each reversed. Biases are copied only where the layer has one.
    """
    layer_names = [
        'conv0',
        'conv1',
        'conv1_1',
        'conv2',
        'conv2_1',
        'conv3',
        'conv3_1',
        'conv4',
        'conv4_1',
        'conv5',
        'conv5_1',
        'conv6',
        'conv6_1',
        'deconv5',
        'deconv4',
        'deconv3',
        'deconv2',
        'interconv5',
        'interconv4',
        'interconv3',
        'interconv2',
        'Convolution1',
        'Convolution2',
        'Convolution3',
        'Convolution4',
        'Convolution5',
        'upsample_flow6to5',
        'upsample_flow5to4',
        'upsample_flow4to3',
        'upsample_flow3to2',
    ]
    keys = [param_prefix + layer for layer in layer_names]
    first_key = param_prefix + 'conv0'

    position = 0
    for module in modules:
        if not isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
            continue
        key = keys[position]
        position += 1

        weight = weights[key].copy()
        bias = biases[key].copy()

        if key == first_key:
            for lo in (0, 3):
                hi = lo + 3
                flipped = np.flip(weight[:, lo:hi, :, :], axis=1).copy()
                module.weight.data[:, lo:hi, :, :] = torch.from_numpy(flipped)
        else:
            module.weight.data[:, :, :, :] = torch.from_numpy(weight)

        if module.bias is not None:
            module.bias.data[:] = torch.from_numpy(bias)
    return
def parse_flownetfusion(modules, weights, biases, param_prefix='fuse_'):
    """Copy named weight/bias arrays into the convolution layers of ``modules``.

    Every layer name is prefixed once with ``param_prefix``. Every
    ``nn.Conv2d`` / ``nn.ConvTranspose2d`` consumes the next key in order.
    For the first layer (``conv0``) input channels 0-2 are reversed and the
    remaining channels copied as they are. Biases are copied only where the
    layer has one.
    """
    layer_names = [
        'conv0',
        'conv1',
        'conv1_1',
        'conv2',
        'conv2_1',
        'deconv1',
        'deconv0',
        'interconv1',
        'interconv0',
        '_Convolution5',
        '_Convolution6',
        '_Convolution7',
        'upsample_flow2to1',
        'upsample_flow1to0',
    ]
    keys = [param_prefix + layer for layer in layer_names]
    first_key = param_prefix + 'conv0'

    position = 0
    for module in modules:
        if not isinstance(module, (nn.Conv2d, nn.ConvTranspose2d)):
            continue
        key = keys[position]
        position += 1

        weight = weights[key].copy()
        bias = biases[key].copy()

        if key == first_key:
            flipped = np.flip(weight[:, 0:3, :, :], axis=1).copy()
            module.weight.data[:, 0:3, :, :] = torch.from_numpy(flipped)
            module.weight.data[:, 3:, :, :] = torch.from_numpy(weight[:, 3:, :, :].copy())
        else:
            module.weight.data[:, :, :, :] = torch.from_numpy(weight)

        if module.bias is not None:
            module.bias.data[:] = torch.from_numpy(bias)
    return
================================================
FILE: dvs/flownet2/utils/tools.py
================================================
# freda (todo) :
import os, time, sys, math
import subprocess, shutil
from os.path import *
import numpy as np
from inspect import isclass
from pytz import timezone
from datetime import datetime
import inspect
import torch
def datestr():
    """Return the current US/Pacific time formatted as ``YYYYMMDD_HHMM``."""
    now = datetime.now(timezone('US/Pacific'))
    fields = (now.year, now.month, now.day, now.hour, now.minute)
    return '{}{:02}{:02}_{:02}{:02}'.format(*fields)
def module_to_dict(module, exclude=[]):
    """Map attribute name to class for every class found in ``dir(module)``.

    A class is left out when either its attribute name or the class object
    itself appears in ``exclude``.
    """
    classes = {}
    for attr_name in dir(module):
        member = getattr(module, attr_name)
        if not isclass(member):
            continue
        if attr_name in exclude or member in exclude:
            continue
        classes[attr_name] = member
    return classes
class TimerBlock:
    """Context manager that prints a title and timestamps log messages.

    The title is printed on construction. Entering the block records the
    start time; leaving it records ``end`` and ``interval`` and logs whether
    the block finished or raised. Exceptions are not suppressed.

    Timing uses ``time.perf_counter()``; ``time.clock()`` was removed in
    Python 3.8.
    """

    def __init__(self, title):
        print(("{}".format(title)))

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end = time.perf_counter()
        self.interval = self.end - self.start

        if exc_type is not None:
            self.log("Operation failed\n")
        else:
            self.log("Operation finished\n")

    def log(self, string):
        """Print ``string`` prefixed with the time elapsed since entry.

        The elapsed time is shown in seconds, or in minutes once it exceeds 60 s.
        """
        duration = time.perf_counter() - self.start
        units = 's'
        if duration > 60:
            duration = duration / 60.
            units = 'm'
        print((" [{:.3f}{}] {}".format(duration, units, string)))

    def log2file(self, fid, string):
        """Append ``string`` plus a newline to the file at path ``fid``."""
        # The context manager closes the file even if the write raises.
        with open(fid, 'a') as log_file:
            log_file.write("%s\n" % (string))
def add_arguments_for_module(parser, module, argument_for_class, default, skip_params=None, parameter_defaults=None):
    """Register a class-selection option and options for its constructor.

    Adds ``--<argument_for_class>`` (choices are the class names found in
    ``module``), parses the currently known command-line arguments to find
    out which class was selected, then adds one
    ``--<argument_for_class>_<param>`` option for each constructor parameter
    that has a default value.

    Args:
        parser: ``argparse.ArgumentParser`` to extend.
        module: Module whose classes are the selectable choices.
        argument_for_class: Name of the selection option; also the prefix of
            the generated per-parameter options.
        default: Default class name for the selection option.
        skip_params: Constructor parameter names that must not be exposed.
        parameter_defaults: Mapping ``{param_name: default}`` overriding the
            defaults declared in the constructor signature.
    """
    skip_params = [] if skip_params is None else skip_params
    parameter_defaults = {} if parameter_defaults is None else parameter_defaults
    # 'self' and the conventional 'args' object are never exposed.
    skipped = set(skip_params) | {'self', 'args'}

    argument_group = parser.add_argument_group(argument_for_class.capitalize())

    module_dict = module_to_dict(module)
    argument_group.add_argument('--' + argument_for_class, type=str, default=default, choices=list(module_dict.keys()))

    args, _ = parser.parse_known_args()
    class_obj = module_dict[vars(args)[argument_for_class]]

    # inspect.getargspec was removed in Python 3.11; prefer getfullargspec
    # and only fall back to the legacy function where it is the sole option.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    argspec = get_spec(class_obj.__init__)

    # Defaults align with the *last* parameters, so walk both reversed.
    defaults = argspec.defaults[::-1] if argspec.defaults else None
    reversed_names = argspec.args[::-1]

    for i, arg in enumerate(reversed_names):
        if arg in skipped:
            continue
        cmd_arg = '{}_{}'.format(argument_for_class, arg)
        if arg in parameter_defaults:
            argument_group.add_argument('--{}'.format(cmd_arg), type=type(parameter_defaults[arg]), default=parameter_defaults[arg])
        elif defaults is not None and i < len(defaults):
            argument_group.add_argument('--{}'.format(cmd_arg), type=type(defaults[i]), default=defaults[i])
        else:
            # Report the selected class; module.__class__.__name__ would
            # always print "module".
            print(("[Warning]: non-default argument '{}' detected on class '{}'. This argument cannot be modified via the command line"
                   .format(arg, class_obj.__name__)))
            # We don't have a good way of dealing with inferring the type of the argument
            # TODO: try creating a custom action and using ast's infer type?
            # else:
            #     argument_group.add_argument('--{}'.format(cmd_arg), required=True)
def kwargs_from_args(args, argument_for_class):
    """Collect the ``<argument_for_class>_*`` options as constructor kwargs.

    Args:
        args: Namespace-like object (anything ``vars`` accepts).
        argument_for_class: Option prefix without the trailing underscore.

    Returns:
        dict: ``{name_without_prefix: value}`` for every attribute whose name
        starts with ``<argument_for_class>_``, except
        ``<argument_for_class>_class``.
    """
    prefix = argument_for_class + '_'
    reserved = prefix + 'class'
    # Match on the prefix only: a substring test would also pick up keys that
    # merely contain the prefix and then slice them at the wrong offset.
    return {
        key[len(prefix):]: value
        for key, value in vars(args).items()
        if key.startswith(prefix) and key != reserved
    }
def format_dictionary_of_losses(labels, values):
    """Render ``name: value`` pairs as one comma-separated string.

    Values of at least 0.001 use three decimals, smaller ones scientific
    notation. If formatting raises ``TypeError`` or ``ValueError`` the pairs
    are printed and a ``[Log Error]`` string is returned instead.
    """
    try:
        pieces = []
        for name, value in zip(labels, values):
            spec = '.3f' if value >= 0.001 else '.1e'
            pieces.append(('{}: {:' + spec + '}').format(name, value))
        string = ', '.join(pieces)
    except (TypeError, ValueError) as e:
        print((list(zip(labels, values))))
        string = '[Log Error] ' + str(e)
    return string
class IteratorTimer():
    """Iterator wrapper that records how long each ``next`` call takes.

    After every successful step ``last_duration`` holds the wall-clock
    seconds spent fetching that item from the wrapped iterable.
    """

    def __init__(self, iterable):
        self.iterable = iterable
        self.iterator = self.iterable.__iter__()

    def __iter__(self):
        return self

    def __len__(self):
        return len(self.iterable)

    def __next__(self):
        began = time.time()
        item = next(self.iterator)
        self.last_duration = time.time() - began
        return item

    # Alias kept for callers using the Python 2 iterator protocol name.
    next = __next__
def gpumemusage():
    """Summarise GPU memory usage as reported by ``nvidia-smi``.

    Returns:
        str: One ``<percent>%--<used>GB/<total>GB `` entry per GPU,
        concatenated in the order the tool lists them.

    Raises:
        subprocess.CalledProcessError: If the shell pipeline exits non-zero.
    """
    raw = subprocess.check_output("nvidia-smi | grep MiB | cut -f 3 -d '|'", shell=True)
    # check_output returns bytes on Python 3; decode before text processing.
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8', errors='replace')
    gpu_mem = raw.replace(' ', '').replace('\n', '').replace('i', '')
    all_stat = [float(a) for a in gpu_mem.replace('/', '').split('MB')[:-1]]

    usage = ''
    # Values come in (used, total) pairs; integer division keeps range() valid.
    for i in range(len(all_stat) // 2):
        curr, tot = all_stat[2 * i], all_stat[2 * i + 1]
        util = "%1.2f" % (100 * curr / tot) + '%'
        cmem = str(int(math.ceil(curr / 1024.))) + 'GB'
        gmem = str(int(math.ceil(tot / 1024.))) + 'GB'
        # Explicit separator instead of relying on os.path.join.
        usage += util + '--' + cmem + '/' + gmem + ' '
    return usage
def update_hyperparameter_schedule(args, epoch, global_iteration, optimizer):
    """Decay the learning rate of every parameter group on schedule.

    When ``args.schedule_lr_frequency`` is positive and
    ``global_iteration + 1`` is a multiple of it, each group's ``lr`` is
    divided by ``args.schedule_lr_fraction`` and floored at 1e-6.
    ``epoch`` is accepted but not used.
    """
    frequency = args.schedule_lr_frequency
    if not frequency > 0:
        return
    for group in optimizer.param_groups:
        if (global_iteration + 1) % frequency != 0:
            continue
        group['lr'] /= float(args.schedule_lr_fraction)
        group['lr'] = float(np.maximum(group['lr'], 0.000001))
def save_checkpoint(state, is_best, path, prefix, filename='checkpoint.pth.tar'):
    """Serialise ``state`` to ``<path>/<prefix>_<filename>``.

    When ``is_best`` is truthy the saved file is additionally copied to
    ``<path>/<prefix>_model_best.pth.tar``.
    """
    base = os.path.join(path, prefix)
    checkpoint_file = '_'.join((base, filename))
    torch.save(state, checkpoint_file)
    if not is_best:
        return
    shutil.copyfile(checkpoint_file, base + '_model_best.pth.tar')
================================================
FILE: dvs/gyro/__init__.py
================================================
from .gyro_function import (
GetGyroAtTimeStamp,
QuaternionProduct,
QuaternionReciprocal,
ConvertQuaternionToAxisAngle,
FindOISAtTimeStamp,
GetMetadata,
GetProjections,
GetVirtualProjection,
GetForwardGrid,
CenterZoom,
GetWarpingFlow,
torch_norm_quat,
torch_QuaternionProduct,
torch_QuaternionReciprocal,
torch_GetVirtualProjection,
get_static,
torch_GetForwardGrid,
torch_GetWarpingFlow,
train_GetGyroAtTimeStamp,
train_ConvertQuaternionToAxisAngle,
ConvertAxisAngleToQuaternion,
torch_ConvertAxisAngleToQuaternion,
torch_ConvertQuaternionToAxisAngle,
ConvertAxisAngleToQuaternion_no_angle,
ConvertQuaternionToAxisAngle_no_angle,
torch_GetHomographyTransformFromProjections,
torch_ApplyTransform,
norm_quat,
SlerpWithDefault
)
from .gyro_io import (
LoadGyroData,
LoadOISData,
LoadFrameData,
LoadStabResult,
get_grid,
get_rotations,
visual_rotation
)
================================================
FILE: dvs/gyro/gyro_function.py
================================================
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt
import torch
from torch.autograd import Variable
def get_static(height = 1080, width = 1920, ratio = 0.1):
    """Build the dictionary of fixed sensor / grid options.

    Args:
        height: Frame height stored under ``"height"``.
        width: Frame width stored under ``"width"``.
        ratio: Accepted for compatibility; the cropping ratio is currently
            fixed at 0.0 and this argument is not used.

    Returns:
        dict: Option name to value.
    """
    return {
        "active_array_width": 4032,
        "active_array_height": 3024,
        "crop_window_width": 4032,
        "crop_window_height": 2272,
        "num_grid_rows": 12,
        "num_grid_cols": 12,
        "dim_homography": 9,
        "width": width,    # frame width.
        "height": height,  # frame height
        # "fov": 1.27      # sensor_width/sensor_focal_length
        "cropping_ratio": 0.0,  # normalized cropping ratio at each side.
    }
# Quaternion: [x, y, z, w]
def norm_quat(quat):
    """Scale ``quat`` to unit length.

    When the norm does not exceed 1e-6 the identity quaternion
    ``[0, 0, 0, 1]`` is returned instead.
    """
    magnitude = LA.norm(quat)
    if magnitude > 1e-6:
        return quat / magnitude
    # Degenerate input: fall back to the identity rotation.
    return np.array([0,0,0,1])
def torch_norm_quat(quat, USE_CUDA = True):
    """Normalise each row of a batch of quaternions to unit length.

    Rows whose norm does not exceed 1e-6 are replaced by the identity
    quaternion ``[0, 0, 0, 1]``.

    Args:
        quat: Tensor indexed as ``(batch, 4)`` with components ``[x, y, z, w]``.
        USE_CUDA: When true and CUDA is available the result is moved to the
            GPU; otherwise it stays on the device of ``quat``.

    Returns:
        Tensor with the shape and dtype of ``quat``.
    """
    norm = torch.norm(quat, dim=1, keepdim=True)
    valid = norm > 1e-6
    # Divide by 1 on degenerate rows so neither the forward value nor the
    # gradient of the unselected branch becomes NaN/inf.
    safe_norm = torch.where(valid, norm, torch.ones_like(norm))
    identity = torch.zeros_like(quat)
    identity[:, 3] = 1
    # Built out-of-place: writing into a leaf tensor that requires grad
    # raises outside of a CUDA copy / no_grad context.
    quat_out = torch.where(valid, quat / safe_norm, identity)
    if USE_CUDA and torch.cuda.is_available():
        quat_out = quat_out.cuda()
    return quat_out
def ConvertAxisAngleToQuaternion(axis, angle):
    """Build a normalised ``[x, y, z, w]`` quaternion from axis and angle.

    The axis is rescaled to unit length only when both its norm and the
    angle exceed 1e-6; the result is always passed through ``norm_quat``.
    """
    axis_length = LA.norm(axis)
    if axis_length > 1e-6 and angle > 1e-6:
        axis = axis / axis_length
    half = angle * 0.5
    sin_half = np.sin(half)
    components = [sin_half * axis[0], sin_half * axis[1], sin_half * axis[2], np.cos(half)]
    return norm_quat(np.array(components))
def ConvertAxisAngleToQuaternion_no_angle(axis):
    """Convert a rotation vector to a normalised ``[x, y, z, w]`` quaternion.

    The rotation angle is the norm of ``axis``; the direction is rescaled to
    unit length when that norm exceeds 1e-6.
    """
    angle = LA.norm(axis)
    if angle > 1e-6:
        axis = axis / angle
    half = angle * 0.5
    sin_half = np.sin(half)
    components = [sin_half * axis[0], sin_half * axis[1], sin_half * axis[2], np.cos(half)]
    return norm_quat(np.array(components))
def torch_ConvertAxisAngleToQuaternion(axis, USE_CUDA = True):
    """Convert a batch of rotation vectors to ``[x, y, z, w]`` quaternions.

    Args:
        axis: Tensor indexed as ``(batch, >=3)``; the first three columns are
            the rotation vector whose norm is the rotation angle.
        USE_CUDA: When true and CUDA is available the result is moved to the
            GPU.

    Returns:
        ``(batch, 4)`` tensor normalised by ``torch_norm_quat``.
    """
    rot_vec = axis[:, :3]
    # keepdim so the per-row scalars broadcast against the (batch, 3) axes;
    # a (batch,) factor only broadcasts when batch is 1 or 3.
    angle = torch.norm(rot_vec, dim=1, keepdim=True)
    half_angle = angle * 0.5
    valid = angle > 1e-6
    safe_angle = torch.where(valid, angle, torch.ones_like(angle))
    # Rows with a negligible angle keep the raw vector.
    unit_axis = torch.where(valid, rot_vec / safe_angle, rot_vec)
    # Assembled out-of-place: in-place writes into a leaf tensor that
    # requires grad raise on the CPU path.
    quats = torch.cat((torch.sin(half_angle) * unit_axis, torch.cos(half_angle)), dim=1)
    if USE_CUDA and torch.cuda.is_available():
        quats = quats.cuda()
    return torch_norm_quat(quats, USE_CUDA)
def ConvertQuaternionToAxisAngle(quat):
    """Split an ``[x, y, z, w]`` quaternion into ``[axis, angle]``.

    The quaternion is normalised first. When its vector part is shorter than
    1e-6 the axis is all zeros and the angle is 0.
    """
    unit = quat / LA.norm(quat)
    vector_norm = LA.norm(unit[0:3])
    axis = np.array([0.0, 0.0, 0.0])
    if vector_norm < 1e-6:
        return [axis, 0]
    scale = 1 / vector_norm
    for k in range(3):
        axis[k] = unit[k] * scale
    return [axis, 2 * np.arccos(unit[3])]
def ConvertQuaternionToAxisAngle_no_angle(quat):
    """Convert an ``[x, y, z, w]`` quaternion to a rotation vector.

    Returns a length-3 array whose direction is the rotation axis and whose
    norm is the rotation angle; zeros when the vector part of the normalised
    quaternion does not exceed 1e-6.
    """
    unit = quat / LA.norm(quat)
    vector_norm = LA.norm(unit[0:3])
    axis = np.array([0.0, 0.0, 0.0])
    if not vector_norm > 1e-6:
        return axis
    scale = 1 / vector_norm * 2 * np.arccos(unit[3])
    for k in range(3):
        axis[k] = unit[k] * scale
    return axis
def torch_ConvertQuaternionToAxisAngle(quat, USE_CUDA = True):
    """Convert a batch of quaternions to rotation vectors.

    Args:
        quat: Tensor indexed as ``(batch, 4)`` with components ``[x, y, z, w]``.
        USE_CUDA: When true and CUDA is available the result is moved to the
            GPU.

    Returns:
        ``(batch, 4)`` tensor whose first three columns hold ``axis * angle``
        and whose last column is zero. Rows whose vector part does not exceed
        1e-6 in norm are all zeros.
    """
    vector = quat[:, 0:3]
    axis_norm = torch.norm(vector, dim=1, keepdim=True)
    valid = axis_norm > 1e-6
    safe_norm = torch.where(valid, axis_norm, torch.ones_like(axis_norm))
    w = quat[:, 3:4]
    # acos has an infinite derivative at |w| == 1 (e.g. identity rows), so
    # feed a harmless value on rows that are masked out afterwards.
    safe_w = torch.where(valid, w, torch.zeros_like(w))
    scale = 1 / safe_norm * 2 * torch.acos(safe_w)
    rot_vec = torch.where(valid, vector * scale, torch.zeros_like(vector))
    # Assembled out-of-place: in-place writes into a leaf tensor that
    # requires grad raise on the CPU path.
    axis_angle = torch.cat((rot_vec, torch.zeros_like(axis_norm)), dim=1)
    if USE_CUDA and torch.cuda.is_available():
        axis_angle = axis_angle.cuda()
    return axis_angle
def train_ConvertQuaternionToAxisAngle(quat):
    """Return the rotation vector of ``quat`` padded with a trailing zero."""
    rotation_vector = ConvertQuaternionToAxisAngle_no_angle(quat)
    padded = np.zeros(4)
    padded[:3] = rotation_vector
    return padded
def AngularVelocityToQuat(angular_v, dt):
    """Turn an angular velocity applied for ``dt`` into a quaternion.

    The rotation axis is the normalised velocity and the angle is
    ``|angular_v| * dt``. Velocities shorter than 1e-6 fall back to the
    x axis and print a notice.
    """
    speed = LA.norm(angular_v)
    if speed < 1e-6:
        print('bad length')
        direction = np.array([1, 0, 0])
    else:
        direction = angular_v / speed
    return ConvertAxisAngleToQuaternion(direction, speed * dt)
def QuaternionProduct(q1, q2):
    """Return the normalised Hamilton product ``q1 * q2`` (``[x, y, z, w]``)."""
    x1, y1, z1, w1 = q1[0], q1[1], q1[2], q1[3]
    x2, y2, z2, w2 = q2[0], q2[1], q2[2], q2[3]

    product = np.zeros(4)
    product[0] = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    product[1] = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
    product[2] = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
    product[3] = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
    return norm_quat(product)
def torch_QuaternionProduct(q1, q2, USE_CUDA = True):
    """Batched, normalised Hamilton product ``q1 * q2``.

    Args:
        q1: Tensor indexed as ``(batch, 4)`` with components ``[x, y, z, w]``.
        q2: Tensor indexed the same way as ``q1``.
        USE_CUDA: When true and CUDA is available the result is moved to the
            GPU.

    Returns:
        ``(batch, 4)`` tensor normalised by ``torch_norm_quat``.
    """
    x1, y1, z1, w1 = q1[:, 0], q1[:, 1], q1[:, 2], q1[:, 3]
    x2, y2, z2, w2 = q2[:, 0], q2[:, 1], q2[:, 2], q2[:, 3]

    x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
    y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
    z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
    w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2

    # Stacked out-of-place: in-place writes into a leaf tensor that requires
    # grad raise on the CPU path.
    quat = torch.stack((x, y, z, w), dim=1)
    if USE_CUDA and torch.cuda.is_available():
        quat = quat.cuda()
    return torch_norm_quat(quat, USE_CUDA)
def ProcessGyroRotation(gyro_data):
    """Integrate gyro samples into one orientation quaternion per sample.

    Column 0 of ``gyro_data`` is used as a timestamp (differences are scaled
    by 1e-9) and columns 1:4 as the angular velocity. The first orientation
    is the identity; each later one is ``delta * previous``, renormalised.
    """
    count = np.shape(gyro_data)[0]
    quats = np.zeros((count, 4))
    quats[0, :] = np.array([0, 0, 0, 1])
    for idx in range(1, count):
        elapsed = (gyro_data[idx, 0] - gyro_data[idx - 1, 0]) * 1e-9
        delta = AngularVelocityToQuat(gyro_data[idx, 1:4], elapsed)
        composed = QuaternionProduct(delta, quats[idx - 1, :])  # R_t = delta R_t * R_t-1
        quats[idx, :] = composed
        quats[idx, :] = quats[idx, :] / LA.norm(quats[idx, :])
    return quats
def QuaternionReciprocal(q):
    """Return the normalised conjugate of an ``[x, y, z, w]`` quaternion."""
    x, y, z, w = q[0], q[1], q[2], q[3]
    conjugate = np.array([-x, -y, -z, w])
    return norm_quat(conjugate)
def torch_QuaternionReciprocal(q, USE_CUDA = True):
    """Return the normalised conjugate of each ``[x, y, z, w]`` row of ``q``.

    ``USE_CUDA`` is accepted for signature parity with the other helpers;
    it is not consulted here.
    """
    vector_part = -q[:, 0:3]
    scalar_part = q[:, 3:]
    conjugate = torch.cat((vector_part, scalar_part), dim = 1)
    return torch_norm_quat(conjugate)
def ProcessGyroData(gyro_data):
    """Integrate gyro samples and derive per-sample rotation differences.

    Returns:
        list: ``[axis_dif, quats]`` where ``quats`` comes from
        ``ProcessGyroRotation`` and row ``i`` of ``axis_dif`` is the
        axis-times-angle of ``quats[i] * reciprocal(quats[i-1])``
        (row 0 stays zero).
    """
    quats = ProcessGyroRotation(gyro_data)
    count = np.shape(gyro_data)[0]
    axis_dif = np.zeros((count, 3))
    for idx in range(1, count):
        previous_inverse = QuaternionReciprocal(quats[idx - 1, :])
        relative = QuaternionProduct(quats[idx, :], previous_inverse)
        axis_cur, angle_cur = ConvertQuaternionToAxisAngle(relative)
        axis_dif[idx, :] = axis_cur * angle_cur
    return [axis_dif, quats]
def SlerpWithDefault(q1, q2, t, q_default):
    """Spherically interpolate between two quaternions.

    Args:
        q1: Start quaternion (normalised internally).
        q2: End quaternion (normalised internally).
        t: Interpolation factor, clamped to ``[0, 1]``.
        q_default: Value returned when ``|dot(q1, q2)| >= 1``.

    Returns:
        ``q1`` for ``t`` below 1e-6, ``q2`` for ``t`` above ``1 - 1e-6``,
        ``q_default`` in the degenerate case above, otherwise the
        interpolated quaternion. Nearly parallel inputs
        (``|dot| > 0.9995``) are blended linearly and renormalised.
    """
    t = max(min(t, 1.0), 0.0)
    kEpsilon = 1e-6
    kSlerpLinearThresh = 0.9995
    q1 = q1 / LA.norm(q1)
    q2 = q2 / LA.norm(q2)
    if t < kEpsilon:
        return q1
    if t > 1 - kEpsilon:
        return q2

    dot_product = np.sum(q1 * q2)
    if abs(dot_product) >= 1:
        return q_default

    # q and -q encode the same rotation: when the inputs point into opposite
    # hemispheres, interpolate towards -q2 to follow the shorter arc.
    sign = 1
    if dot_product < 0:
        sign = -1
        dot_product = -dot_product

    if dot_product > kSlerpLinearThresh:
        # The sign must be applied here as well; blending nearly antipodal
        # quaternions without it collapses towards zero and yields a
        # rotation of roughly 180 degrees.
        q3 = q1 * (1 - t) + sign * q2 * t
        return q3 / LA.norm(q3)

    theta = np.arccos(dot_product)
    inv_sin_theta = 1.0 / np.sin(theta)
    coeff1 = np.sin((1.0 - t) * theta) * inv_sin_theta
    coeff2 = sign * np.sin(t * theta) * inv_sin_theta
    return q1 * coeff1 + q2 * coeff2
def GetGyroAtTimeStamp(gyro_data, timestamp):
    """Look up (or interpolate) the gyro value at ``timestamp``.

    ``gyro_data`` is indexed as a 2-D array: column 0 is compared against
    ``timestamp`` and the remaining columns are the value that is returned
    or interpolated with ``SlerpWithDefault``.

    When there are fewer than two rows, or ``timestamp`` lies outside the
    range spanned by the first and last rows, the identity ``[0, 0, 0, 1]``
    is used. The result is divided by its norm plus 1e-6 before returning.
    """
    z = np.array([0,0,0,1])
    if len(gyro_data) >= 2 and (not(timestamp < gyro_data[0,0] or timestamp > gyro_data[-1, 0])):
        # Indices of every row whose timestamp is at or after the query.
        ind = np.where(gyro_data[:,0] >= timestamp)
        ind = np.squeeze( ind, axis = 0)
        if gyro_data[ind[0], 0] == timestamp:
            # Exact hit: use the stored value directly.
            z = gyro_data[ind[0],1:]
        else:
            # Interpolate between the two rows that bracket the query.
            start_index = ind[0] -1
            end_index = ind[0]
            ratio = (timestamp - gyro_data[start_index,0])/(gyro_data[end_index,0]-gyro_data[start_index,0])
            z = SlerpWithDefault(gyro_data[start_index,1:], gyro_data[end_index, 1:], ratio, gyro_data[start_index,1:])
    # The small epsilon keeps the division defined for a zero vector.
    z = z / (LA.norm(z) + 1e-6)
    return z
def train_GetGyroAtTimeStamp(gyro_data, timestamp, check = False):
    """Look up (or interpolate) the gyro value at ``timestamp``.

    Returns the value divided by its norm plus 1e-6, or ``None`` when
    ``gyro_data`` has fewer than two rows or ``timestamp`` is outside the
    range of its first column. With ``check`` set, the ``None`` case also
    prints ``"bad value"``.
    """
    in_range = len(gyro_data) >= 2 and (not(timestamp < gyro_data[0,0] or timestamp > gyro_data[-1, 0]))
    if not in_range:
        if check:
            print("bad value")
        return None

    # First row whose timestamp is at or after the query.
    candidates = np.squeeze(np.where(gyro_data[:,0] >= timestamp), axis = 0)
    end_index = candidates[0]
    if gyro_data[end_index, 0] == timestamp:
        z = gyro_data[end_index, 1:]
    else:
        start_index = end_index - 1
        ratio = (timestamp - gyro_data[start_index,0])/(gyro_data[end_index,0]-gyro_data[start_index,0])
        z = SlerpWithDefault(gyro_data[start_index,1:], gyro_data[end_index, 1:], ratio, gyro_data[start_index,1:])
    return z / (LA.norm(z) + 1e-6)
def FindOISAtTimeStamp(ois_log, time):
    """Return the OIS reading at ``time``, interpolating linearly.

    Args:
        ois_log: 2-D array whose column 2 holds the sample timestamps and
            whose columns 0:2 hold the reading.
        time: Query timestamp.

    Returns:
        The first reading when ``time`` is at or before the first timestamp,
        the last reading when it is after the last timestamp, the stored
        reading on an exact timestamp match, and otherwise a linear blend of
        the two neighbouring readings.
    """
    ois_time = ois_log[:, 2]
    if time <= ois_time[0]:
        return ois_log[0, 0:2]
    if time > ois_time[-1]:
        return ois_log[-1, 0:2]

    # First sample at or after the query; at least 1 because of the guard above.
    first_ind = np.nonzero(ois_time >= time)[0][0]
    cur_time = ois_time[first_ind]
    # Compare timestamps with the query time (not with the array index).
    if cur_time == time:
        return ois_log[first_ind, 0:2]

    last_timestamp = ois_time[first_ind - 1]
    ratio = (time - last_timestamp) / (cur_time - last_timestamp)
    return ois_log[first_ind - 1, 0:2] * (1 - ratio) + ois_log[first_ind, 0:2] * ratio
def GetMetadata(frame_data, frame_index, result_poses = {} ):
    """Collect the per-frame values needed to build projections.

    Reads columns 0, 4 and 3 of row ``frame_index`` of ``frame_data`` as the
    frame timestamp, the OIS timestamp and the rolling-shutter time. The
    field-of-view entries come from ``result_poses`` when it provides
    ``'real fov'`` / ``'virtual fov'`` and default to 1.27 otherwise.
    """
    # We can just use 1.27 as fov and virtual fov for videos in the data set.
    default_fov = 1.27
    metadata = {
        "frame_id": frame_index,
        "timestamp_ns": frame_data[frame_index, 0],
        "timestamp_ois_ns": frame_data[frame_index, 4],
        "rs_time_ns": frame_data[frame_index, 3],
    }
    for pose_key, meta_key in (("real fov", "fov"), ("virtual fov", "virtual_fov")):
        if pose_key in result_poses:
            metadata[meta_key] = result_poses[pose_key][frame_index,:]
        else:
            metadata[meta_key] = default_fov
    return metadata
def GetProjections(static_options, metadata, quats_data, ois_data, no_shutter = False):
    """Build one real projection per grid row of a frame.

    Each row is sampled at the frame timestamps plus a share of the
    rolling-shutter time: ``i / (num_rows - 1)`` of it for row ``i``, or
    always half of it when ``no_shutter`` is set.
    """
    num_rows = static_options["num_grid_rows"]
    real_projections = []
    for row in range(num_rows):
        if no_shutter:
            shift = metadata["rs_time_ns"] * 0.5
        else:
            shift = metadata["rs_time_ns"] * row / (num_rows-1)
        row_timestamp_ns = metadata["timestamp_ns"] + shift
        row_timestamp_ois_ns = metadata["timestamp_ois_ns"] + shift
        projection = GetRealProjection(
            static_options, quats_data, ois_data, metadata["fov"], row_timestamp_ns, row_timestamp_ois_ns)
        real_projections.append(projection)
    return real_projections
def GetRealProjection(static_options, quats_data, ois_data, fov, timestamp_ns, timestamp_ois_ns):
    """Build the projection for the orientation and OIS offset at given times."""
    quat = GetGyroAtTimeStamp(quats_data, timestamp_ns)
    raw_offset = FindOISAtTimeStamp(ois_data, timestamp_ois_ns)
    # Normalise the OIS offset by the crop-window size.
    crop_size = np.array([static_options["crop_window_width"], static_options["crop_window_height"]])
    normalized_offset = np.array(raw_offset) / crop_size
    return GetProjectionHomography(
        quat, fov, normalized_offset, static_options["width"], static_options["height"])
def GetProjectionHomography(rot, fov, offset, width, height):
    """Return ``intrinsics @ rotation`` for one camera pose.

    Args:
        rot: Rotation as a quaternion.
        fov: sensor_width / focal_length.
        offset: Additional OIS offset in the normalised domain.
        width: Frame width.
        height: Frame height.
    """
    focal_length = width / fov
    rotation = ConvertQuaternionToRotationMatrix(rot)
    intrinsics = GetIntrinsics(focal_length, offset, width, height)
    return np.matmul(intrinsics, rotation)
def torch_GetProjectionHomography(rot, fov, width, height, USE_CUDA = True):
    """Batched ``intrinsics @ rotation`` with a zero lens offset.

    Args:
        rot: Batch of rotations as quaternions.
        fov: sensor_width / focal_length.
        width: Frame width.
        height: Frame height.
        USE_CUDA: Move the intrinsics to the GPU before multiplying.
    """
    focal_length = width / fov
    rotation = torch_ConvertQuaternionToRotationMatrix(rot)
    batch_size = rotation.size()[0]

    # One shared intrinsic matrix, repeated for every batch element.
    zero_offset = np.array([0,0])
    single = GetIntrinsics(focal_length, zero_offset, width, height)
    tiled = np.repeat(np.expand_dims(single, axis = 0), batch_size, axis = 0)
    intrinsics = torch.Tensor(tiled)
    if USE_CUDA == True:
        intrinsics = intrinsics.cuda()
    return torch.matmul(intrinsics, rotation)
def ConvertQuaternionToRotationMatrix(quat):
    """Convert an ``[x, y, z, w]`` quaternion to a 3x3 rotation matrix."""
    x, y, z, w = quat[0], quat[1], quat[2], quat[3]
    entries = (
        1 - 2 * y * y - 2 * z * z,
        2 * x * y - 2 * z * w,
        2 * x * z + 2 * y * w,
        2 * x * y + 2 * z * w,
        1 - 2 * x * x - 2 * z * z,
        2 * y * z - 2 * x * w,
        2 * x * z - 2 * y * w,
        2 * y * z + 2 * x * w,
        1 - 2 * x * x - 2 * y * y,
    )
    flat = np.zeros(9)
    flat[:] = entries
    # Row-major fill: entries 0..2 form the first row.
    return np.reshape(flat, (3, 3))
def torch_ConvertQuaternionToRotationMatrix(quat, USE_CUDA = True):
    """Convert a batch of ``[x, y, z, w]`` quaternions to rotation matrices.

    Args:
        quat: Tensor indexed as ``(batch, 4)``.
        USE_CUDA: When true and CUDA is available the result is moved to the
            GPU.

    Returns:
        ``(batch, 3, 3)`` tensor, filled row-major.
    """
    x, y, z, w = quat[:, 0], quat[:, 1], quat[:, 2], quat[:, 3]
    entries = (
        1 - 2 * y * y - 2 * z * z,
        2 * x * y - 2 * z * w,
        2 * x * z + 2 * y * w,
        2 * x * y + 2 * z * w,
        1 - 2 * x * x - 2 * z * z,
        2 * y * z - 2 * x * w,
        2 * x * z - 2 * y * w,
        2 * y * z + 2 * x * w,
        1 - 2 * x * x - 2 * y * y,
    )
    # Stacked out-of-place: in-place writes into a leaf tensor that requires
    # grad raise on the CPU path.
    rotation = torch.stack(entries, dim=1)
    if USE_CUDA and torch.cuda.is_available():
        rotation = rotation.cuda()
    return rotation.reshape(quat.size()[0], 3, 3)
def ConvertRotationMatrixToQuaternion(m):
tr = m[0,0] + m[1,1] + m[2,2]
if tr > 0 :
S = 2 * (tr+1.0)**0.5
qw = 0.25 * S
qx = (m[2,1] - m[1,2]) / S
qy = (m[0,2] - m[2,0]) / S
qz = (m[1,0] - m[0,1]) / S
elif m[0,0] > m[1,1] and m[0,0] > m[2,2]:
S = 2* (1.0 + m[0,0] - m[1,1] - m[2,2]) ** 0.5
qw = (m[2,1] - m[1,2]) / S
qx = 0.25 * S
qy = (m[0,1] + m[1,0]) / S
qz = (m[0,2] + m[2,0]) / S
elif m[1,1] > m[2,2]:
S = 2* (1.0 - m[0,0] + m[1,1] - m[2,2]) ** 0.5
qw = (m[0,2] - m[2,0]) / S
qx = (m[0,1] + m[1,0]) / S
qy = 0.25 * S
qz = (m[1,2] + m[2,1]) / S
else:
S = 2* (1.0 - m[0,0] - m[1,1] + m[2,2]) ** 0.5
qw = (m[1,0] - m[0,1]) / S
qx = (m[0,2] + m[2,0]) / S
qy = (m[1,2] + m[2,1]) / S
qz = 0.25 * S
return np.array([qx,qy,qz,qw])
def GetIntrinsics(focal_length, offset, width, height):
    """Build the 3x3 intrinsic matrix.

    The principal point is the frame centre ``0.5 * (size - 1)`` shifted by
    ``offset`` scaled with the frame size.
    """
    focal = float(focal_length)
    center_x = 0.5*(width-1)+offset[0]*width
    center_y = 0.5*(height-1)+offset[1]*height
    return np.array([
        [focal, 0.0, center_x],
        [0.0, focal, center_y],
        [0.0, 0.0, 1.0],
    ])
def GetVirtualProjection(static_options, result_pose, metadata, frame_index):
    """Build the virtual-camera projection for one frame.

    Debug only, for getting results and references for comparisons. The lens
    offset is taken from ``result_pose`` when it provides one, otherwise it
    is zero.
    """
    quat = result_pose['virtual pose'][frame_index,:]
    offset_key = 'vitual lens offset'
    if offset_key in result_pose:
        lens_offset = result_pose[offset_key][frame_index,:]
    else:
        lens_offset = np.array([0,0])
    return GetProjectionHomography(
        quat, metadata["virtual_fov"], lens_offset, static_options["width"], static_options["height"])
def torch_GetVirtualProjection(static_options, quat, virtual_fov = 1.27):
    """Batched virtual-camera projection for the configured frame size."""
    frame_width = static_options["width"]
    frame_height = static_options["height"]
    return torch_GetProjectionHomography(quat, virtual_fov, frame_width, frame_height)
def GetForwardGrid(static_options, real_projections, virtual_projection):
    """Warp a regular grid from the real camera into the virtual camera.

    Args:
        static_options: Options providing the grid and frame sizes.
        real_projections: A set of 3x3 projections, one per grid row.
        virtual_projection: A single 3x3 projection.

    Returns:
        Array of shape ``(4, num_grid_cols, num_grid_rows)`` holding
        ``[warped_x, warped_y, u, v]`` in normalised coordinates.
    """
    num_cols = static_options["num_grid_cols"]
    num_rows = static_options["num_grid_rows"]
    grid = np.zeros((4, num_cols, num_rows))

    width = static_options["width"]
    height = static_options["height"]
    row_step = 1/ (num_rows - 1)
    col_step = 1/ (num_cols - 1)
    frame_scale = np.array([width, height, 1])

    for row in range(num_rows):
        transform = GetHomographyTransformFromProjections(real_projections[row], virtual_projection)
        v = row * row_step
        for col in range(num_cols):
            u = col * col_step
            point = np.array([u * width, v * height, 1]).T
            warped = ApplyTransform(transform, point) / frame_scale  # normalize
            grid[:, col, row] = np.array([warped[0], warped[1], u, v])
    return grid
def torch_GetForwardGrid(static_options, real_projections, virtual_projection, USE_CUDA = True):
    """Batched warp of a regular grid from the real to the virtual camera.

    Args:
        static_options: Options providing the grid and frame sizes.
        real_projections: Batch of per-row 3x3 projections, indexed
            ``[:, row]``.
        virtual_projection: Batch of 3x3 projections.
        USE_CUDA: Place the grid and helper tensors on the GPU.

    Returns:
        Tensor of shape ``(batch, 4, num_grid_cols, num_grid_rows)`` holding
        ``[warped_x, warped_y, u, v]`` in normalised coordinates.
    """
    batch_size = real_projections.size()[0]
    num_cols = static_options["num_grid_cols"]
    num_rows = static_options["num_grid_rows"]
    grid = torch.zeros((batch_size, 4, num_cols, num_rows))
    if USE_CUDA:
        grid = grid.cuda()

    width = static_options["width"]
    height = static_options["height"]
    row_step = 1/ (num_rows - 1)
    col_step = 1/ (num_cols - 1)

    # The normalisation vector is identical for every grid point.
    norm = torch.Tensor([width, height, 1])
    if USE_CUDA == True:
        norm = norm.cuda()

    for row in range(num_rows):
        transform = torch_GetHomographyTransformFromProjections(real_projections[:, row], virtual_projection)
        v = row * row_step
        for col in range(num_cols):
            u = col * col_step
            point = torch.Tensor([u * width, v * height, 1])
            if USE_CUDA == True:
                point = point.cuda()
            warped = torch_ApplyTransform(transform, point) / norm  # normalize
            grid[:, 0, col, row] = warped[:,0]
            grid[:, 1, col, row] = warped[:,1]
            grid[:, 2, col, row] = u
            grid[:, 3, col, row] = v
    return grid
def GetWarpingFlow(real_projections_src, real_projections_dst, num_rows, num_cols, frame_width, frame_height):
    """Warp a regular grid from source projections to destination projections.

    Args:
        real_projections_src: Per-row 3x3 source projections.
        real_projections_dst: Per-row 3x3 destination projections.
        num_rows: Rows of the flow.
        num_cols: Cols of the flow.
        frame_width: Frame width used to scale ``u``.
        frame_height: Frame height used to scale ``v``.

    Returns:
        Array of shape ``(4, num_cols, num_rows)`` holding
        ``[warped_x, warped_y, u, v]`` in normalised coordinates.
    """
    grid = np.zeros((4, num_cols, num_rows))
    row_step = 1/ (num_rows - 1)
    col_step = 1/ (num_cols - 1)
    frame_scale = np.array([frame_width, frame_height, 1])

    for row in range(num_rows):
        transform = GetHomographyTransformFromProjections(real_projections_src[row], real_projections_dst[row])
        v = row * row_step
        for col in range(num_cols):
            u = col * col_step
            point = np.array([u * frame_width, v * frame_height, 1]).T
            warped = ApplyTransform(transform, point) / frame_scale  # normalize
            grid[:, col, row] = np.array([warped[0], warped[1], u, v])
    return grid
def torch_GetWarpingFlow(static_options, real_projections_src, real_projections_dst, USE_CUDA = True):
    """Batched warp of a regular grid between two sets of real projections.

    Args:
        static_options: Options providing the grid and frame sizes.
        real_projections_src: Batch of per-row 3x3 source projections,
            indexed ``[:, row]``.
        real_projections_dst: Batch of per-row 3x3 destination projections,
            indexed ``[:, row]``.
        USE_CUDA: Place the grid and helper tensors on the GPU.

    Returns:
        Tensor of shape ``(batch, 4, num_grid_cols, num_grid_rows)`` holding
        ``[warped_x, warped_y, u, v]`` in normalised coordinates.
    """
    batch_size = real_projections_src.size()[0]
    num_cols = static_options["num_grid_cols"]
    num_rows = static_options["num_grid_rows"]
    grid = torch.zeros((batch_size, 4, num_cols, num_rows))
    if USE_CUDA:
        grid = grid.cuda()

    width = static_options["width"]
    height = static_options["height"]
    row_step = 1/ (num_rows - 1)
    col_step = 1/ (num_cols - 1)

    # The normalisation vector is identical for every grid point.
    norm = torch.Tensor([width, height, 1])
    if USE_CUDA == True:
        norm = norm.cuda()

    for row in range(num_rows):
        transform = torch_GetHomographyTransformFromProjections(real_projections_src[:, row], real_projections_dst[:, row])
        v = row * row_step
        for col in range(num_cols):
            u = col * col_step
            point = torch.Tensor([u * width, v * height, 1])
            if USE_CUDA == True:
                point = point.cuda()
            warped = torch_ApplyTransform(transform, point) / norm  # normalize
            grid[:, 0, col, row] = warped[:,0]
            grid[:, 1, col, row] = warped[:,1]
            grid[:, 2, col, row] = u
            grid[:, 3, col, row] = v
    return grid
def GetHomographyTransformFromProjections(proj_src, proj_dst):
    """Return the homography ``proj_dst @ inverse(proj_src)``."""
    src_inverse = LA.inv(proj_src)
    return np.matmul(proj_dst, src_inverse)
def torch_GetHomographyTransformFromProjections(proj_src, proj_dst):
    """Return the homography ``proj_dst @ inverse(proj_src)`` for tensors."""
    src_inverse = torch.inverse(proj_src)
    return torch.matmul(proj_dst, src_inverse)
def ApplyTransform(transform, point):
    """Warp a 2D point ``[x, y, 1]`` with a homography.

    Returns the warped point ``[warped_x, warped_y, 1]``, i.e. the product
    divided by its third component.
    """
    projected = np.matmul(transform, point)
    return projected / projected[2]
def torch_ApplyTransform(transform, point):
    """Warp a 2D point ``[x, y, 1]`` with a batch of homographies.

    Returns one warped point ``[warped_x, warped_y, 1]`` per batch element,
    each divided by its own third component.
    """
    projected = torch.matmul(transform, point)
    depth = projected[:,2:]
    return projected / depth
def CenterZoom(grid, ratio):
    """Scale channels 0:2 of ``grid`` about 0.5 by ``ratio``, in place.

    The same (mutated) ``grid`` object is returned.
    """
    warped_xy = grid[:, 0:2, :, :]
    grid[:, 0:2, :, :] = (warped_xy - 0.5) * ratio + 0.5
    return grid
================================================
FILE: dvs/gyro/gyro_io.py
================================================
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt
import scipy.io as sio
from .gyro_function import (
ProcessGyroData, QuaternionProduct, QuaternionReciprocal,
ConvertQuaternionToAxisAngle, FindOISAtTimeStamp, GetMetadata,
GetProjections, GetVirtualProjection, GetForwardGrid,
CenterZoom, GetGyroAtTimeStamp, get_static, ConvertAxisAngleToQuaternion,
ConvertAxisAngleToQuaternion_no_angle, ConvertQuaternionToAxisAngle_no_angle
)
def load_gyro_mesh(input_name):
    """Load a stabilisation result and reshape its warping grid.

    The ``"warping grid"`` entry is reshaped to ``(-1, w, h, 4)`` where
    ``w, h`` come from the first row of ``"vertex_grid_size"``.
    """
    data = LoadStabResult(input_name)
    grid_w, grid_h = data["vertex_grid_size"][0]
    target_shape = (-1, int(grid_w), int(grid_h), 4)
    data["warping grid"] = np.reshape(data["warping grid"], target_shape)
    return data
def get_grid(static_options, frame_data, quats_data, ois_data, virtual_data, no_shutter = False):
    """Build the warping grid for every frame that has a virtual pose.

    For each row of ``virtual_data`` the real projections and the virtual
    projection are combined into a forward grid. The stacked grids are then
    zoomed about the centre by ``1 / (1 - 2 * cropping_ratio)`` and returned
    with axes permuted to ``(0, 3, 2, 1)``.
    """
    result_poses = {'virtual pose': virtual_data}
    frame_grids = []
    for frame_index in range(len(virtual_data)):
        metadata = GetMetadata(frame_data, frame_index)
        real_projections = GetProjections(static_options, metadata, quats_data, ois_data, no_shutter = no_shutter)
        virtual_projection = GetVirtualProjection(static_options, result_poses, metadata, frame_index)
        frame_grids.append(GetForwardGrid(static_options, real_projections, virtual_projection))

    stacked = np.array(frame_grids)
    zoom_ratio = 1 / (1 - 2 * static_options["cropping_ratio"])
    zoomed = CenterZoom(stacked, zoom_ratio)
    return np.transpose(zoomed, (0,3,2,1))
def get_rotations(frame_data, quats_data, ois_data, num_frames):
    """Compute per-frame relative rotations and lens offsets.

    Returns:
        tuple: ``(rotations, lens_offsets)`` where ``rotations`` has shape
        ``(num_frames, 3)`` and ``lens_offsets`` has shape ``(num_frames, 2)``.
    """
    # Orientation at each frame timestamp (column 0 of frame_data).
    quats = np.zeros((num_frames, 4))
    for i in range(num_frames):
        quats[i,:] = GetGyroAtTimeStamp(quats_data, frame_data[i,0])

    rotations = np.zeros((num_frames,3))
    lens_offsets = np.zeros((num_frames, 2))
    for i in range(num_frames):
        if i != 0:
            # Rotation from the previous frame to this one; frame 0 keeps zeros.
            quat_dif = QuaternionProduct(quats[i,:], QuaternionReciprocal(quats[i-1,:]))
            axis_dif_cur = ConvertQuaternionToAxisAngle_no_angle(quat_dif)
            rotations[i,:] = axis_dif_cur
        # Lens offset looked up at column 4 of frame_data.
        lens_offsets[i,:] = FindOISAtTimeStamp(ois_data, frame_data[i, 4])

    return rotations, lens_offsets
def visual_rotation(rotations_real, lens_offsets_real, rotations_virtual, lens_offsets_virtual, rotations_virtual2, lens_offsets_virtual2, path):
    """Plot real vs. virtual rotations and lens offsets and save a JPEG.

    Five stacked panels are drawn: gyro x/y/z (columns 0..2 of the rotation
    arrays, y axis limited to +/-0.02) and OIS x/y (columns 0..1 of the lens
    offset arrays). Real data is green, the first virtual series blue and the
    second red; a virtual series that is ``None`` is skipped.

    Args:
        rotations_real: Real rotations, plotted in every gyro panel.
        lens_offsets_real: Real lens offsets, plotted in every OIS panel.
        rotations_virtual: First virtual rotations or ``None``.
        lens_offsets_virtual: First virtual lens offsets or ``None``.
        rotations_virtual2: Second virtual rotations or ``None``.
        lens_offsets_virtual2: Second virtual lens offsets or ``None``.
        path: Output path; its last four characters are replaced by ``.jpg``.
    """
    # (real, virtual, virtual2, column, y label, clamp y axis)
    panels = (
        (rotations_real, rotations_virtual, rotations_virtual2, 0, 'gyro x', True),
        (rotations_real, rotations_virtual, rotations_virtual2, 1, 'gyro y', True),
        (rotations_real, rotations_virtual, rotations_virtual2, 2, 'gyro z', True),
        (lens_offsets_real, lens_offsets_virtual, lens_offsets_virtual2, 0, 'ois x', False),
        (lens_offsets_real, lens_offsets_virtual, lens_offsets_virtual2, 1, 'ois y', False),
    )

    plt.clf()
    fig = plt.figure(figsize=(8,16))
    for position, (real, virtual, virtual2, column, label, clamp) in enumerate(panels, start=1):
        plt.subplot(5, 1, position)
        plt.plot(real[:, column], "g")
        if virtual is not None:
            plt.plot(virtual[:, column], "b")
        # Each panel guards on the series it actually plots; the OIS panels
        # previously tested rotations_virtual2 and failed when only the
        # second lens-offset series was missing.
        if virtual2 is not None:
            plt.plot(virtual2[:, column], "r")
        if clamp:
            plt.ylim(-0.02, 0.02)
        plt.xlabel('frame id')
        plt.ylabel(label)

    plt.savefig(path[:-4]+".jpg")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    return
def LoadOISData(ois_name):
    """Load an OIS log and keep only its last three columns."""
    full_log = np.loadtxt(ois_name)
    return full_log[:, -3:]
def LoadFrameData(frame_log_name):
    """Load a frame log and shift its timestamp columns.

    Half of column 1 is subtracted from columns 0 and 4 of every row.
    """
    frame_data = np.loadtxt(frame_log_name)
    timestamp_cols = [0,4]
    half_of_col1 = np.expand_dims(frame_data[:,1]/2, axis = 1)
    frame_data[:, timestamp_cols] = frame_data[:, timestamp_cols] - half_of_col1
    return frame_data
def LoadGyroData(gyro_log_name):
    """Load a gyro log and integrate it into timestamped quaternions.

    Column 0 is multiplied by 1000, columns are reordered to
    ``[0, 2, 1, 3]`` and the samples are integrated with
    ``ProcessGyroData``.

    Returns:
        Array whose first column is the scaled timestamp followed by the
        four quaternion components.
    """
    samples = np.loadtxt(gyro_log_name)
    samples[:,0] = samples[:,0] * 1000
    samples = samples[:,[0, 2, 1, 3]]
    _, quats = ProcessGyroData(samples)
    timestamps = samples[:, 0, None]
    return np.concatenate((timestamps, quats), axis = 1)
def LoadStabResult(input_name):
    """Parse a stabilisation result file into ``{name: array}``.

    Records are read with ``ReadLine`` until it signals end of file; values
    of records sharing a name are concatenated along axis 0.

    Args:
        input_name: Path of the text file to read.

    Returns:
        dict: Record name to stacked values. Empty when the file has no
        records.
    """
    data = {}
    # The context manager closes the file even when parsing raises.
    with open(input_name) as fid:
        while True:
            name, val = ReadLine(fid)
            if name is None:
                break
            if name in data:
                data[name] = np.concatenate((data[name], val), axis=0)
            else:
                data[name] = val
    # An empty file has no first entry to measure; report 0 instead of failing.
    mesh_length = len(next(iter(data.values()))) if data else 0
    print("Mesh length: ", mesh_length)
    return data
def ReadLine(fid):
    """Read one ``name: values`` record from an open text file.

    Returns ``(None, None)`` at end of file. Otherwise returns the text
    before the first ``:`` and the numbers after it as an array with a
    leading axis of length 1. When no numbers follow the colon, they are
    read from the next line instead.
    """
    header = fid.readline()
    if not header:
        return None, None
    if header.endswith("\n"):
        header = header[:-1]

    colon = header.find(':')
    name = header[:colon]
    numbers = str2num(header[colon+1:])
    if not len(numbers) > 0:
        # The values live on the following line.
        body = fid.readline()
        if body[-1] == "\n":
            body = body[:-1]
        numbers = str2num(body)
    return name, np.expand_dims(np.array(numbers), axis=0)
def str2num(string):
    """Parse space-separated numbers into a list of floats.

    Empty fields produced by consecutive spaces are skipped.
    """
    numbers = []
    for token in string.split(" "):
        if token == "":
            continue
        numbers.append(float(token))
    return numbers
================================================
FILE: dvs/inference.py
================================================
import os
import sys
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
import time
import yaml
import argparse
import numpy as np
from printer import Printer
from dataset import get_data_loader, get_inference_data_loader
from model import Model
import datetime
import copy
from util import make_dir, get_optimizer, norm_flow
from gyro import (
get_grid,
get_rotations,
visual_rotation,
torch_QuaternionProduct,
torch_norm_quat
)
from warp import warp_video
# Set at import time so that only GPU 0 is visible to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def run(model, loader, cf, USE_CUDA=True):
    """Run the stabilisation model over ``loader`` and return virtual poses.

    Both networks are put in eval mode. For every batch the sequence is
    walked step by step: virtual inputs are requested from the dataset, the
    optical-flow network and the pose network are evaluated, the predicted
    quaternion is composed with the current virtual/real pose and pushed
    back into the dataset's virtual queue. The per-step loss is accumulated
    and its mean is printed at the end.

    Args:
        model: Object exposing ``net``, ``unet`` and ``loss``.
        loader: Data loader; its ``dataset`` provides ``get_virtual_data``
            and ``update_virtual_queue``.
        cf: Configuration mapping; ``cf['data']`` supplies
            ``number_virtual`` and ``number_real``.
        USE_CUDA: Move the per-step tensors to the GPU.

    Returns:
        The virtual queue of the last batch with axis 0 squeezed out.
    """
    no_flo = False
    number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
    model.net.eval()
    model.unet.eval()
    activation = nn.Softshrink(0.0006) # 0.0036
    for i, data in enumerate(loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
        print("Fininsh Load data")

        real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
        real_projections = real_projections.type(torch.float)
        flo = flo.type(torch.float)
        flo_back = flo_back.type(torch.float)
        ois = ois.type(torch.float)

        batch_size, step, dim = real_inputs.size()
        times = times.numpy()
        real_queue_idx = real_queue_idx.numpy()
        virtual_queue = [None] * batch_size

        run_loss = 0
        model.net.init_hidden(batch_size)
        count = 0
        for j in range(step):
            if (j+1) % 100 == 0:
                print("Step: "+str(j+1)+"/"+str(step))
            virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
                virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
            real_inputs_step = real_inputs[:,j,:]
            inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)

            # inputs = Variable(real_inputs_step)
            # NOTE(review): the per-step flow/projection/OIS tensors below are
            # only ever assigned together with a .cuda() call - confirm
            # whether a CPU-only path is expected to work.
            if USE_CUDA:
                real_inputs_step = real_inputs_step.cuda()
                virtual_inputs = virtual_inputs.cuda()
                inputs = inputs.cuda()
                if no_flo is False:
                    flo_step = flo[:,j].cuda()
                    flo_back_step = flo_back[:,j].cuda()
                else:
                    flo_step = None
                    flo_back_step = None
                vt_1 = vt_1.cuda()
                real_projections_t = real_projections[:,j+1].cuda()
                real_projections_t_1 = real_projections[:,j].cuda()
                real_postion_anchor = real_postion[:,j].cuda()
                ois_step = ois[:,j].cuda()

            if no_flo is False:
                # Normalise both flow fields by their own height and width.
                b, h, w, _ = flo_step.size()
                flo_step = norm_flow(flo_step, h, w)
                flo_back_step = norm_flow(flo_back_step, h, w)

            with torch.no_grad():
                if no_flo is False:
                    flo_out = model.unet(flo_step, flo_back_step)
                else:
                    flo_out = None
                if j < 1:
                    # The very first step calls the network twice.
                    # NOTE(review): this loop variable reuses the name of the
                    # outer batch index `i`.
                    for i in range(2):
                        out = model.net(inputs, flo_out, ois_step)
                else:
                    out = model.net(inputs, flo_out, ois_step)

            real_position = real_inputs_step[:,40:44]
            virtual_position = virtual_inputs[:, -4:]

            # Soft-shrink the first three components, then renormalise.
            out[:, :3] = activation(out[:, :3])
            out = torch_norm_quat(out)

            pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
            loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
                flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
                follow = True, optical = True, undefine = True)
            run_loss += loss_step

            # Compose the predicted quaternion with the current pose.
            out = torch_QuaternionProduct(out, pos)

            if USE_CUDA:
                out = out.cpu().detach().numpy()

            virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])

    run_loss /= step
    print( "\nLoss: follow, angle, smooth, c2_smooth, undefine, optical")
    print(run_loss.cpu().numpy()[:-1], "\n")
    return np.squeeze(virtual_queue, axis=0)
def inference(cf, data_path, USE_CUDA):
    """Run the pretrained model on one recording folder and build its warp grid.

    Args:
        cf: Parsed configuration dict. Reads cf['data']['checkpoints_dir'],
            cf['data']['exp'] and cf['model']['load_model'].
        data_path: Folder holding the recording (video file plus sensor data).
        USE_CUDA: When truthy, both networks are moved to the GPU.

    Returns:
        Tuple ``(data, virtual_queue, video_name, grid)`` where ``data`` is the
        first entry of the inference dataset, ``virtual_queue`` is the array
        returned by ``run`` with one extra leading row, ``video_name`` is the
        selected clip name without extension and ``grid`` comes from
        ``get_grid``.

    Raises:
        FileNotFoundError: If ``data_path`` contains no usable ``.mp4`` clip.
            (Previously this surfaced as an UnboundLocalError only after the
            whole inference pass had already run.)
    """
    checkpoints_dir = make_dir(cf['data']['checkpoints_dir'], cf)

    # Select the source clip, skipping derived videos. As in the original
    # loop, the last matching file wins when several qualify.
    video_name = None
    for f in os.listdir(data_path):
        lowered = f.lower()
        if (f[-3:] == "mp4" and "no_ois" not in f and "no_shutter" not in f
                and "gimbal" not in lowered and "grid" not in lowered
                and "flo" not in lowered):
            video_name = f[:-4]
    if video_name is None:
        raise FileNotFoundError("No source .mp4 video found in " + str(data_path))

    # Define the model
    model = Model(cf)
    load_model = cf["model"]["load_model"]

    print("------Load Pretrined Model--------")
    if load_model is not None:
        checkpoint_path = load_model
    else:
        # Fall back to the "<exp>_last.checkpoint" file in the checkpoint dir.
        checkpoint_path = os.path.join(checkpoints_dir, cf['data']['exp']+'_last.checkpoint')
    checkpoint = torch.load(checkpoint_path)
    print(checkpoint_path)
    model.net.load_state_dict(checkpoint['state_dict'])
    model.unet.load_state_dict(checkpoint['unet'])

    if USE_CUDA:
        model.net.cuda()
        model.unet.cuda()

    print("-----------Load Dataset----------")
    test_loader = get_inference_data_loader(cf, data_path, no_flo = False)
    data = test_loader.dataset.data[0]

    start_time = time.time()
    virtual_queue = run(model, test_loader, cf, USE_CUDA=USE_CUDA)

    # Prepend one row: columns 1: are copied from the first predicted row and
    # column 0 is taken from data.frame[0, 0].
    virtual_data = np.zeros((1,5))
    virtual_data[:,1:] = virtual_queue[0, 1:]
    virtual_data[:,0] = data.frame[0,0]
    virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)
    print(virtual_queue.shape)

    time_used = (time.time() - start_time) / 60
    print("Time_used: %.4f minutes" % (time_used))

    # basename(normpath(...)) equals split("/")[-1] for ordinary paths but also
    # copes with a trailing separator, which used to give an empty file name.
    folder_name = os.path.basename(os.path.normpath(data_path))
    virtual_path = os.path.join("./test", cf['data']['exp'], folder_name+'.txt')
    # np.savetxt does not create missing directories.
    os.makedirs(os.path.dirname(virtual_path), exist_ok=True)
    np.savetxt(virtual_path, virtual_queue, delimiter=' ')

    print("------Start Warping Video--------")
    grid = get_grid(test_loader.dataset.static_options,
                    data.frame[:data.length], data.gyro, data.ois,
                    virtual_queue[:data.length,1:], no_shutter = False)
    return data, virtual_queue, video_name, grid
def visual_result(cf, data, video_name, virtual_queue, virtual_queue2 = None, compare_exp = None):
    """Plot real versus virtual rotations for one recording.

    Rotations are computed with ``get_rotations`` for the predicted queue
    (with an all-zero OIS array), for the recorded gyro/OIS data, and for the
    optional second queue. The figure is written by ``visual_rotation`` to
    ``./test/<exp>/<video_name>[_<compare_exp>].jpg``.
    """
    print("------Start Visual Result--------")
    length = data.length

    rotations_virtual, lens_offsets_virtual = get_rotations(
        data.frame[:length], virtual_queue, np.zeros(data.ois.shape), length)
    rotations_real, lens_offsets_real = get_rotations(
        data.frame[:length], data.gyro, data.ois, length)

    if virtual_queue2 is None:
        rotations_virtual2 = lens_offsets_virtual2 = None
        file_name = video_name+'.jpg'
    else:
        rotations_virtual2, lens_offsets_virtual2 = get_rotations(
            data.frame[:length], virtual_queue2, np.zeros(data.ois.shape), length)
        file_name = video_name+'_'+compare_exp+'.jpg'
    path = os.path.join("./test", cf['data']['exp'], file_name)

    visual_rotation(
        rotations_real, lens_offsets_real,
        rotations_virtual, lens_offsets_virtual,
        rotations_virtual2, lens_offsets_virtual2,
        path)
def main(args = None):
    """Stabilize every recording folder found under ``args.dir_path``.

    Args:
        args: Namespace with ``config`` (path to the YAML config) and
            ``dir_path`` (directory whose entries are recording folders).

    For each folder this runs ``inference``, saves the rotation plot through
    ``visual_result`` and writes ``./test/<exp>/<folder>_stab.mp4`` with
    ``warp_video``.
    """
    config_file = args.config
    dir_path = args.dir_path

    # yaml.load() without a Loader is deprecated since PyYAML 5.1 and a
    # TypeError in PyYAML 6; safe_load parses plain-data configs on every
    # version. The context manager also closes the config file handle.
    with open(config_file, 'r') as config_stream:
        cf = yaml.safe_load(config_stream)

    USE_CUDA = cf['data']["use_cuda"]

    # The log file stays open on purpose: it is handed to Printer together
    # with sys.stdout for the lifetime of the process.
    log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'_test.log'), 'w+')
    printer = Printer(sys.stdout, log_file).open()

    data_name = sorted(os.listdir(dir_path))
    total = len(data_name)
    for i, name in enumerate(data_name):
        print("Running Inference: " + str(i+1) + "/" + str(total))
        save_path = os.path.join("./test", cf['data']['exp'], name+'_stab.mp4')

        data_path = os.path.join(dir_path, name)
        data, virtual_queue, video_name, grid = inference(cf, data_path, USE_CUDA)

        virtual_queue2 = None
        visual_result(cf, data, name, virtual_queue, virtual_queue2 = virtual_queue2, compare_exp = None)

        video_path = os.path.join(data_path, video_name+".mp4")
        warp_video(grid, video_path, save_path, frame_number = False)
    return
if __name__ == '__main__':
    # Command-line entry point: read --config / --dir_path and run main().
    parser = argparse.ArgumentParser("Training model")
    for flag, options in (
        ("--config", {"default": "./conf/stabilzation.yaml", "help": "Config file."}),
        ("--dir_path", {"default": "./video"}),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()
    main(args = args)
================================================
FILE: dvs/load_frame_sensor_data.py
================================================
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import sys
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
import time
import yaml
import argparse
import numpy as np
from printer import Printer
from dataset import get_data_loader, get_inference_data_loader
from model import Model
import datetime
import copy
from util import make_dir, get_optimizer, norm_flow
from gyro import (
get_grid,
get_rotations,
visual_rotation,
GetGyroAtTimeStamp,
torch_ConvertQuaternionToAxisAngle,
torch_ConvertAxisAngleToQuaternion,
torch_QuaternionProduct,
get_static
)
from warp import warp_video
def run(loader, cf, USE_CUDA=True):
    """Build a virtual-pose queue by replaying the recorded poses (no network).

    For every step of every batch, columns 40:44 of the real input are used in
    place of a model output: they are composed with the current virtual
    position and the real anchor via ``torch_QuaternionProduct`` and pushed
    into the dataset's virtual queue.

    Args:
        loader: Data loader whose ``dataset`` provides ``get_virtual_data`` and
            ``update_virtual_queue``.
        cf: Parsed configuration dict; reads cf['data']['number_virtual'].
        USE_CUDA: When truthy, the per-step tensors are moved to the GPU
            before the quaternion products.

    Returns:
        The virtual queue left by the last batch, with axis 0 squeezed out.
    """
    number_virtual = cf['data']["number_virtual"]
    for data in loader:
        # get the inputs; data is a list of [inputs, labels]
        real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
        print("Fininsh Load data")

        real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
        real_projections = real_projections.type(torch.float)

        batch_size, step, _ = real_inputs.size()
        times = times.numpy()
        real_queue_idx = real_queue_idx.numpy()
        virtual_queue = [None] * batch_size

        for j in range(step):
            virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
                virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0],
                batch_size, number_virtual, real_postion[:,j])
            real_inputs_step = real_inputs[:,j,:]
            # Bind the anchor before the device switch so the CPU path has it
            # too; it used to be defined only together with the .cuda() moves.
            real_postion_anchor = real_postion[:,j]
            if USE_CUDA:
                real_inputs_step = real_inputs_step.cuda()
                virtual_inputs = virtual_inputs.cuda()
                real_postion_anchor = real_postion_anchor.cuda()

            out = real_inputs_step[:,40:44]
            virtual_position = virtual_inputs[:, -4:]
            pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
            out = torch_QuaternionProduct(out, pos)

            # .cpu() is a no-op for CPU tensors, so always hand numpy to the
            # queue update instead of converting only when USE_CUDA is set.
            out = out.cpu().detach().numpy()
            virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
    return np.squeeze(virtual_queue, axis=0)
def inference(cf, data_path, USE_CUDA):
    """Load one recording's sensor data and save a plot of its real rotations.

    The dataset is switched to ``no_flo = True`` with ``get_static(ratio = 0)``
    options, ``run`` replays the real poses, and the recorded gyro/OIS
    rotations are drawn to ``<data_path>/<video_name>_real.jpg``.

    Args:
        cf: Parsed configuration dict.
        data_path: Folder holding the recording.
        USE_CUDA: Forwarded to ``run``.

    Raises:
        FileNotFoundError: If ``data_path`` contains no usable ``.mp4`` clip
            (previously an UnboundLocalError at the first use of the name).
    """
    print("-----------Load Dataset----------")
    test_loader = get_inference_data_loader(cf, data_path)
    data = test_loader.dataset.data[0]
    test_loader.dataset.no_flo = True
    test_loader.dataset.static_options = get_static(ratio = 0)

    start_time = time.time()
    virtual_queue = run(test_loader, cf, USE_CUDA=USE_CUDA)

    # Same assembly as the stabilization script: prepend a row whose columns
    # 1: repeat the first queue row and whose column 0 is data.frame[0, 0].
    # NOTE(review): the assembled queue, start_time and virtual_path are
    # computed but never written or read afterwards in this function.
    virtual_data = np.zeros((1,5))
    virtual_data[:,1:] = virtual_queue[0, 1:]
    virtual_data[:,0] = data.frame[0,0]
    virtual_queue = np.concatenate((virtual_data, virtual_queue), axis = 0)

    # Last matching clip wins, as in the original loop.
    video_name = None
    for f in os.listdir(data_path):
        if f[-3:] == "mp4" and "no_ois" not in f and "gimbal" not in f.lower():
            video_name = f[:-4]
    if video_name is None:
        raise FileNotFoundError("No source .mp4 video found in " + str(data_path))
    print(video_name)
    virtual_path = os.path.join("./test", cf['data']['exp'], video_name+'.txt')

    print("------Start Visual Result--------")
    rotations_real, lens_offsets_real = get_rotations(data.frame[:data.length], data.gyro, data.ois, data.length)
    fig_path = os.path.join(data_path, video_name+"_real.jpg")
    visual_rotation(rotations_real, lens_offsets_real, None, None, None, None, fig_path)
    return
def main(args = None):
    """Plot the real rotations of every recording folder under ``args.dir_path``.

    Args:
        args: Namespace with ``config`` (path to the YAML config) and
            ``dir_path`` (directory whose entries are recording folders).
    """
    config_file = args.config
    dir_path = args.dir_path

    # yaml.load() without a Loader is deprecated since PyYAML 5.1 and a
    # TypeError in PyYAML 6; safe_load parses plain-data configs on every
    # version. The context manager also closes the config file handle.
    with open(config_file, 'r') as config_stream:
        cf = yaml.safe_load(config_stream)

    USE_CUDA = cf['data']["use_cuda"]

    # The return value is not needed here; the call is kept because make_dir
    # is also invoked at this point by the original script.
    checkpoints_dir = make_dir(cf['data']['checkpoints_dir'], cf)

    data_name = sorted(os.listdir(dir_path))
    total = len(data_name)
    for i, name in enumerate(data_name):
        print("Running: " + str(i+1) + "/" + str(total))
        inference(cf, os.path.join(dir_path, name), USE_CUDA)
    return
if __name__ == '__main__':
    # Command-line entry point: read --config / --dir_path and run main().
    parser = argparse.ArgumentParser("Training model")
    for flag, options in (
        ("--config", {"default": "./conf/stabilzation.yaml", "help": "Config file."}),
        ("--dir_path", {"default": "./video"}),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()
    main(args = args)
================================================
FILE: dvs/loss.py
================================================
import torch
import numpy as np
from torch.autograd import Variable
import operator
import torch.nn.functional as F
import matplotlib.pyplot as plt
from gyro import (
torch_QuaternionProduct,
torch_QuaternionReciprocal,
get_static,
torch_GetVirtualProjection,
torch_GetForwardGrid,
torch_GetWarpingFlow,
torch_ConvertAxisAngleToQuaternion,
torch_ConvertQuaternionToAxisAngle,
torch_norm_quat,
torch_GetHomographyTransformFromProjections,
torch_ApplyTransform
)
class C2_Smooth_loss(torch.nn.Module):
    """MSE between the current quaternion and the previous relative rotation.

    The reference is ``Qt_1 * reciprocal(Qt_2)``, computed with the project's
    quaternion helpers.
    """

    def __init__(self):
        super().__init__()
        self.MSE = torch.nn.MSELoss()

    def forward(self, Qt, Qt_1, Qt_2):
        """Return ``MSE(Qt, Qt_1 * Qt_2^-1)``."""
        inverse_older = torch_QuaternionReciprocal(Qt_2)
        previous_delta = torch_QuaternionProduct(Qt_1, inverse_older)
        return self.MSE(Qt, previous_delta)
class C1_Smooth_loss(torch.nn.Module):
    """MSE between a batch of 4-vectors and the constant vector (0, 0, 0, 1)."""

    def __init__(self):
        super(C1_Smooth_loss, self).__init__()
        self.MSE = torch.nn.MSELoss()

    def forward(self, v_r_axis, v_axis_t_1 = None, real_postion = None):
        """Return ``MSE(v_r_axis, [0, 0, 0, 1])``.

        Args:
            v_r_axis: Tensor indexed as [:, 3], i.e. at least 4 columns.
            v_axis_t_1: Unused; kept for interface compatibility.
            real_postion: Unused; kept for interface compatibility.
        """
        # Build the target next to the input rather than forcing it onto the
        # GPU: the old unconditional .cuda() failed for CPU tensors and on
        # machines without CUDA.
        quat_zero = torch.zeros_like(v_r_axis)
        quat_zero[:,3] = 1
        return self.MSE(v_r_axis, quat_zero)
class Follow_loss(torch.nn.Module):
    """MSE between a virtual quaternion and a (optionally re-anchored) real one."""

    def __init__(self):
        super().__init__()
        self.MSE = torch.nn.MSELoss()

    def forward(self, virtual_quat, real_quat, real_postion = None):
        """Return ``MSE(virtual_quat, target)``.

        ``target`` is ``real_quat`` itself, or ``real_quat * real_postion``
        (quaternion product) when ``real_postion`` is given.
        """
        if real_postion is None:
            target = real_quat
        else:
            target = torch_QuaternionProduct(real_quat, real_postion)
        return self.MSE(virtual_quat, target)
class Stay_loss(torch.nn.Module):
    """Mean absolute difference between a quaternion batch and (0, 0, 0, 1)."""

    def __init__(self):
        super(Stay_loss, self).__init__()
        zero = torch.tensor([0.0,0.0,0.0,1.0])
        # Still placed on the GPU when one exists, but construction no longer
        # crashes on CUDA-less machines.
        self.zero = zero.cuda() if torch.cuda.is_available() else zero

    def forward(self, virtual_quat):
        """Return ``mean(|virtual_quat - [0, 0, 0, 1]|)``."""
        # .to() is a no-op when the reference already lives on the input's device.
        reference = self.zero.to(virtual_quat.device)
        return torch.mean(torch.abs(virtual_quat - reference))
class Angle_loss(torch.nn.Module):
    """Soft-thresholded penalty on the rotation angle between two quaternions."""

    def __init__(self):
        super(Angle_loss, self).__init__()
        self.MSE = torch.nn.MSELoss()

    def forward(self, Q1, Q2, threshold = 0.5236, logistic_beta1 = 100):
        """Return ``(loss, theta)``.

        ``theta`` is ``2 * acos(w)`` of the normalized quaternion
        ``Q2 * reciprocal(Q1)``, computed only for rows whose w component
        (index 3) is below 1 and left at 0 otherwise. The loss is the batch
        mean of ``theta * sigmoid(logistic_beta1 * (theta - threshold))``.

        Args:
            Q1, Q2: Quaternion batches; ``Q1.shape[0]`` is the batch size.
            threshold: Angle at which the logistic gate is centred.
            logistic_beta1: Steepness of the logistic gate.
        """
        batch_size = Q1.shape[0]
        Q3 = torch_norm_quat(torch_QuaternionProduct(Q2, torch_QuaternionReciprocal(Q1)))
        # Allocate on Q3's device instead of hard-coding .cuda().
        theta = torch.zeros(batch_size, device=Q3.device)
        # Rows with w >= 1 are skipped so acos is never evaluated there.
        index = (Q3[:,3] < 1).nonzero()
        theta[index] = torch.acos(Q3[index,3]) * 2
        loss = torch.mean(theta * (1 / (1 + torch.exp(-logistic_beta1 * (theta - threshold)))))
        return loss, theta
class Optical_loss(torch.nn.Module):
    """Consistency loss between optical flow and the warp grids of two frames.

    The forward warp grid of each frame is resized to the flow resolution.
    The grid of frame t is sampled at the positions the forward flow points
    to and compared with the grid of frame t-1; the same is done in the other
    direction with the backward flow.
    """

    def __init__(self):
        super(Optical_loss, self).__init__()
        self.static_options = get_static()
        self.mesh = get_mesh()

    def _dense_grid(self, real_projection, virtual_projection, height, width):
        """Return the first two channels of the forward grid, resized to (height, width)."""
        grid = torch_GetForwardGrid(self.static_options, real_projection, virtual_projection)
        grid = grid[:,:2,:,:].permute(0,1,3,2)
        # upsample_bilinear is deprecated; interpolate(..., mode="bilinear",
        # align_corners=True) is its documented equivalent.
        return torch.nn.functional.interpolate(
            grid, size = (height, width), mode = "bilinear", align_corners = True) # [B,C(xy),H,W]

    @staticmethod
    def _inside_unit_square(coords):
        """Boolean mask of positions whose x and y both lie strictly in (0, 1)."""
        return (coords[:,:,:,0] > 0) & (coords[:,:,:,1] > 0) & (coords[:,:,:,0] < 1) & (coords[:,:,:,1] < 1)

    def forward(self, Vt, Vt_1, flo, flo_back, real_projection_t, real_projection_t_1):
        """Return the batch-averaged flow consistency loss.

        Args:
            Vt, Vt_1: Virtual poses passed to ``torch_GetVirtualProjection``.
            flo, flo_back: Forward and backward flow, laid out [B,H,W,C].
            real_projection_t, real_projection_t_1: Real projections for the
                two frames, passed to ``torch_GetForwardGrid``.
        """
        virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
        virtual_projection_t_1 = torch_GetVirtualProjection(self.static_options, Vt_1)

        b, h, w = flo.size()[:3]

        grid_t = self._dense_grid(real_projection_t, virtual_projection_t, h, w)
        grid_t_1 = self._dense_grid(real_projection_t_1, virtual_projection_t_1, h, w)

        # Flow offsets -> absolute sampling positions.
        mesh = self.mesh.repeat(b, 1, 1, 1)
        flo = flo + mesh
        flo_back = flo_back + mesh # [B,H,W,C]

        # .float() keeps the masks on the flow's device; the former cast to
        # torch.cuda.FloatTensor only worked on a GPU.
        valid = self._inside_unit_square(flo)
        valid_f = torch.unsqueeze(valid, dim = 3).float()
        valid = torch.unsqueeze(valid, dim = 1).float()
        valid_back = self._inside_unit_square(flo_back)
        valid_back_f = torch.unsqueeze(valid_back, dim = 3).float()
        valid_back = torch.unsqueeze(valid_back, dim = 1).float() # [B,C,H,W]

        # Map [0, 1] positions to the [-1, 1] range grid_sample expects.
        flo = (flo * 2 - 1) * valid_f
        flo_back = (flo_back * 2 - 1) * valid_back_f

        # NOTE(review): align_corners is left at the library default, exactly
        # as before; that default depends on the installed PyTorch version.
        forward_t = torch.nn.functional.grid_sample(grid_t, flo, padding_mode="reflection") # default bilinear
        backward_t_1 = torch.nn.functional.grid_sample(grid_t_1, flo_back, padding_mode="reflection") # default bilinear

        forward_diff = ((forward_t - grid_t_1) * valid) ** 2
        backward_diff = ((backward_t_1 - grid_t) * valid_back) ** 2

        forward_loss = torch.sum(forward_diff, dim = (1,2,3)) / torch.sum(valid, dim = (1,2,3))
        backward_loss = torch.sum(backward_diff, dim = (1,2,3)) / torch.sum(valid_back, dim = (1,2,3))

        loss = forward_loss + backward_loss
        # Cap each sample's loss at 1 (element-wise min against a tensor of ones).
        loss = torch.min(loss, loss - loss + 1) #[0]
        loss = torch.sum(loss) / b
        return loss
def get_mesh(height = 270, width = 480, USE_CUDA = True):
    """Return normalized pixel-centre coordinates as a (1, height, width, 2) tensor.

    The last axis holds (x, y). Column j has x = (j + 0.5) / width and row i
    has y = (i + 0.5) / height.

    Args:
        height: Number of rows.
        width: Number of columns.
        USE_CUDA: When truthy, the tensor is moved to the GPU.
    """
    # The half-pixel offset must use the same axis length as the spacing.
    # The previous code added 0.5 / height to x and 0.5 / width to y, which
    # shifted the centres whenever height != width.
    xs = np.linspace(0, 1, width, endpoint = False) + 0.5 / width
    ys = np.linspace(0, 1, height, endpoint = False) + 0.5 / height
    xmesh, ymesh = np.meshgrid(xs, ys)
    # Stack the sampling positions into an H x W x 2 array and add the batch axis.
    mesh = torch.Tensor(np.stack((xmesh, ymesh), axis = -1)[np.newaxis])
    if USE_CUDA:
        mesh = mesh.cuda()
    return mesh
class Undefine_loss(torch.nn.Module):
    """Penalty for frame corners that map too close to, or past, the image border.

    Four reference corners, inset by ``ratio`` of the frame size, are pushed
    through the real-to-virtual homography, normalized by (width, height), and
    penalized by how far they fall outside
    ``[inner_ratio, 1 - inner_ratio]``.
    """

    def __init__(self, ratio = 0.08, inner_ratio = 0.04, USE_CUDA = True):
        super(Undefine_loss, self).__init__()
        self.static_options = get_static()
        self.inner_ratio = inner_ratio
        width = self.static_options["width"]
        height = self.static_options["height"]
        x0, x1, y0, y1 = \
            int(width*ratio), int(width*(1-ratio)), int(height*ratio), int(height*(1-ratio))
        self.norm = torch.Tensor([width, height, 1])
        # Homogeneous coordinates of the four inset corners.
        self.p00 = torch.Tensor([x0, y0, 1])
        self.p01 = torch.Tensor([x0, y1, 1])
        self.p10 = torch.Tensor([x1, y0, 1])
        self.p11 = torch.Tensor([x1, y1, 1])
        if USE_CUDA:
            self.p00 = self.p00.cuda()
            self.p01 = self.p01.cuda()
            self.p10 = self.p10.cuda()
            self.p11 = self.p11.cuda()
            self.norm = self.norm.cuda()

    def forward(self, Vt, Rt, ratio = 0.04):
        """Return the batch-averaged border penalty.

        Args:
            Vt: Virtual poses; ``Vt.size()[0]`` is the batch size.
            Rt: Real poses.
            ratio: Unused; kept for interface compatibility (the margin used
                is ``self.inner_ratio``).
        """
        batch_size = Vt.size()[0]

        virtual_projection_t = torch_GetVirtualProjection(self.static_options, Vt)
        real_projection_t = torch_GetVirtualProjection(self.static_options, Rt)

        # virtual projection and real projection
        transform = torch_GetHomographyTransformFromProjections(real_projection_t, virtual_projection_t)

        corner_losses = []
        for corner in (self.p00, self.p01, self.p10, self.p11):
            normalized = (torch_ApplyTransform(transform, corner) / self.norm)[:,:2]
            corner_losses.append(self.get_loss(normalized))

        # Worst corner per sample, capped at 1, then averaged over the batch.
        loss = torch.stack(corner_losses, dim = 1)
        loss,_ = torch.max(loss, dim = 1)
        loss = torch.min(loss, loss - loss + 1) #[0]
        loss = torch.sum(loss) / batch_size
        return loss

    def get_loss(self, p):
        """Return, per row of ``p``, the squared distance outside the allowed band.

        Coordinates below ``inner_ratio`` or above ``1 - inner_ratio``
        contribute their distance to that bound; others contribute 0.
        """
        # type_as(p) keeps the masks on p's device and dtype; the former cast
        # to torch.cuda.FloatTensor ignored USE_CUDA=False.
        below = (p < self.inner_ratio).type_as(p)
        above = (p > (1 - self.inner_ratio)).type_as(p)
        d = (p - self.inner_ratio) * below + (1 - self.inner_ratio - p) * above
        return torch.sum(d**2, dim = 1)
================================================
FILE: dvs/metrics.py
================================================
import os
import sys
import numpy as np
import cv2
import math
import pdb
import matplotlib.pyplot as plt
from printer import Printer
from warp import video2frame_one_seq
import datetime
import torch
import copy
import csv
import copyreg
import shutil
import matplotlib.pyplot as plt
from util import crop_video
def _pickle_keypoints(point):
    """copyreg reducer for cv2.KeyPoint.

    Returns the ``(constructor, args)`` pair pickle needs to rebuild the
    keypoint: its coordinates followed by size, angle, response, octave and
    class id.
    """
    constructor_args = tuple(point.pt) + (
        point.size,
        point.angle,
        point.response,
        point.octave,
        point.class_id,
    )
    return cv2.KeyPoint, constructor_args
# Register the reducer so cv2.KeyPoint objects can be pickled; metrics() stores
# SIFT keypoints in the dictionary it saves with torch.save.
copyreg.pickle(cv2.KeyPoint().__class__, _pickle_keypoints)
# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Working resolution: frames are resized to (w_size, h_size) before feature
# extraction, and crop_metric() measures against the same size.
h_size = 480
w_size = 640
def crop_metric(M):
    """Return the (width, height) fraction of the frame left after warping by M.

    The four frame corners are projected through the 3x3 homography ``M``.
    For each axis, the largest inward shift of any edge (never less than 0)
    is subtracted, as a fraction of the frame size, from 1.
    """
    corners = np.array([
        [0, 0, 1],
        [0, h_size, 1],
        [w_size, 0, 1],
        [w_size, h_size, 1],
    ]).T
    projected = np.matmul(M, corners).T
    projected = projected[:,:2]/projected[:,2:]
    top_left, bottom_left, top_right, bottom_right = projected

    lost_width = max(top_left[0], bottom_left[0],
                     w_size - top_right[0], w_size - bottom_right[0], 0)
    lost_height = max(top_left[1], top_right[1],
                      h_size - bottom_left[1], h_size - bottom_right[1], 0)
    w_out = 1 - lost_width/w_size
    h_out = 1 - lost_height/h_size
    return w_out, h_out
# https://stackoverflow.com/questions/34389125/how-to-get-the-scale-factor-of-getperspectivetransform-in-opencv
def get_scale(M):
    """Return the two scale factors extracted from the 3x3 homography ``M``.

    A 2x2 matrix is formed from the upper-left block of ``M`` with the
    perspective-times-translation terms removed. It is QR-decomposed and the
    absolute diagonal entries of R are returned.
    """
    tx, ty = M[0, 2], M[1, 2]
    px, py = M[2, 0], M[2, 1]
    QR = np.array([
        [M[0, 0] - px*tx, M[0, 1] - py*tx],
        [M[1, 0] - px*ty, M[1, 1] - py*ty],
    ])
    _, upper = np.linalg.qr(QR)
    return abs(upper[0,0]), abs(upper[1,1])
# https://stackoverflow.com/questions/21019338/how-to-change-the-homography-with-the-scale-of-the-image
def get_rescale_matrix(M, sx, sy):
    """Return ``M`` right-multiplied by ``diag(1/sx, 1/sy, 1)``."""
    inverse_scale = np.diag([1/sx, 1/sy, 1.0])
    return np.matmul(M, inverse_scale)
# Part of code reference from https://github.com/jinsc37/DIFRINT/blob/master/metrics.py
# Compute stabilization quality metrics for a pair of frame folders.
#   in_src     : folder with the input (unstabilized) frames.
#   out_src    : folder with the stabilized frames; files are matched by name.
#   package    : path of the result file written with torch.save.
#   crop_scale : when set, the homography is rescaled by get_scale() /
#                get_rescale_matrix() before the crop ratio is measured.
#   re_compute : when set and `package` exists, cached SIFT features are loaded
#                from it and reused, and the full dictionary is not re-saved.
# Prints distortion, stability, FOV and correlation summaries, always writes a
# "_light" copy of the results, and returns the minimum FOV ratio as a float.
def metrics(in_src, out_src, package, crop_scale = False, re_compute = False):
load_dic = None
if re_compute and os.path.exists(package):
print("Start load")
load_dic = torch.load(package)
print("Finish load")
# Result container; 'in_sift' / 'out_sift' cache (keypoints, descriptors) per frame file name.
dic = {
'M': None,
'CR_seq': [],
'DV_seq': [],
'SS_t': None,
'SS_r': None,
'w_crop':[],
'h_crop':[],
'distortion': [],
'count': 0,
'in_sift': {},
'out_sift': {},
'fft_t': {},
'fft_r': {}
}
if load_dic is not None:
dic["in_sift"] = load_dic["in_sift"]
dic["out_sift"] = load_dic["out_sift"]
# Only as many output frames as there are input frames are processed.
frameList_in = sorted(os.listdir(in_src))
frameList = sorted(os.listdir(out_src))
frameList = frameList[:min(len(frameList_in),len(frameList))]
# Create brute-force matcher object
bf = cv2.BFMatcher()
# Apply the homography transformation if we have enough good matches
MIN_MATCH_COUNT = 10 #10
# ratio: ratio-test factor for knn matches; thresh: RANSAC reprojection threshold.
ratio = 0.7 #0.7
thresh = 5.0 #5.0
# Pt accumulates the product of frame-to-frame homographies; P_seq keeps each accumulated matrix.
Pt = np.asarray([[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]])
P_seq = []
count = 1
for index, f in enumerate(frameList, 0):
if f.endswith('.png'):
# Load the images in gray scale
img1 = cv2.imread(os.path.join(in_src, f), 0)
img1 = cv2.resize(img1, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
img1o = cv2.imread(os.path.join(out_src, f), 0)
img1o = cv2.resize(img1o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
# NOTE(review): the SIFT detector is constructed again for every frame; it does not depend on the frame.
sift = cv2.SIFT_create()
if f in dic["in_sift"]:
keyPoints1, descriptors1 = dic["in_sift"][f]
else:
# Detect the SIFT key points and compute the descriptors for the two images
keyPoints1, descriptors1 = sift.detectAndCompute(img1, None)
dic["in_sift"][f] = (keyPoints1, descriptors1)
if f in dic["out_sift"]:
keyPoints1o, descriptors1o = dic["out_sift"][f]
else:
keyPoints1o, descriptors1o = sift.detectAndCompute(img1o, None)
dic["out_sift"][f] = (keyPoints1o, descriptors1o)
# Match the descriptors
matches = bf.knnMatch(descriptors1, descriptors1o, k=2)
# Select the good matches using the ratio test
goodMatches = []
for m, n in matches:
if m.distance < ratio * n.distance:
goodMatches.append(m)
if len(goodMatches) > MIN_MATCH_COUNT:
# Get the good key points positions
sourcePoints = np.float32([ keyPoints1[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
destinationPoints = np.float32([ keyPoints1o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
# Homography from the input frame to the stabilized frame.
# NOTE(review): M is used below without checking it for None — confirm findHomography cannot fail here.
M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
im_dst = cv2.warpPerspective(img1, M, (w_size,h_size))
# Correlation between the stabilized frame and the warped input on a 6x6 set of
# patches, each 10% of the frame in size, starting at 20%..70% of height/width.
cm = []
for i in range(6):
for j in range(6):
hs = int(h_size * (0.2 + 0.1 * i))
he = int(h_size * (0.3 + 0.1 * i))
ws = int(w_size * (0.2 + 0.1 * j))
we = int(w_size * (0.3 + 0.1 * j))
cm.append(np.corrcoef(img1o[hs:he, ws:we].flat, im_dst[hs:he, ws:we].flat))
dic["distortion"].append(cm)
# Crop ratio of the frame under M, optionally after rescaling M.
if crop_scale:
sx, sy = get_scale(M)
M_scale = get_rescale_matrix(M, sx, sy)
w_crop, h_crop = crop_metric(M_scale)
else:
w_crop, h_crop = crop_metric(M)
dic["w_crop"].append(w_crop)
dic["h_crop"].append(h_crop)
# Obtain Scale, Translation, Rotation, Distortion value
sx = M[0, 0]
sy = M[1, 1]
scaleRecovered = math.sqrt(np.abs(sx*sy))
# DV: ratio of the second to the first eigenvalue (sorted descending) of the upper-left 2x2 block.
w, _ = np.linalg.eig(M[0:2,0:2])
w = np.sort(w)[::-1]
DV = w[1]/w[0]
#pdb.set_trace()
dic["CR_seq"].append(1.0/scaleRecovered)
dic["DV_seq"].append(DV)
# For Stability score calculation
if count < len(frameList):
# Next frame's file name: the five digits before ".png" are incremented by one.
f_path = f[:-9] + '%05d.png' % (int(f[-9:-4])+1)
if f_path in dic["out_sift"]:
keyPoints2o, descriptors2o = dic["out_sift"][f_path]
else:
img2o = cv2.imread(os.path.join(out_src, f_path), 0)
img2o = cv2.resize(img2o, (w_size,h_size), interpolation = cv2.INTER_LINEAR)
keyPoints2o, descriptors2o = sift.detectAndCompute(img2o, None)
dic["out_sift"][f_path] = (keyPoints2o, descriptors2o)
matches = bf.knnMatch(descriptors1o, descriptors2o, k=2)
goodMatches = []
for m, n in matches:
if m.distance < ratio * n.distance:
goodMatches.append(m)
if len(goodMatches) > MIN_MATCH_COUNT:
# Get the good key points positions
sourcePoints = np.float32([ keyPoints1o[m.queryIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
destinationPoints = np.float32([ keyPoints2o[m.trainIdx].pt for m in goodMatches ]).reshape(-1, 1, 2)
# Obtain the homography matrix
M, mask = cv2.findHomography(sourcePoints, destinationPoints, method=cv2.RANSAC, ransacReprojThreshold=thresh)
P_seq.append(np.matmul(Pt, M))
Pt = np.matmul(Pt, M)
# Progress output every 10th count.
if count % 10 ==0:
sys.stdout.write('\rFrame: ' + str(count) + '/' + str(len(frameList)))
sys.stdout.flush()
dic["count"] = count
count += 1
# Make 1D temporal signals
# Both arrays start with a placeholder 1 that is deleted again after the loop.
P_seq_t = np.asarray([1])
P_seq_r = np.asarray([1])
#pdb.set_trace()
for Mp in P_seq:
sx = Mp[0, 0]
sy = Mp[1, 1]
c = Mp[0, 2]
f = Mp[1, 2]
# Translation magnitude from the last column; the angle is atan2 of the two diagonal entries, in degrees.
transRecovered = math.sqrt(c*c + f*f)
thetaRecovered = math.atan2(sx, sy) * 180 / math.pi
P_seq_t = np.concatenate((P_seq_t, [transRecovered]), axis=0)
P_seq_r = np.concatenate((P_seq_r, [thetaRecovered]), axis=0)
P_seq_t = np.delete(P_seq_t, 0)
P_seq_r = np.delete(P_seq_r, 0)
# FFT
# Power spectrum with the first (DC) bin removed and only the first half kept.
fft_t = np.fft.fft(P_seq_t)
fft_r = np.fft.fft(P_seq_r)
fft_t = abs(fft_t)**2
fft_r = abs(fft_r)**2
fft_t = np.delete(fft_t, 0)
fft_r = np.delete(fft_r, 0)
fft_t = fft_t[:int(len(fft_t)/2)]
fft_r = fft_r[:int(len(fft_r)/2)]
dic["fft_t"] = fft_t
dic["fft_r"] = fft_r
# Stability score: share of spectral energy in the first five remaining bins.
SS_t = np.sum(fft_t[:5])/np.sum(fft_t)
SS_r = np.sum(fft_r[:5])/np.sum(fft_r)
dic["CR_seq"] = np.array(dic["CR_seq"])
dic["DV_seq"] = np.array(dic["DV_seq"])
dic["w_crop"] = np.array(dic["w_crop"])
dic["h_crop"] = np.array(dic["h_crop"])
dic["distortion"] = np.array(dic["distortion"])
dic["SS_t"] = SS_t
dic["SS_r"] = SS_r
# The full dictionary (including SIFT caches) is saved unless it was just loaded from `package`.
if not (re_compute and os.path.exists(package)):
torch.save(dic, package)
# Only distortion values with magnitude in [0.5, 1] enter the summary.
DV_seq = np.absolute(dic["DV_seq"])
DV_seq = DV_seq[np.where((DV_seq >= 0.5) & (DV_seq <= 1))]
Distortion = str.format('{0:.4f}', np.nanmin(DV_seq))
Distortion_avg = str.format('{0:.4f}', np.nanmean(DV_seq))
Trans = str.format('{0:.4f}', dic["SS_t"])
Rot = str.format('{0:.4f}', dic["SS_r"])
w_crop = crop_rm_outlier(dic["w_crop"])
h_crop = crop_rm_outlier(dic["h_crop"])
FOV = str.format( '{0:.4f}', min(np.nanmin(w_crop), np.nanmin(h_crop)) )
FOV_avg = str.format( '{0:.4f}', (np.nanmean(w_crop)+np.nanmean(h_crop)) / 2 )
# The correlation summary skips the first 10 entries of the distortion array.
Correlation_avg = str.format( '{0:.4f}', np.nanmean(dic["distortion"][10:]) )
Correlation_min = str.format( '{0:.4f}', np.nanmin(dic["distortion"][10:]) )
# Print results
print('\n***Distortion value (Avg, Min):')
print(Distortion_avg +' | '+ Distortion)
print('***Stability Score (Avg, Trans, Rot):')
print(str.format('{0:.4f}', (dic["SS_t"]+dic["SS_r"])/2) +' | '+ Trans +' | '+ Rot )
print("=================")
print('***FOV ratio (Avg, Min):')
print( FOV_avg +' | '+ FOV )
print('***Correlation value (Avg, Min):')
print( Correlation_avg +' | '+ Correlation_min , "\n")
# Save a copy without the SIFT caches; the path is `package` minus its last three characters plus "_light.pt".
dic['in_sift'] = 0
dic['out_sift'] = 0
torch.save(dic, package[:-3]+"_light.pt")
return float(FOV)
def crop_rm_outlier(crop):
    """Return the crop ratios >= 0.5 in ascending order, minus the five smallest."""
    ratios = np.array(crop)
    kept = ratios[ratios >= 0.5]
    ordered = sorted(kept)
    return ordered[5:]
# Script entry point: evaluate one hard-coded input/output video pair.
if __name__ == '__main__':
# Folder that receives the extracted frames and the metric result files.
metric_path = os.path.join("./test/stabilzation/metric")
if not os.path.exists(metric_path):
os.makedirs(metric_path)
# Input (unstabilized) video and the folder for its frames.
in_video = "./video/s_114_outdoor_running_trail_daytime/ControlCam_20200930_104820.mp4"
in_folder = os.path.join(metric_path, "in_frame")
if not os.path.exists(in_folder):
os.makedirs(in_folder)
print("Convert video to frames")
video2frame_one_seq(in_video, in_folder)
# Stabilized video and the folder for its frames.
out_video = "./test/stabilzation/s_114_outdoor_running_trail_daytime_stab.mp4"
out_folder = os.path.join(metric_path, "out_frame")
if not os.path.exists(out_folder):
os.makedirs(out_folder)
print("Convert video to frames")
video2frame_one_seq(out_video, out_folder)
# Compute the metrics; the returned minimum FOV ratio is passed to crop_video
# to write "<out_video>_crop.mp4".
package = os.path.join(metric_path, "stabilzation.pt")
FOV = metrics(in_folder, out_folder, package)
crop_path = out_video[:-4] + "_crop.mp4"
crop_video(out_video, crop_path, FOV)
================================================
FILE: dvs/model.py
================================================
import math
import torch
from collections import OrderedDict
import torch.nn as nn
import numpy as np
import util
import yaml
import os
from loss import C2_Smooth_loss, C1_Smooth_loss, Optical_loss, Undefine_loss, Angle_loss, Follow_loss, Stay_loss
from gyro import torch_norm_quat, torch_QuaternionProduct
import torch.nn.functional as F
# Maps the "activate_function" names used in the model config to torch.nn activation classes.
Activates = {"sigmoid": nn.Sigmoid, "relu": nn.ReLU, "tanh": nn.Tanh}
class LayerLSTM(nn.Module):
    """Single ``nn.LSTMCell`` that keeps its hidden and cell state on the module.

    Call ``init_hidden`` before the first ``forward`` of a sequence; every
    ``forward`` call advances the stored state by one step.
    """

    def __init__(self, input_size, hidden_size, bias):
        super(LayerLSTM, self).__init__()
        self.LSTM = nn.LSTMCell(input_size, hidden_size, bias)
        self.hidden_size = hidden_size

    def init_hidden(self, batch_size):
        """Reset hidden and cell state to zeros of shape (batch_size, hidden_size)."""
        # Allocate next to the cell's weights (same device and dtype) instead
        # of an unconditional .cuda(), so a CPU model works as well.
        reference = self.LSTM.weight_ih
        self.hx = reference.new_zeros((batch_size, self.hidden_size))
        self.cx = reference.new_zeros((batch_size, self.hidden_size))

    def forward(self, x):
        """Advance the cell by one step on ``x`` and return the new hidden state."""
        # No-op when the state already lives on x's device; this also covers a
        # module that was moved after init_hidden() was called.
        state = (self.hx.to(x.device), self.cx.to(x.device))
        self.hx, self.cx = self.LSTM(x, state)
        return self.hx
class LayerCNN(nn.Module):
    """Conv2d followed by optional BatchNorm2d, an activation and optional MaxPool2d."""

    def __init__(self, in_channel, out_channel, kernel_size, stride, padding, pooling_size=None,
                 activation_function=nn.ReLU, batch_norm=True):
        super().__init__()
        self.conv = nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride, padding=padding)
        self.batch_norm = nn.BatchNorm2d(out_channel) if batch_norm else None
        # The activation is always built with inplace=True.
        self.activation = activation_function(inplace=True)
        self.pooling = None if pooling_size is None else nn.MaxPool2d(pooling_size)

    def forward(self, x):
        """Apply conv -> [batch norm] -> activation -> [pooling]."""
        out = self.conv(x) #x->[batch,channel,height,width]
        for stage in (self.batch_norm, self.activation, self.pooling):
            if stage is not None:
                out = stage(out)
        return out
class LayerFC(nn.Module):
    """Linear layer with optional dropout (before), batch norm and activation (after)."""

    def __init__(self, in_features, out_features, bias, drop_out=0, activation_function=nn.ReLU, batch_norm = False):
        super().__init__()
        self.fc = nn.Linear(in_features, out_features, bias=bias)
        self.activation = None if activation_function is None else activation_function()
        self.dropout = nn.Dropout(p=drop_out,inplace=False) if drop_out else None
        self.batch_norm = nn.BatchNorm1d(out_features) if batch_norm else None

    def forward(self, x):
        """Apply [dropout] -> linear -> [batch norm] -> [activation]."""
        out = x
        for stage in (self.dropout, self.fc, self.batch_norm, self.activation):
            if stage is not None:
                out = stage(out)
        return out
# Pose network: optional conv stack -> stacked LSTM cells -> fully connected
# head. forward() returns the head output passed through torch_norm_quat.
# Layer layout comes from cf["model"]["cnn"], cf["model"]["rnn"] and cf["model"]["fc"].
class Net(nn.Module):
def __init__(self, cf):
super(Net, self).__init__()
self.cnn_param = cf["model"]["cnn"]
self.rnn_param = cf["model"]["rnn"]
self.fc_param = cf["model"]["fc"]
self.unit_size = 4
# no_flo is fixed to False here, so forward() always concatenates the flow feature.
self.no_flo = False
# Input width: (2*number_real + 1 + number_virtual) groups of 4 values, plus 64 when flow is used.
# NOTE(review): 64 presumably matches the 64-feature output of UNet's final layer — confirm.
if self.no_flo is False:
self._rnn_input_size = (2*cf["data"]["number_real"]+1+cf["data"]["number_virtual"]) * 4 + 64
else:
self._rnn_input_size = (2*cf["data"]["number_real"]+1+cf["data"]["number_virtual"]) * self.unit_size
#CNN Layers
cnns = []
cnn_activation = Activates[self.cnn_param["activate_function"]]
cnn_batch_norm = self.cnn_param["batch_norm"]
cnn_layer_param = self.cnn_param["layers"]
if cnn_layer_param is not None:
cnn_layers = len(cnn_layer_param)
for layer in range(cnn_layers):
# NOTE(review): layer specs are strings evaluated with eval(); only use with trusted config files.
in_channel = eval(cnn_layer_param[layer][0])[0]
out_channel = eval(cnn_layer_param[layer][0])[1]
kernel_size = eval(cnn_layer_param[layer][1])
stride = eval(cnn_layer_param[layer][2])
padding = eval(cnn_layer_param[layer][3])
pooling_size = eval(cnn_layer_param[layer][4])
cnn = None
cnn = LayerCNN(in_channel, out_channel, kernel_size, stride, padding, pooling_size,
activation_function=cnn_activation, batch_norm=cnn_batch_norm)
cnns.append(('%d' % layer, cnn))
# Track the feature size after this conv (and pooling) along index 1 of kernel/stride/padding.
self._rnn_input_size = int(math.floor((self._rnn_input_size+2*padding[1]-kernel_size[1])/stride[1])+1)
if pooling_size is not None:
self._rnn_input_size = int(math.floor((self._rnn_input_size-pooling_size[1])/pooling_size[1])+1)
self.convs = nn.Sequential(OrderedDict(cnns))
else:
self.convs = None
out_channel = cf["data"]["channel_size"]
# Optional global average pooling; the LSTM input size becomes out_channel (gap) or out_channel * feature size.
self.gap = nn.AvgPool2d(self._rnn_input_size) if self.cnn_param["gap"] else None
self._rnn_input_size = out_channel if self.cnn_param["gap"] else out_channel*(self._rnn_input_size)
#RNN Layers
# Each rnn layer spec is indexed as [0] = hidden size and [1] = bias flag (the LayerLSTM arguments).
rnns = []
rnn_layer_param = self.rnn_param["layers"]
rnn_layers = len(rnn_layer_param)
for layer in range(rnn_layers):
if layer:
rnn = LayerLSTM(rnn_layer_param[layer-1][0], rnn_layer_param[layer][0], rnn_layer_param[layer][1])
else:
rnn = LayerLSTM(self._rnn_input_size, rnn_layer_param[layer][0], rnn_layer_param[layer][1])
rnns.append(('%d'%layer, rnn))
self.rnns = nn.Sequential(OrderedDict(rnns))
self._fc_input_size = rnn_layer_param[rnn_layers-1][0] #* 2 # ois
#FC Layers
# Each fc layer spec is indexed as [0] = output features and [1] = bias flag.
# The last FC layer is always built without an activation.
fcs = []
fc_activation = Activates[self.fc_param["activate_function"]]
fc_batch_norm = self.fc_param["batch_norm"]
fc_layer_param = self.fc_param["layers"]
fc_drop_out = self.fc_param["drop_out"]
fc_layers = len(fc_layer_param)
if fc_layers == 1:
fc = LayerFC(self._fc_input_size,fc_layer_param[0][0],fc_layer_param[0][1],
fc_drop_out, None, fc_batch_norm)
fcs.append(('%d'%(fc_layers-1), fc))
else:
for layer in range(fc_layers-1):
if layer:
fc = LayerFC(fc_layer_param[layer-1][0],fc_layer_param[layer][0],fc_layer_param[layer][1],
fc_drop_out, fc_activation, fc_batch_norm)
else:
fc = LayerFC(self._fc_input_size,fc_layer_param[layer][0],fc_layer_param[layer][1],
fc_drop_out,fc_activation, fc_batch_norm)
fcs.append(('%d'%layer, fc))
fc = LayerFC(fc_layer_param[fc_layers-2][0],fc_layer_param[fc_layers-1][0],fc_layer_param[fc_layers-1][1],
fc_drop_out,None, fc_batch_norm) # Modified
fcs.append(('%d'%(fc_layers-1), fc))
self.class_num = fc_layer_param[fc_layers-1][0]
self.fcs = nn.Sequential(OrderedDict(fcs))
# Reset the stored hidden/cell state of every LSTM layer for a new batch size.
def init_hidden(self, batch_size):
for i in range(len(self.rnns)):
self.rnns[i].init_hidden(batch_size)
# x is unpacked as a 2-D (batch, features) tensor below, despite the 4-D note in the
# trailing comment. flo is concatenated to x along dim 1; the ois argument is not read.
def forward(self, x, flo, ois):
b,c = x.size() #x->[batch,channel,height,width]
if self.convs is not None:
x = self.convs(x)
if self.gap is not None:
x = self.gap(x)
x = x.view(b,-1)
if self.no_flo is False:
x = torch.cat((x, flo), dim = 1)
x = self.rnns(x)
x = self.fcs(x) # [b, 4]
x = torch_norm_quat(x)
return x
class Model():
def __init__(self, cf):
super().__init__()
self.net = Net(cf)
self.unet = UNet()
self.init_weights(cf)
self.loss_smooth = C1_Smooth_loss()
self.loss_follow = Follow_loss()
self.loss_c2_smooth = C2_Smooth_loss()
self.loss_optical = Optical_loss()
self.loss_undefine = Undefine_loss(ratio = 0.08)
self.loss_angle = Angle_loss()
self.loss_stay = Stay_loss()
self.loss_smooth_w = cf["loss"]["smooth"]
self.loss_angle_w = cf["loss"]["angle"]
self.loss_follow_w = cf["loss"]["follow"]
self.loss_c2_smooth_w = cf["loss"]["c2_smooth"]
self.loss_undefine_w = cf["loss"]["undefine"]
self.loss_opt_w = cf["loss"]["opt"]
self.loss_stay_w = cf["loss"]["stay"]
self.gaussian_weight = np.array([0.072254, 0.071257, 0.068349, 0.063764, 0.057856, 0.051058, 0.043824, 0.036585, 0.029705, 0.023457, 0.01801])
def loss(
self, out, vt_1, virtual_inputs, real_inputs, flo, flo_back,
real_projections_t, real_projections_t_1, real_postion_anchor,
follow = True, undefine = True, optical = True, stay = False
):
unit_size = self.net.unit_size
mid = real_inputs.size()[1]//(2*unit_size)
Rt = real_inputs[:,unit_size*(mid):unit_size*(mid)+4]
v_pos = torch_QuaternionProduct(out, virtual_inputs[:, -4:])
r_pos = torch_QuaternionProduct(v_pos, real_postion_anchor)
loss = torch.zeros(7).cuda()
if self.loss_follow_w > 0 and follow:
for i in range(-2,3):
loss[0] += self.loss_follow_w * self.loss_follow(v_pos, real_inputs[:,unit_size*(i+mid):unit_size*(i+mid)+4], None)
if self.loss_angle_w > 0 and follow:
threshold = 6 / 180 * 3.1415926
loss_angle, theta = self.loss_angle(v_pos, Rt, threshold = threshold)
loss[1] = self.loss_angle_w * loss_angle
if self.loss_smooth_w > 0:
loss_smooth = self.loss_smooth(out)
loss[2] = self.loss_smooth_w * loss_smooth
if self.loss_c2_smooth_w > 0:
loss[3] = self.loss_c2_smooth_w * self.loss_c2_smooth(out, virtual_inputs[:, -4:], virtual_inputs[:, -8:-4])
if self.loss_undefine_w > 0 and undefine:
Vt_undefine = v_pos.clone()
for i in range(0, 10, 2):
Rt_undefine = real_inputs[:,unit_size*(mid+i):unit_size*(mid+i)+4]
loss_undefine_w = self.loss_undefine_w * self.gaussian_weight[i]
loss[4] += loss_undefine_w * self.loss_undefine(Vt_undefine, Rt_undefine)
Vt_undefine = torch_QuaternionProduct(out, Vt_undefine)
Vt_undefine = torch_QuaternionProduct(out, Vt_undefine)
if self.loss_opt_w > 0 and optical:
loss[5] = self.loss_opt_w * self.loss_optical(r_pos, vt_1, flo, flo_back, real_projections_t, real_projections_t_1)
if self.loss_stay_w > 0 and stay:
loss[6] = self.loss_stay_w * self.loss_stay(out)
return loss
def init_weights(self, cf):
    """Re-initialise conv and linear weights of ``self.net`` then ``self.unet``.

    ``cf["train"]["init"]`` selects the scheme: ``"xavier_uniform"`` or
    ``"xavier_normal"``. Any other value leaves the weights untouched.
    Only ``weight`` tensors are touched; biases keep their current values.
    """
    layer_types = (nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.Linear)
    schemes = {
        "xavier_uniform": nn.init.xavier_uniform_,
        "xavier_normal": nn.init.xavier_normal_,
    }
    for network in (self.net, self.unet):
        for module in network.modules():
            if not isinstance(module, layer_types):
                continue
            initializer = schemes.get(cf["train"]["init"])
            if initializer is not None:
                initializer(module.weight.data)
def save_checkpoint(self, epoch = 0, optimizer=None):
    """Build the dictionary that is written to disk as a checkpoint.

    Always contains ``cnn``, ``fc`` and ``state_dict`` taken from
    ``self.net`` plus ``epoch``. ``optim_dict`` is added when an optimizer
    is given and ``unet`` when ``self.unet`` is not None.
    """
    package = dict(
        cnn=self.net.cnn_param,
        fc=self.net.fc_param,
        state_dict=self.net.state_dict(),
    )
    optional_states = (
        ('optim_dict', optimizer),
        ('unet', self.unet),
    )
    for key, holder in optional_states:
        if holder is not None:
            package[key] = holder.state_dict()
    package["epoch"] = epoch
    return package
class UNet(nn.Module):
    """Convolutional encoder: one DoubleConv, four Down stages, then ``LayerFC``.

    ``n_classes`` and ``bilinear`` are stored on the instance but are not used
    when building the layers. The head is created with an input size of 128
    and a second argument of 64 (presumably 64 output features -- confirm
    against ``LayerFC``).
    """

    def __init__(self, n_channels = 4, n_classes = 16, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 8)
        # Registers self.down1 .. self.down4 in order, doubling the channels each time.
        widths = (8, 16, 32, 64, 128)
        for index, (c_in, c_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, "down%d" % index, Down(c_in, c_out))

        self._fc_input_size = 128 * 1 * 1
        self.fc = LayerFC(self._fc_input_size, 64, bias = True)

    def forward(self, x, x_back = None):
        """Encode ``x`` (channels in the last dimension) into one row per sample.

        When ``x_back`` is given it is concatenated to ``x`` along dim 3
        before the tensor is permuted with ``(0, 3, 1, 2)``.
        """
        if x_back is not None:
            x = torch.cat((x, x_back), dim=3)
        features = x.permute(0, 3, 1, 2)
        batch = features.size()[0]
        for stage in (self.inc, self.down1, self.down2, self.down3, self.down4):
            features = stage(features)
        flattened = torch.reshape(features, (batch, -1))
        return self.fc(flattened)
class DoubleConv(nn.Module):
    """A single 3x3 convolution (padding 1) followed by an in-place ReLU.

    Despite the class name only one conv/ReLU pair is built. The convolution
    outputs ``mid_channels`` channels; ``out_channels`` is used only as the
    fallback when ``mid_channels`` is falsy.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super(DoubleConv, self).__init__()
        conv_width = mid_channels or out_channels
        layers = [
            nn.Conv2d(in_channels, conv_width, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.double_conv = nn.Sequential(*layers)

    def forward(self, x):
        activated = self.double_conv(x)
        return activated
class Down(nn.Module):
    """Max-pool with kernel size 4, then a DoubleConv block."""

    def __init__(self, in_channels, out_channels):
        super(Down, self).__init__()
        stages = (
            nn.MaxPool2d(4),
            DoubleConv(in_channels, out_channels),
        )
        self.maxpool_conv = nn.Sequential(*stages)

    def forward(self, x):
        reduced = self.maxpool_conv(x)
        return reduced
class Up(nn.Module):
    """Upsample ``x1``, pad it to the size of ``x2``, concatenate, then DoubleConv."""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up, self).__init__()
        half = in_channels // 2
        if not bilinear:
            # Learned upsampling that also halves the channel count.
            self.up = nn.ConvTranspose2d(in_channels, half, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            # Fixed bilinear upsampling; `half` is handed to DoubleConv as its mid_channels.
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half)

    def forward(self, x1, x2):
        upsampled = self.up(x1)
        # Dims 2 and 3 are treated as height and width.
        gap_h = x2.size()[2] - upsampled.size()[2]
        gap_w = x2.size()[3] - upsampled.size()[3]
        left, top = gap_w // 2, gap_h // 2
        # Pad so both inputs have the same spatial size before concatenation.
        # Known padding issues are discussed in:
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        upsampled = F.pad(upsampled, [left, gap_w - left, top, gap_h - top])
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)
class OutConv(nn.Module):
    """1x1 convolution that maps ``in_channels`` to ``out_channels``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        projection = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        self.conv = projection

    def forward(self, x):
        projected = self.conv(x)
        return projected
================================================
FILE: dvs/printer.py
================================================
import sys
class Printer(object):
    """Tee for ``sys.stdout``: every write is copied to all given files.

    Typical use::

        printer = Printer(sys.stdout, log_file).open()
        ...
        printer.close()
    """

    def __init__(self, *files):
        # File-like objects that receive every write.
        self.files = files

    def open(self):
        """Install this object as ``sys.stdout`` and return it."""
        # The stream in place at the first open() is remembered on the sys
        # module so close() can restore it later.
        if not hasattr(sys, '_stdout'):
            sys._stdout = sys.stdout
        sys.stdout = self
        return self

    def close(self):
        """Close every file except the remembered stdout and restore ``sys.stdout``.

        Safe to call even if ``open()`` was never called: the current
        ``sys.stdout`` is then treated as the stream to keep.
        """
        stdout = getattr(sys, '_stdout', sys.stdout)
        for f in self.files:
            if f is not stdout:
                f.close()
        sys.stdout = stdout

    def write(self, obj):
        """Write ``obj`` to every file and flush each one immediately."""
        for f in self.files:
            f.write(obj)
            f.flush()

    def flush(self):
        """No-op: ``write`` already flushes every file."""
        pass
if __name__ == '__main__':
    # Manual smoke test: the middle message should also land in ./test.txt.
    print("Start testing")
    log_handle = open('./test.txt', 'w+')
    tee = Printer(sys.stdout, log_handle)
    tee.open()
    print("In files")
    tee.close()
    print("Not in files")
================================================
FILE: dvs/requirements.txt
================================================
colorama==0.4.4
ffmpeg==1.4
imageio==2.9.0
matplotlib==3.3.4
opencv-contrib-python==4.5.1.48
opencv-python==4.5.1.48
pytz==2021.1
PyYAML==5.4.1
scipy==1.5.4
tensorboardX==2.1
tqdm==4.59.0
================================================
FILE: dvs/train.py
================================================
import os
import sys
import torch
import torchvision
import torch.nn as nn
from torch.autograd import Variable
import time
import yaml
import argparse
import numpy as np
from printer import Printer
from dataset import get_data_loader
from model import Model
import datetime
import copy
from util import make_dir, get_optimizer, AverageMeter, save_train_info, norm_flow
from gyro import torch_QuaternionProduct, torch_QuaternionReciprocal, torch_norm_quat
# Set at import time: expose only GPU index 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def run_epoch(model, loader, cf, epoch, lr, optimizer=None, is_training=True, USE_CUDA=True, clip_norm=0):
# Run one pass over `loader` and return the running average kept in an AverageMeter.
# is_training=True puts model.net / model.unet in train mode and uses `optimizer`;
# otherwise both are put in eval mode and the forward passes run under torch.no_grad().
# `epoch` selects which loss terms are enabled (see the schedule further down).
# clip_norm, when truthy, is the max_norm passed to clip_grad_norm_ for both networks.
# NOTE(review): `lr` and `number_real` are never read in this function.
# NOTE(review): flo_step, vt_1, real_projections_t, real_postion_anchor, ois_step, ...
# appear to be assigned only in the USE_CUDA section -- confirm whether USE_CUDA=False is supported.
no_flo = False
number_virtual, number_real = cf['data']["number_virtual"], cf['data']["number_real"]
avg_loss = AverageMeter()
if is_training:
model.net.train()
model.unet.train()
else:
model.net.eval()
model.unet.eval()
for i, data in enumerate(loader, 0):
# unpack the eight fields of one batch
real_inputs, times, flo, flo_back, real_projections, real_postion, ois, real_queue_idx = data
print("Fininsh Load data")
real_inputs = real_inputs.type(torch.float) #[b,60,84=21*4]
real_projections = real_projections.type(torch.float)
flo = flo.type(torch.float)
flo_back = flo_back.type(torch.float)
ois = ois.type(torch.float)
batch_size, step, dim = real_inputs.size()
times = times.numpy()
real_queue_idx = real_queue_idx.numpy()
# the virtual queue is (re)initialised by the dataset for every batch
virtual_queue = loader.dataset.random_init_virtual_queue(batch_size, real_postion[:,0,:].numpy(), times[:,1]) # TODO
# virtual_queue = [None] * batch_size
loss = 0
model.net.init_hidden(batch_size)
# iterate over the `step` dimension of real_inputs
for j in range(step):
virtual_inputs, vt_1 = loader.dataset.get_virtual_data(
virtual_queue, real_queue_idx, times[:, j], times[:, j+1], times[:, 0], batch_size, number_virtual, real_postion[:,j])
real_inputs_step = real_inputs[:,j,:]
# network input = real inputs of this step followed by the virtual inputs
inputs = torch.cat((real_inputs_step,virtual_inputs), dim = 1)
# inputs = Variable(real_inputs_step)
if USE_CUDA:
real_inputs_step = real_inputs_step.cuda()
virtual_inputs = virtual_inputs.cuda()
inputs = inputs.cuda()
if no_flo is False:
flo_step = flo[:,j].cuda()
flo_back_step = flo_back[:,j].cuda()
else:
flo_step = None
flo_back_step = None
vt_1 = vt_1.cuda()
real_projections_t = real_projections[:,j+1].cuda()
real_projections_t_1 = real_projections[:,j].cuda()
real_postion_anchor = real_postion[:,j].cuda()
ois_step = ois[:,j].cuda()
if no_flo is False:
# flow is laid out as (b, h, w, channels); norm_flow divides it by h and w
b, h, w, _ = flo_step.size()
flo_step = norm_flow(flo_step, h, w)
flo_back_step = norm_flow(flo_back_step, h, w)
if is_training:
if no_flo is False:
flo_out = model.unet(flo_step, flo_back_step)
else:
flo_out = None
# at the first step (j == 0) the network is run twice on the same input
if j < 1:
# NOTE(review): this loop variable has the same name as the batch index `i` above
for i in range(2):
out = model.net(inputs, flo_out, ois_step)
else:
out = model.net(inputs, flo_out, ois_step)
else:
# evaluation: same forward passes, without gradient tracking
with torch.no_grad():
if no_flo is False:
flo_out = model.unet(flo_step, flo_back_step)
else:
flo_out = None
if j < 1:
for i in range(2):
out = model.net(inputs, flo_out, ois_step)
else:
out = model.net(inputs, flo_out, ois_step)
# loss schedule: `follow` through epoch 30, `undefine` from epoch 31,
# `optical` (also passed as `stay`) from epoch 41
if epoch <= 30:
follow = True
else:
follow = False
if epoch > 30:
undefine = True
else:
undefine = False
if epoch > 40:
optical = True
else:
optical = False
loss_step = model.loss(out, vt_1, virtual_inputs, real_inputs_step, \
flo_step, flo_back_step, real_projections_t, real_projections_t_1, real_postion_anchor, \
follow = follow, undefine = undefine, optical = optical, stay = optical)
# the loss is replaced by the latest step's value, not accumulated
loss = loss_step
# compose the network output with the latest virtual position and the real anchor,
# then hand the result to the dataset to update the virtual queue
virtual_position = virtual_inputs[:, -4:]
pos = torch_QuaternionProduct(virtual_position, real_postion_anchor)
out = torch_QuaternionProduct(out, pos)
# NOTE(review): the conversion to a numpy array happens only under USE_CUDA
if USE_CUDA:
out = out.cpu().detach().numpy()
virtual_queue = loader.dataset.update_virtual_queue(batch_size, virtual_queue, out, times[:,j+1])
if (j+1) % 10 == 0:
print("Step: "+str(j+1)+"/"+str(step))
print(loss)
# collapse the per-term loss vector into a scalar for backward / logging
loss = torch.sum(loss)
if is_training:
optimizer.zero_grad()
# NOTE(review): retain_graph=True keeps the autograd graph alive after backward -- confirm it is required
loss.backward(retain_graph=True)
if clip_norm:
nn.utils.clip_grad_norm_(model.net.parameters(), max_norm=clip_norm)
nn.utils.clip_grad_norm_(model.unet.parameters(), max_norm=clip_norm)
optimizer.step()
avg_loss.update(loss.item(), batch_size)
return avg_loss.avg
def train(args = None):
# Train according to the YAML config at args.config: build the model and optimizer,
# optionally resume from cf["model"]["load_model"], then run one training and one
# test epoch per iteration and save checkpoints. Returns None.
# NOTE(review): args defaults to None but args.config is read unconditionally.
torch.autograd.set_detect_anomaly(True)
config_file = args.config
# NOTE(review): yaml.load is called without an explicit Loader and the file handle is
# never closed; consider yaml.safe_load inside a with-block if the config is plain data.
cf = yaml.load(open(config_file, 'r'))
USE_CUDA = cf['data']["use_cuda"]
seed = cf['train']["seed"]
torch.manual_seed(seed)
if USE_CUDA:
torch.cuda.manual_seed(seed)
checkpoints_dir = cf['data']['checkpoints_dir']
epochs = cf["train"]["epoch"]
snapshot = cf["train"]["snapshot"]
decay_epoch = cf['train']['decay_epoch']
init_lr = cf["train"]["init_lr"]
lr_decay = cf["train"]["lr_decay"]
lr_step = cf["train"]["lr_step"]
clip_norm = cf["train"]["clip_norm"]
load_model = cf["model"]["load_model"]
# make_dir creates the needed directories and returns the per-experiment checkpoint dir
checkpoints_dir = make_dir(checkpoints_dir, cf)
# fresh run: truncate the log file; resumed run: append to it
if load_model is None:
log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'w+')
else:
log_file = open(os.path.join(cf["data"]["log"], cf['data']['exp']+'.log'), 'a')
# from here on print() output goes to both the console and the log file
# NOTE(review): printer.close() is never called in this function
printer = Printer(sys.stdout, log_file).open()
print('----Print Arguments Setting------')
for key in cf:
print('{}:'.format(key))
for para in cf[key]:
print('{:50}:{}'.format(para,cf[key][para]))
print('\n')
# Define the model
model = Model(cf)
optimizer = get_optimizer(cf["train"]["optimizer"], model, init_lr, cf)
for idx, m in enumerate(model.net.children()):
print('{}:{}'.format(idx,m))
for idx, m in enumerate(model.unet.children()):
print('{}:{}'.format(idx,m))
if load_model is not None:
print("------Load Pretrined Model--------")
# NOTE(review): torch.load is called without map_location
checkpoint = torch.load(load_model)
model.net.load_state_dict(checkpoint['state_dict'])
model.unet.load_state_dict(checkpoint['unet'])
print("------Resume Training Process-----")
optimizer.load_state_dict(checkpoint['optim_dict'])
epoch_load = checkpoint['epoch']
print("Epoch load: ", epoch_load)
else:
epoch_load = 0
if USE_CUDA:
model.net.cuda()
model.unet.cuda()
if load_model is not None:
# move the tensors held in the restored optimizer state to the GPU
for state in optimizer.state.values():
for k, v in state.items():
if isinstance(v, torch.Tensor):
state[k] = v.cuda()
# take the learning rate from the optimizer's param groups (the last group wins)
for param in optimizer.param_groups:
init_lr = param['lr']
print("-----------Load Dataset----------")
train_loader, test_loader = get_data_loader(cf, no_flo = False)
print("----------Start Training----------")
currentDT = datetime.datetime.now()
print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
start_time = time.time()
# a truthy lr_step replaces the configured decay_epoch with a regular schedule
if lr_step:
decay_epoch = list(range(1+lr_step, epochs+1, lr_step))
lr = init_lr
# epochs are numbered from 1; a resumed run continues after the loaded epoch
for count in range(epoch_load+1, epochs+1):
if decay_epoch != None and count in decay_epoch:
lr *= lr_decay
for param in optimizer.param_groups:
param['lr'] *= lr_decay
print("Epoch: %d, learning_rate: %.5f" % (count, lr))
train_loss = run_epoch(model, train_loader, cf, count, lr, optimizer=optimizer, clip_norm=clip_norm, is_training=True, USE_CUDA=USE_CUDA)
test_loss = run_epoch(model, test_loader, cf, count, lr, is_training=False, USE_CUDA=USE_CUDA)
time_used = (time.time() - start_time) / 60
print("Epoch %d done | TrLoss: %.4f | TestLoss: %.4f | Time_used: %.4f minutes" % (
count, train_loss, test_loss, time_used))
# "epoch" writes a per-epoch checkpoint file, "last" overwrites the rolling one
if count % snapshot == 0:
save_train_info("epoch", checkpoints_dir, cf, model, count, optimizer)
save_train_info("last", checkpoints_dir, cf, model, count, optimizer)
print("Model stored at epoch %d"%count)
currentDT = datetime.datetime.now()
print(currentDT.strftime(" %Y-%m-%d %H:%M:%S"))
print("------------End Training----------")
return
if __name__ == '__main__':
    # Command-line entry point: the only option is the path of the YAML config.
    cli = argparse.ArgumentParser("Training model")
    cli.add_argument(
        "--config",
        default="./conf/stabilzation_train.yaml",
        help="Config file.",
    )
    train(args = cli.parse_args())
================================================
FILE: dvs/util.py
================================================
import os
import torch
import cv2
from itertools import chain
from warp import load_video, save_video
import numpy as np
import matplotlib.pyplot as plt
from gyro import get_rotations
import shutil
def save_train_info(name, checkpoints_dir, cf, model, count, optimizer = None):
    """Write ``model.save_checkpoint(...)`` to a checkpoint file with ``torch.save``.

    The file name starts with ``cf['data']['exp']``. ``name`` values "last"
    and "best" give ``_last.checkpoint`` / ``_best.checkpoint``; any other
    value gives ``_epoch<count>.checkpoint``.
    """
    experiment = cf['data']['exp']
    if name in ("last", "best"):
        filename = experiment + '_' + name + '.checkpoint'
    else:
        filename = experiment + '_epoch%d.checkpoint' % count
    target = os.path.join(checkpoints_dir, filename)
    snapshot = model.save_checkpoint(epoch = count, optimizer=optimizer)
    torch.save(snapshot, target)
def make_dir(checkpoints_dir ,cf):
    """Create the directories a run needs and return the experiment's checkpoint dir.

    Created when missing, in this order: ``checkpoints_dir``, the log
    directory ``cf["data"]["log"]``, ``./test``, ``./test/<exp>`` and
    ``<checkpoints_dir>/<exp>``, where ``<exp>`` is ``cf['data']['exp']``.
    """
    def ensure(directory):
        # Same check-then-create as elsewhere in the project; returns the path.
        if not os.path.exists(directory):
            os.makedirs(directory)
        return directory

    ensure(checkpoints_dir)
    ensure(cf["data"]["log"])
    inference_root = ensure("./test")
    ensure(os.path.join(inference_root, cf['data']['exp']))
    return ensure(os.path.join(checkpoints_dir, cf['data']['exp']))
def get_optimizer(optimizer, model, init_lr, cf):
    """Build the optimizer named by ``optimizer`` over ``model.net`` and ``model.unet``.

    "adam" uses ``cf["train"]["weight_decay"]``; "sgd" uses
    ``cf["train"]["momentum"]``. Any other value is returned unchanged.
    """
    if optimizer not in ("adam", "sgd"):
        return optimizer
    trainable = chain(model.net.parameters(), model.unet.parameters())
    if optimizer == "adam":
        return torch.optim.Adam(trainable, lr=init_lr, weight_decay=cf["train"]["weight_decay"])
    return torch.optim.SGD(trainable, lr=init_lr, momentum=cf["train"]["momentum"])
def crop_video(in_path, out_path, crop_ratio):
    """Centre-crop every frame of a video, scale it back to ``size`` and save it.

    The crop window is computed from ``crop_ratio`` against a fixed
    1080 x 1920 (rows x columns) frame, whatever the real frame size is.
    """
    frames, fps, size = load_video(in_path)
    inset = 1 - crop_ratio
    top, bottom = int(inset * 1080) + 1, int(crop_ratio * 1080) - 1
    left, right = int(inset * 1920) + 1, int(crop_ratio * 1920) - 1
    for index, frame in enumerate(frames):
        window = frame[top:bottom, left:right, :]
        frames[index] = cv2.resize(window, size, interpolation = cv2.INTER_LINEAR)
    save_video(out_path, frames, fps, size= size)
def norm_flow(flow, h, w):
    """Divide channel 0 of ``flow`` by ``h`` and channel 1 by ``w``, in place.

    When ``flow.shape[2] == 2`` the channel is taken from the third axis,
    otherwise from the fourth. The same (modified) object is returned.
    """
    leading_axes = 2 if flow.shape[2] == 2 else 3
    prefix = (slice(None),) * leading_axes
    flow[prefix + (0,)] /= h
    flow[prefix + (1,)] /= w
    return flow
class AverageMeter(object):
    """Running weighted average: tracks ``sum``, ``cnt`` and ``avg``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set ``avg``, ``sum`` and ``cnt`` back to zero."""
        self.avg = self.sum = self.cnt = 0

    def update(self, val, n=1):
        """Add ``val`` with weight ``n``; ``avg`` is refreshed only while ``cnt > 0``."""
        self.sum += val * n
        self.cnt += n
        if not (self.cnt > 0):
            return
        self.avg = self.sum / self.cnt
================================================
FILE: dvs/warp/__init__.py
================================================
from .warping import (
warp_video
)
from .read_write import (
save_video,
load_video,
video2frame_one_seq
)
================================================
FILE: dvs/warp/rasterizer.py
================================================
import numpy as np
import matplotlib.pyplot as plt
from numpy import array
import torch
import cv2
import time
# Module-wide target device for the rasterizer's tensors; created at import time and fixed to CUDA.
device = torch.device("cuda")
def Rasterization(image, grid, get_mesh_only = False):
    """Warp ``image`` with a mesh by rasterizing its triangles into a sampling grid.

    ``image`` is indexed as [channel, row, column] (dims 1 and 2 are used as
    height and width). ``grid[:, :, :2]`` holds the warped vertex positions
    and ``grid[:, :, 2:]`` the positions to sample from (presumably both
    normalised xy -- confirm with the caller). ``get_mesh_only`` is not used.

    Side effect: the top-left 2x2 pixels of the first three channels of
    ``image`` are set to zero in place. The warped image is returned on the CPU.
    """
    height = image.size()[1]
    width = image.size()[2]

    warped_upper, warped_lower = grid_to_triangle(grid[:, :, :2])
    source_upper, source_lower = grid_to_triangle(grid[:, :, 2:])

    bounds, xlength, ylength = grid_size(warped_upper, warped_lower, height, width)
    _, xmin, _, ymin = bounds
    xratio = xlength / width
    yratio = ylength / height

    # cell * upper/lower * point * xy
    warped = torch.stack((warped_upper, warped_lower), dim=1).to(device)
    source = torch.stack((source_upper, source_lower), dim=1).to(device)

    # Express each cell's triangles relative to that cell's own bounding window.
    local = torch.zeros(warped.size()).to(device)
    local[:, :, :, 0] = (warped[:, :, :, 0] - xmin.view(-1, 1, 1).to(device) / width) / xratio
    local[:, :, :, 1] = (warped[:, :, :, 1] - ymin.view(-1, 1, 1).to(device) / height) / yratio

    # Per-pixel weights inside each window (the expensive part).
    weights = torch.unsqueeze(triangle2mask(local, ylength, xlength), 4)
    source = torch.unsqueeze(source, 1)
    sampled = source * weights
    sampled = torch.sum(torch.sum(sampled, dim=3), dim=2).view(-1, ylength, xlength, 2)

    # Canvas large enough for every window, including those starting at negative offsets.
    gxmin = min(0, int(torch.min(xmin)))
    gxmax = int(torch.max(xmin) + xlength)
    gymin = min(0, int(torch.min(ymin)))
    gymax = int(torch.max(ymin) + ylength)
    canvas_rows = max(gymax - gymin, height, height - gymin)
    canvas_cols = max(gxmax - gxmin, width, width - gxmin)
    canvas = torch.zeros((canvas_rows, canvas_cols, 2)).to(device)

    for cell in range(sampled.size()[0]):
        x_s = int(xmin[cell] - gxmin)
        x_e = int(xmin[cell] + xlength - gxmin)
        y_s = int(ymin[cell] - gymin)
        y_e = int(ymin[cell] + ylength - gymin)
        canvas[y_s:y_e, x_s:x_e, :] += sampled[cell, :, :, :]

    # Keep only the part of the canvas that corresponds to the output frame.
    canvas = canvas[-gymin:-gymin + height, -gxmin:-gxmin + width, :]

    # Shift by half a pixel, then map [0, 1] to the [-1, 1] range used by grid_sample.
    shift = torch.tensor([0.5 / height, 0.5 / width])[None, None, :].to(device)
    canvas = (canvas + shift) * 2 - 1

    image[:3, :2, :2] = 0
    batched_image = torch.unsqueeze(image, 0).to(device)
    batched_grid = torch.unsqueeze(canvas, 0)
    warped_image = torch.nn.functional.grid_sample(batched_image, batched_grid)  # default bilinear
    return torch.squeeze(warped_image, 0).cpu()
def grid_to_triangle(grid):
    """Split every cell of a vertex grid into two triangles.

    ``grid`` has three dimensions with two values per vertex. For the cell at
    (i, j) the first triangle is (i, j), (i+1, j), (i, j+1) and the second is
    (i, j+1), (i+1, j), (i+1, j+1). Both results are shaped
    [cell, point, xy] with cells in row-major order.
    """
    dims = grid.size()
    cells = (dims[0] - 1) * (dims[1] - 1)

    # Vertex sub-grids named by their offset along axis 0 and axis 1.
    corner_00 = grid[:-1, :-1, :]
    corner_10 = grid[1:, :-1, :]
    corner_01 = grid[:-1, 1:, :]
    corner_11 = grid[1:, 1:, :]

    def pack(first, second, third):
        # [rows, cols, xy, point] -> [cell, xy, point] -> [cell, point, xy]
        stacked = torch.stack((first, second, third), dim=3)
        return torch.transpose(stacked.view(cells, 2, 3), 1, 2)

    upper_triangle = pack(corner_00, corner_10, corner_01)
    lower_triangle = pack(corner_01, corner_10, corner_11)
    return upper_triangle, lower_triangle
def grid_size(upper_triangle, lower_triangle, height, width):
    """Pixel bounding box of every cell plus the largest box extent.

    Vertex coordinates are scaled by ``width`` / ``height`` before taking
    floor (min) and floor + 1 (max). Returns ``[xmax, xmin, ymax, ymin]``
    (one value per cell each) followed by the maximum x and y extents as ints.
    """
    vertices = torch.cat((upper_triangle, lower_triangle), dim=1)
    xs = vertices[:, :, 0] * width
    ys = vertices[:, :, 1] * height

    xmax = torch.floor(xs.max(1)[0]) + 1
    ymax = torch.floor(ys.max(1)[0]) + 1
    xmin = torch.floor(xs.min(1)[0])
    ymin = torch.floor(ys.min(1)[0])

    xlength = int(torch.max(xmax - xmin))
    ylength = int(torch.max(ymax - ymin))
    return [xmax, xmin, ymax, ymin], xlength, ylength
def generate_mesh_grid(height, width):
    """Return a ``(height, width, 2)`` array of normalised sampling positions.

    ``[..., 0]`` is the x coordinate (``0, 1/width, ...``, excluding 1) and
    ``[..., 1]`` is the y coordinate (``0, 1/height, ...``, excluding 1).
    A zero ``height`` or ``width`` yields an empty array of matching shape.
    """
    xs = np.linspace(0, 1, width, endpoint=False)
    ys = np.linspace(0, 1, height, endpoint=False)
    xmesh, ymesh = np.meshgrid(xs, ys)
    # Stack x and y as the last axis directly instead of zipping rows in Python.
    return np.stack((xmesh, ymesh), axis=-1)
def triangle2mask(d, height, width):  # d: [N x T x 3 x 2]
    """Per-pixel edge-function weights of every triangle, zeroed outside it.

    ``d`` holds three 2-D vertices for each of T triangles in each of N
    groups. The weights are evaluated on a ``height`` x ``width`` grid from
    ``generate_mesh_grid`` and returned as [N x P x T x 3], P = height * width.
    A pixel counts as inside only when all three weights are strictly positive.
    """
    groups, triangles = d.size()[0], d.size()[1]
    vertex_a = d[:, :, 0, :]
    vertex_b = d[:, :, 1, :]
    vertex_c = d[:, :, 2, :]

    # Edge function of the first vertex against the opposite edge; used to
    # normalise the three weights below. Shape [N x T x 1].
    area = edgefunc(vertex_b, vertex_c, d[:, :, None, 0, :])

    # Same sampling positions for every triangle: [N x T x P x 2] on `device`.
    positions = np.reshape(generate_mesh_grid(height, width), (height * width, 2))
    samples = torch.Tensor(positions).unsqueeze(0).repeat((groups, triangles, 1, 1))
    samples = samples.to(device)

    # One weight per vertex, each [N x T x P].
    w0 = edgefunc(vertex_b, vertex_c, samples) / area
    w1 = edgefunc(vertex_c, vertex_a, samples) / area
    w2 = edgefunc(vertex_a, vertex_b, samples) / area

    inside = (w0 > 0) & (w1 > 0) & (w2 > 0)
    inside = torch.unsqueeze(inside, 3).type(torch.cuda.FloatTensor)
    weights = torch.stack((w0, w1, w2), dim=3) * inside
    return torch.transpose(weights, 1, 2)  # [N x P x T x 3]
def edgefunc(v0, v1, p):
    """Evaluate the edge function of the edge v0 -> v1 at every point in ``p``.

    v0 and v1 are [N x T x 2] vertex positions, p is [N x T x P x 2].
    The result is [N x T x P] with value
    ``(v1.y - v0.y) * p.x + (v0.x - v1.x) * p.y + (v0.y * v1.x - v0.x * v1.y)``.
    """
    px = p[:, :, :, 0]
    py = p[:, :, :, 1]

    # Per-triangle coefficients, given a trailing axis so they broadcast over P.
    x10 = (v0[:, :, 0] - v1[:, :, 0]).unsqueeze(2)
    y01 = (v1[:, :, 1] - v0[:, :, 1]).unsqueeze(2)
    cross = (v0[:, :, 1] * v1[:, :, 0] - v0[:, :, 0] * v1[:, :, 1]).unsqueeze(2)

    return y01 * px + x10 * py + cross
if __name__ == '__main__':
    # Quick visual check of the sampling grid for a 2 x 3 image.
    demo_grid = generate_mesh_grid(2, 3)
    print(demo_grid)
================================================
FILE: dvs/warp/read_write.py
================================================
import numpy as np
import cv2
import os
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import ffmpeg
import json
import torch
import argparse
def load_video(path, save_dir = None, resize = None, length = -1): # N x H x W x C
    """Decode a video into a list of frames.

    Args:
        path: video file to read.
        save_dir: when not None, every frame is also written there as
            ``frame_XXXX.png`` (4-digit, zero-padded index).
        resize: None keeps the original size, an int divides width and height
            by that factor, anything else is used directly as the
            ``(width, height)`` target size.
        length: when positive, stop after this many frames.

    Returns:
        ``(frames, fps, size)`` where ``size`` is ``(width, height)``.

    Raises:
        IOError: if no frame can be read from ``path``.
    """
    vidcap = cv2.VideoCapture(path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        success, image = vidcap.read()
        if not success or image is None:
            # Without this check the failure shows up as an AttributeError on None.
            raise IOError("Could not read any frame from video: %s" % path)
        print(image.shape)
        height, width = image.shape[:2]
        if resize is None:
            size = (width, height)
        elif isinstance(resize, int):
            size = (width // resize, height // resize)
        else:
            size = resize

        frames = []
        count = 0
        while success:
            if resize is not None:
                image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
            if save_dir is not None:
                frame_path = os.path.join(save_dir, "frame_" + str(count).zfill(4) + ".png")
                cv2.imwrite(frame_path, image)
            frames.append(image)
            success, image = vidcap.read()
            count += 1
            if length > 0 and count >= length:
                break
    finally:
        # Always give the decoder handle back, including on errors.
        vidcap.release()
    print("Video length: ", len(frames))
    return frames, fps, size
def video2frame(path, resize = None):
# Visit the entries of `path` in sorted order; for each one, look for a file ending in
# ".mp4", make sure a "frames" directory exists inside that entry and call load_video
# with save_dir set to it, so the decoded frames are written there. `resize` is
# forwarded to load_video unchanged. Progress is printed as "<index> / <total>".
data_name = sorted(os.listdir(path))
for i in range(len(data_name)):
print(str(i+1)+" / " + str(len(data_name)))
data_folder = os.path.join(path, data_name[i])
print(data_folder)
files = os.listdir(data_folder)
for f in files:
if f[-4:] == ".mp4":
# NOTE(review): video_name is assigned only when a name ending in ".mp4" is seen --
# confirm what should happen for a folder without one, or with several.
video_name = f
video_path = os.path.join(data_folder, video_name)
frame_folder = os.path.join(data_folder, "frames")
if not os.path.exists(frame_folder):
os.makedirs(frame_folder)
load_video(video_path, save_dir = frame_folder, resize=resize)
def video2frame_one_seq(path, save_dir = None, resize = None): # N x H x W x C
    """Decode one video and optionally dump its frames, without keeping them in memory.

    Args:
        path: video file to read.
        save_dir: when not None, every frame is written there as
            ``frame_XXXXX.png`` (5-digit, zero-padded index).
        resize: None keeps the original size, an int divides width and height
            by that factor, anything else is used directly as the
            ``(width, height)`` target size.

    Returns:
        ``(fps, size)`` where ``size`` is ``(width, height)``.

    Raises:
        IOError: if no frame can be read from ``path``.
    """
    vidcap = cv2.VideoCapture(path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        success, image = vidcap.read()
        print(path)
        if not success or image is None:
            # Without this check the failure shows up as an AttributeError on None.
            raise IOError("Could not read any frame from video: %s" % path)
        print(image.shape)
        height, width = image.shape[:2]
        if resize is None:
            size = (width, height)
        elif isinstance(resize, int):
            size = (width // resize, height // resize)
        else:
            size = resize

        count = 0
        while success:
            if resize is not None:
                image = cv2.resize(image, size, interpolation = cv2.INTER_LINEAR)
            if save_dir is not None:
                frame_path = os.path.join(save_dir, "frame_" + str(count).zfill(5) + ".png")
                cv2.imwrite(frame_path, image)
            success, image = vidcap.read()
            count += 1
    finally:
        # Always give the decoder handle back, including on errors.
        vidcap.release()
    return fps, size
def save_video(path,frame_array, fps, size, losses = None, frame_number = False, writer = None):
    """Write ``frame_array`` to a video, optionally stamping text on each frame.

    When ``writer`` is None a ``cv2.VideoWriter`` is created for ``path``
    ('mp4v' codec if the path ends in "mp4", MJPG otherwise) and released at
    the end; a writer passed in by the caller is used as is and left open.
    ``frame_number`` stamps the frame's index within ``frame_array``;
    ``losses`` stamps ``losses[index]``. Stamped frames replace the originals
    inside ``frame_array``.
    """
    owns_writer = writer is None
    if owns_writer:
        if path[-3:] == "mp4":
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        else:
            fourcc = cv2.VideoWriter_fourcc('M','J','P','G')
        out = cv2.VideoWriter(path, fourcc, fps, size)
    else:
        out = writer

    for index in range(len(frame_array)):
        if frame_number:
            frame_array[index] = draw_number(np.asarray(frame_array[index]), index)
        if losses is not None:
            frame_array[index] = draw_number(
                np.asarray(frame_array[index]), losses[index], x = 900, message = "Loss: ")
        out.write(frame_array[index])

    if owns_writer:
        out.release()
def draw_number(frame, num, x = 10, y = 10, message = "Frame: "):
    """Return a copy of a BGR ``frame`` with ``message + str(num)`` drawn at (x, y).

    The text is black, 45 pt, using the font file ``./data/arial.ttf``.
    """
    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    pen = ImageDraw.Draw(canvas)
    typeface = ImageFont.truetype("./data/arial.ttf", 45)
    # 'rgb(0, 0, 0)' is black
    pen.text((x, y), message + str(num), fill='rgb(0, 0, 0)', font=typeface)
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
if __name__ == "__main__":
    # Extract quarter-resolution frames for FlowNet2. The default "./video"
    # is processed directly; any other directory is expected to contain
    # "test" and "training" sub-directories.
    cli = argparse.ArgumentParser("FlowNet2 Preparation")
    cli.add_argument("--dir_path", default="./video")
    dir_path = cli.parse_args().dir_path
    if dir_path != "./video":
        for subset in ("test", "training"):
            video2frame(os.path.join(dir_path, subset), resize = 4)
    else:
        video2frame(dir_path, resize = 4)
================================================
FILE: dvs/warp/warping.py
================================================
import numpy as np
from .read_write import load_video, save_video
import torch
import cv2
from .rasterizer import Rasterization
import time
import os
def warp_video(mesh_path, video_path, save_path, losses = None, frame_number = False, fps_fix = None):
    """Warp a video frame by frame with per-frame meshes and write the result.

    Args:
        mesh_path: the mesh data itself (an array whose first axis indexes
            frames), not a file path.
        video_path: input video.
        save_path: output video, written with the 'mp4v' codec.
        losses: optional per-frame values stamped on the frames by save_video.
        frame_number: forwarded to save_video.
        fps_fix: when not None, overrides the fps read from the input video.

    Raises:
        TypeError: if ``mesh_path`` is a string.
    """
    if isinstance(mesh_path, str):
        # Loading meshes from a path is not implemented; fail with a clear
        # message instead of an unbound-variable error further down.
        raise TypeError("warp_video expects the mesh data itself, not a path: %r" % (mesh_path,))
    grid_data = mesh_path

    frame_array, fps, size = load_video(video_path, length = grid_data.shape[0])
    if fps_fix is not None:
        fps = fps_fix
    length = min(grid_data.shape[0], len(frame_array))
    seq_length = 100  # frames are warped and written in chunks of this size

    writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
    try:
        for start in range(0, length, seq_length):
            end = min(start + seq_length, length)
            print("Frame: "+str(start)+"/"+str(length))
            # save_video indexes `losses` by position within the chunk it
            # receives, so hand it only the values that belong to this chunk.
            chunk_losses = None if losses is None else losses[start:end]
            frame_array_save = warpping_rast(
                grid_data[start:end], frame_array[start:end], losses = chunk_losses)
            # NOTE: with frame_number=True the stamped numbers restart at 0 in
            # every chunk, because save_video numbers frames by list position.
            save_video(save_path, frame_array_save, fps, size, losses = chunk_losses,
                       frame_number = frame_number, writer = writer)
    finally:
        # Finalise the output file even if warping fails part-way.
        writer.release()
def warpping_rast(grid_data, frame_array, losses = None):
    """Warp each frame with the mesh at the same index and return the new frames.

    Only the first ``min(len(frame_array), grid_data.shape[0])`` pairs are
    processed. ``losses`` is accepted but not used.
    """
    pair_count = min(len(frame_array), grid_data.shape[0])
    return [
        warpping_one_frame_rast(frame_array[index], grid_data[index])
        for index in range(0, pair_count)
    ]
def warpping_one_frame_rast(image, grid):
    """Warp one H x W x C frame with ``grid`` and return it as a uint8 array.

    The frame is scaled to [0, 1] and moved to channel-first layout for
    ``Rasterization``; the result is scaled back by 255, clipped to
    [0, 255] and returned in H x W x C layout.
    """
    channel_first = torch.Tensor(image).permute(2, 0, 1) / 255
    mesh = torch.Tensor(grid)
    warped = Rasterization(channel_first, mesh)
    pixels = warped.permute(1, 2, 0).numpy() * 255
    return np.clip(pixels, 0, 255).astype("uint8")