Showing preview only (596K chars total). Download the full file or copy to clipboard to get everything.
Repository: baowenbo/DAIN
Branch: master
Commit: 7c727aca5676
Files: 123
Total size: 560.1 KB
Directory structure:
gitextract_7t87l58_/
├── .gitignore
├── AverageMeter.py
├── Colab_DAIN.ipynb
├── LICENSE
├── MegaDepth/
│ ├── LICENSE
│ ├── MegaDepth_model.py
│ ├── README.md
│ ├── SDR_compute.py
│ ├── __init__.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── aligned_data_loader.py
│ │ ├── base_data_loader.py
│ │ ├── data_loader.py
│ │ └── image_folder.py
│ ├── models/
│ │ ├── HG_model.py
│ │ ├── __init__.py
│ │ ├── base_model.py
│ │ └── models.py
│ ├── options/
│ │ ├── __init__.py
│ │ ├── base_options.py
│ │ ├── test_options.py
│ │ └── train_options.py
│ ├── pytorch_DIW_scratch.py
│ ├── rmse_error_main.py
│ └── util/
│ ├── __init__.py
│ ├── html.py
│ ├── image_pool.py
│ ├── png.py
│ ├── util.py
│ └── visualizer.py
├── PWCNet/
│ ├── PWCNet.py
│ ├── __init__.py
│ ├── correlation_package_pytorch1_0/
│ │ ├── __init__.py
│ │ ├── build.sh
│ │ ├── clean.sh
│ │ ├── correlation.py
│ │ ├── correlation_cuda.cc
│ │ ├── correlation_cuda_kernel.cu
│ │ ├── correlation_cuda_kernel.cuh
│ │ └── setup.py
│ └── models/
│ ├── PWCNet.py
│ └── __init__.py
├── README.md
├── Resblock/
│ ├── BasicBlock.py
│ └── __init__.py
├── S2D_models/
│ ├── S2DF.py
│ └── __init__.py
├── Stack.py
├── balancedsampler.py
├── colab_interpolate.py
├── datasets/
│ ├── Vimeo_90K_interp.py
│ ├── __init__.py
│ └── listdatasets.py
├── demo_MiddleBury.py
├── demo_MiddleBury_slowmotion.py
├── environment.yaml
├── loss_function.py
├── lr_scheduler.py
├── my_args.py
├── my_package/
│ ├── DepthFlowProjection/
│ │ ├── DepthFlowProjectionLayer.py
│ │ ├── DepthFlowProjectionModule.py
│ │ ├── __init__.py
│ │ ├── depthflowprojection_cuda.cc
│ │ ├── depthflowprojection_cuda_kernel.cu
│ │ ├── depthflowprojection_cuda_kernel.cuh
│ │ └── setup.py
│ ├── FilterInterpolation/
│ │ ├── FilterInterpolationLayer.py
│ │ ├── FilterInterpolationModule.py
│ │ ├── __init__.py
│ │ ├── filterinterpolation_cuda.cc
│ │ ├── filterinterpolation_cuda_kernel.cu
│ │ ├── filterinterpolation_cuda_kernel.cuh
│ │ └── setup.py
│ ├── FlowProjection/
│ │ ├── FlowProjectionLayer.py
│ │ ├── FlowProjectionModule.py
│ │ ├── __init__.py
│ │ ├── flowprojection_cuda.cc
│ │ ├── flowprojection_cuda_kernel.cu
│ │ ├── flowprojection_cuda_kernel.cuh
│ │ └── setup.py
│ ├── Interpolation/
│ │ ├── InterpolationLayer.py
│ │ ├── InterpolationModule.py
│ │ ├── __init__.py
│ │ ├── interpolation_cuda.cc
│ │ ├── interpolation_cuda_kernel.cu
│ │ ├── interpolation_cuda_kernel.cuh
│ │ └── setup.py
│ ├── InterpolationCh/
│ │ ├── InterpolationChLayer.py
│ │ ├── InterpolationChModule.py
│ │ ├── __init__.py
│ │ ├── interpolationch_cuda.cc
│ │ ├── interpolationch_cuda_kernel.cu
│ │ ├── interpolationch_cuda_kernel.cuh
│ │ └── setup.py
│ ├── MinDepthFlowProjection/
│ │ ├── __init__.py
│ │ ├── minDepthFlowProjectionLayer.py
│ │ ├── minDepthFlowProjectionModule.py
│ │ ├── mindepthflowprojection_cuda.cc
│ │ ├── mindepthflowprojection_cuda_kernel.cu
│ │ ├── mindepthflowprojection_cuda_kernel.cuh
│ │ └── setup.py
│ ├── SeparableConv/
│ │ ├── SeparableConvLayer.py
│ │ ├── SeparableConvModule.py
│ │ ├── __init__.py
│ │ ├── separableconv_cuda.cc
│ │ ├── separableconv_cuda_kernel.cu
│ │ ├── separableconv_cuda_kernel.cuh
│ │ └── setup.py
│ ├── SeparableConvFlow/
│ │ ├── SeparableConvFlowLayer.py
│ │ ├── SeparableConvFlowModule.py
│ │ ├── __init__.py
│ │ ├── separableconvflow_cuda.cc
│ │ ├── separableconvflow_cuda_kernel.cu
│ │ ├── separableconvflow_cuda_kernel.cuh
│ │ └── setup.py
│ ├── build.sh
│ ├── clean.sh
│ ├── compiler_args.py
│ └── test_module.py
├── networks/
│ ├── DAIN.py
│ ├── DAIN_slowmotion.py
│ └── __init__.py
└── train.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Ignore Git here
.git
# But not these files...
# !.gitignore
checkpoints/test_local/opt.txt
PWCNet/pwc_net.pth.tar
MegaDepth/checkpoints/*
model_weights/*
MiddleBurySet/*
.nfs*
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/workspace.xml
.idea/tasks.xml
.idea/dictionaries
.idea/vcs.xml
.idea/jsLibraryMappings.xml
# Sensitive or high-churn files:
.idea/dataSources.ids
.idea/dataSources.xml
.idea/dataSources.local.xml
.idea/sqlDataSources.xml
.idea/dynamic.xml
.idea/uiDesigner.xml
# Gradle:
.idea/gradle.xml
.idea/libraries
# Mongo Explorer plugin:
.idea/mongoSettings.xml
.idea/
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
================================================
FILE: AverageMeter.py
================================================
class AverageMeter(object):
    """Keep the most recent sample of a metric alongside its running mean.

    Attributes (all reset to 0 by ``reset``):
        val: the last value passed to ``update``.
        sum: weighted sum of every value seen since the last reset.
        count: total weight (number of observations) since the last reset.
        avg: ``sum / count``, refreshed on every ``update``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero out every tracked statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest sample, counted as ``n`` observations."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        # Mean over everything accumulated since the last reset.
        self.avg = self.sum / self.count
================================================
FILE: Colab_DAIN.ipynb
================================================
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Colab_DAIN_new.ipynb",
"private_outputs": true,
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "1pIo4r_Y8cMo"
},
"source": [
"# DAIN Colab"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "iGPHW5SOpPe3"
},
"source": [
"*DAIN Colab, v1.6.0*\n",
"\n",
"Based on the [original Colab file](https://github.com/baowenbo/DAIN/issues/44) by btahir. \n",
"\n",
"Enhancements by [Styler00Dollar](https://github.com/styler00dollar) aka \"sudo rm -rf / --no-preserve-root#8353\" on discord and [Alpha](https://github.com/AlphaGit), (Alpha#6137 on Discord). Please do not run this command in your linux terminal. It's rather meant as a joke.\n",
"\n",
"[Styler00Dollar's fork](https://github.com/styler00dollar/DAIN) / [Alpha's fork](https://github.com/AlphaGit/DAIN)\n",
"\n",
"A simple guide:\n",
"- Upload this ` .ipynb` file to your Google Colab.\n",
"- Create a folder inside of Google Drive named \"DAIN\"\n",
"- Change the configurations in the next cell\n",
"- Run cells one by one\n",
"\n",
"Stuff that should be improved:\n",
"- Alpha channel will be removed automatically and won't be added back. Anything related to alpha will be converted to black.\n",
"- Adding configuration to select speed\n",
"- Detect scenes to avoid interpolating scene-changes\n",
"- Auto-resume\n",
"- Copy `start_frame` - `end_frame` audio from original input to final output\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "enKoi0TR2fOD",
"cellView": "form"
},
"source": [
"################# Required Configurations ############################\n",
"\n",
"#@markdown # Required Configuration\n",
"#@markdown Use the values in here to configure what you'd like DAIN to do.\n",
"\n",
"#@markdown ## Input file\n",
"#@markdown Path (relative to the root of your Google Drive) to the input file. For instance, if you save your `example.mkv` file in your Google Drive, inside a `videos` folder, the path would be: `videos/example.mkv`. Currently videos and gifs are supported.\n",
"INPUT_FILEPATH = \"DAIN/input.mp4\" #@param{type:\"string\"}\n",
"\n",
"#@markdown ## Output file\n",
"#@markdown Output file path: path (relative to the root of your Google Drive) for the output file. It will also determine the filetype in the destination. `.mp4` is recommended for video input, `.gif` for gif inputs.\n",
"OUTPUT_FILE_PATH = \"DAIN/output.mp4\" #@param{type:\"string\"}\n",
"\n",
"################# Optional configurations ############################\n",
"\n",
"#@markdown # Optional Configuration\n",
"#@markdown Parameters below can be left with their defaults, but feel free to adapt them to your needs.\n",
"\n",
"#@markdown ## Target FPS\n",
"#@markdown how many frames per second should the result have. This will determine how many intermediate images are interpolated.\n",
"TARGET_FPS = 60 #@param{type:\"number\"}\n",
"\n",
"#@markdown ## Frame input directory\n",
"#@markdown A path, relative to your GDrive root, where you already have the list of frames in the format 00001.png, 00002.png, etc.\n",
"FRAME_INPUT_DIR = '/content/DAIN/input_frames' #@param{type:\"string\"}\n",
"\n",
"#@markdown ## Frame output directory\n",
"#@markdown A path, relative to your GDrive root, where you want the generated frame.\n",
"FRAME_OUTPUT_DIR = '/content/DAIN/output_frames' #@param{type:\"string\"}\n",
"\n",
"#@markdown ## Start Frame\n",
"#@markdown First frame to consider from the video when processing.\n",
"START_FRAME = 1 #@param{type:\"number\"}\n",
"\n",
"#@markdown ## End Frame\n",
"#@markdown Last frame to consider from the video when processing. To use the whole video use `-1`.\n",
"END_FRAME = -1 #@param{type:\"number\"}\n",
"\n",
"#@markdown ## Seamless playback\n",
"#@markdown Creates a seamless loop by also using the first frame as the last one. Set this to True if a loop is intended.\n",
"SEAMLESS = False #@param{type:\"boolean\"}\n",
"\n",
"#@markdown ## Auto-remove PNG directory\n",
"#@markdown Auto-delete output PNG dir after ffmpeg video creation. Set this to `False` if you want to keep the PNG files.\n",
"AUTO_REMOVE = True #@param{type:\"boolean\"}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "N9cGwalNeyk9",
"cellView": "form"
},
"source": [
"#@title Connect Google Drive\n",
"from google.colab import drive\n",
"drive.mount('/content/gdrive')\n",
"print('Google Drive connected.')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "irzjv1x4e3S4",
"cellView": "form"
},
"source": [
"#@title Check your current GPU\n",
"# If you are lucky, you get 16GB VRAM. If you are not lucky, you get less. VRAM is important. The more VRAM, the higher the maximum resolution will go.\n",
"\n",
"# 16GB: Can handle 720p. 1080p will produce an out-of-memory error. \n",
"# 8GB: Can handle 480p. 720p will produce an out-of-memory error.\n",
"\n",
"!nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "UYHTTP91oMvh"
},
"source": [
"# Install dependencies.\n",
"\n",
"This next step may take somewhere between 15-20 minutes. Run this only once at startup.\n",
"\n",
"Look for the \"Finished installing dependencies\" message."
]
},
{
"cell_type": "code",
"metadata": {
"id": "e5AHGetTRacZ",
"cellView": "form"
},
"source": [
"#@title Setup everything. This takes a while. Just wait ~20 minutes in total.\n",
"\n",
"# Install old pytorch to avoid faulty output\n",
"%cd /content/\n",
"!wget -c https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh\n",
"!chmod +x Miniconda3-4.5.4-Linux-x86_64.sh\n",
"!bash ./Miniconda3-4.5.4-Linux-x86_64.sh -b -f -p /usr/local\n",
"!conda install pytorch==1.1 cudatoolkit torchvision -c pytorch -y\n",
"!conda install ipykernel -y\n",
"\n",
"!pip install scipy==1.1.0\n",
"!pip install imageio\n",
"!CUDA_VISIBLE_DEVICES=0\n",
"!sudo apt-get install imagemagick imagemagick-doc\n",
"print(\"Finished installing dependencies.\")\n",
"\n",
"# Clone DAIN sources\n",
"%cd /content\n",
"!git clone -b master --depth 1 https://github.com/baowenbo/DAIN /content/DAIN\n",
"%cd /content/DAIN\n",
"!git log -1\n",
"\n",
"# Building DAIN\n",
"%cd /content/DAIN/my_package/\n",
"!./build.sh\n",
"print(\"Building #1 done.\")\n",
"\n",
"# Building DAIN PyTorch correlation package.\n",
"%cd /content/DAIN/PWCNet/correlation_package_pytorch1_0\n",
"!./build.sh\n",
"print(\"Building #2 done.\")\n",
"\n",
"# Downloading pre-trained model\n",
"%cd /content/DAIN\n",
"!mkdir model_weights\n",
"!wget -O model_weights/best.pth http://vllab1.ucmerced.edu/~wenbobao/DAIN/best.pth"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "zm5kn6vTncL4",
"cellView": "form"
},
"source": [
"#@title Detecting FPS of input file.\n",
"%shell yes | cp -f /content/gdrive/My\\ Drive/{INPUT_FILEPATH} /content/DAIN/\n",
"\n",
"import os\n",
"filename = os.path.basename(INPUT_FILEPATH)\n",
"\n",
"import cv2\n",
"cap = cv2.VideoCapture(f'/content/DAIN/{filename}')\n",
"\n",
"fps = cap.get(cv2.CAP_PROP_FPS)\n",
"print(f\"Input file has {fps} fps\")\n",
"\n",
"if(fps/TARGET_FPS>0.5):\n",
" print(\"Define a higher fps, because there is not enough time for new frames. (Old FPS)/(New FPS) should be lower than 0.5. Interpolation will fail if you try.\")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9YNva-GuKq4Y",
"cellView": "form"
},
"source": [
"#@title ffmpeg extract - Generating individual frame PNGs from the source file.\n",
"%shell rm -rf '{FRAME_INPUT_DIR}'\n",
"%shell mkdir -p '{FRAME_INPUT_DIR}'\n",
"\n",
"if (END_FRAME==-1):\n",
" %shell ffmpeg -i '/content/DAIN/{filename}' -vf 'select=gte(n\\,{START_FRAME}),setpts=PTS-STARTPTS' '{FRAME_INPUT_DIR}/%05d.png'\n",
"else:\n",
" %shell ffmpeg -i '/content/DAIN/{filename}' -vf 'select=between(n\\,{START_FRAME}\\,{END_FRAME}),setpts=PTS-STARTPTS' '{FRAME_INPUT_DIR}/%05d.png'\n",
"\n",
"from IPython.display import clear_output\n",
"clear_output()\n",
"\n",
"png_generated_count_command_result = %shell ls '{FRAME_INPUT_DIR}' | wc -l\n",
"frame_count = int(png_generated_count_command_result.output.strip())\n",
"\n",
"import shutil\n",
"if SEAMLESS:\n",
" # Append a copy of the first frame as one extra trailing frame so the loop closes.\n",
" frame_count += 1\n",
" first_frame = f\"{FRAME_INPUT_DIR}/00001.png\"\n",
" # frame_count is an int (it has no .zfill), so zero-pad it with a format spec.\n",
" new_last_frame = f\"{FRAME_INPUT_DIR}/{frame_count:05d}.png\"\n",
" shutil.copyfile(first_frame, new_last_frame)\n",
"\n",
"print(f\"{frame_count} frame PNGs generated.\")\n",
"\n",
"#Checking if PNGs do have alpha\n",
"import subprocess as sp\n",
"%cd {FRAME_INPUT_DIR}\n",
"channels = sp.getoutput('identify -format %[channels] 00001.png')\n",
"print (f\"{channels} detected\")\n",
"\n",
"# Removing alpha if detected\n",
"if \"a\" in channels:\n",
" print(\"Alpha channel detected and will be removed.\")\n",
" print(sp.getoutput('find . -name \"*.png\" -exec convert \"{}\" -alpha off PNG24:\"{}\" \\;'))"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "W3rrE7L824gL",
"cellView": "form"
},
"source": [
"#@title Interpolation\n",
"%shell mkdir -p '{FRAME_OUTPUT_DIR}'\n",
"%cd /content/DAIN\n",
"\n",
"!python -W ignore colab_interpolate.py --netName DAIN_slowmotion --time_step {fps/TARGET_FPS} --start_frame 1 --end_frame {frame_count} --frame_input_dir '{FRAME_INPUT_DIR}' --frame_output_dir '{FRAME_OUTPUT_DIR}'"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TKREDli2IDMV",
"cellView": "form"
},
"source": [
"#@title Create output video\n",
"%cd {FRAME_OUTPUT_DIR}\n",
"%shell ffmpeg -y -r {TARGET_FPS} -f image2 -pattern_type glob -i '*.png' '/content/gdrive/My Drive/{OUTPUT_FILE_PATH}'\n",
"\n",
"if(AUTO_REMOVE):\n",
" !rm -rf {FRAME_OUTPUT_DIR}/*\n",
"\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UF5TEo5N374o",
"cellView": "form"
},
"source": [
"#@title [Experimental] Create video with sound\n",
"# Only run this, if the original had sound.\n",
"%cd {FRAME_OUTPUT_DIR}\n",
"%shell ffmpeg -i '/content/DAIN/{filename}' -acodec copy output-audio.aac\n",
"%shell ffmpeg -y -r {TARGET_FPS} -f image2 -pattern_type glob -i '*.png' -i output-audio.aac -shortest '/content/gdrive/My Drive/{OUTPUT_FILE_PATH}'\n",
"\n",
"if (AUTO_REMOVE):\n",
" !rm -rf {FRAME_OUTPUT_DIR}/*\n",
" !rm -rf output-audio.aac"
],
"execution_count": null,
"outputs": []
}
]
}
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2019 Wenbo Bao
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: MegaDepth/LICENSE
================================================
MIT License
Copyright (c) 2018 Zhengqi Li
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: MegaDepth/MegaDepth_model.py
================================================
import torch
import sys
from torch.autograd import Variable
import numpy as np
from .options.train_options import TrainOptions
from .models.models import create_model
__all__ = ['HourGlass']
def HourGlass(pretrained=None):
    """Create the MegaDepth model wrapper and return its ``netG`` network.

    Args:
        pretrained: Forwarded unchanged as the second argument of
            ``create_model``; its meaning is defined there.
            NOTE(review): presumably a checkpoint path or ``None`` -- confirm.

    Returns:
        The ``netG`` attribute of the created model, which the original
        inline note describes as the real ``nn.Module``.
    """
    # Options are parsed before the model is built, as in the original
    # (original note: set CUDA_VISIBLE_DEVICES before import torch).
    options = TrainOptions().parse()
    wrapper = create_model(options, pretrained)
    return wrapper.netG
================================================
FILE: MegaDepth/README.md
================================================
# MegaDepth: Learning Single-View Depth Prediction from Internet Photos
This is the code for the algorithm described in "MegaDepth: Learning Single-View Depth Prediction from Internet Photos, Z. Li and N. Snavely, CVPR 2018". The code skeleton is based on "https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix". If you use our code or models for academic purposes, please consider citing:
@inproceedings{MDLi18,
title={MegaDepth: Learning Single-View Depth Prediction from Internet Photos},
author={Zhengqi Li and Noah Snavely},
booktitle={Computer Vision and Pattern Recognition (CVPR)},
year={2018}
}
#### Examples of single-view depth predictions on the photos we randomly downloaded from Internet:
<img src="https://github.com/lixx2938/MegaDepth/blob/master/demo.jpg" width="300"/> <img src="https://github.com/lixx2938/MegaDepth/blob/master/demo.png" width="300"/>
<img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_2.jpg" width="300"/> <img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_2.png" width="300"/>
<img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_3.jpg" width="300"/> <img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_3.png" width="300"/>
<img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_4.jpg" width="300"/> <img src="https://github.com/lixx2938/MegaDepth/blob/master/demo_img/demo_4.png" width="300"/>
#### Dependencies:
* The code was written in Pytorch 0.2 and Python 2.7, but it should be easy to adapt it to Python 3 and latest Pytorch version if needed.
* You might need skimage, h5py libraries installed for python before running the code.
#### Single-view depth prediction on any Internet photo:
* Download pretrained model from: http://www.cs.cornell.edu/projects/megadepth/dataset/models/best_generalization_net_G.pth and put it in "checkpoints/test_local/best_generalization_net_G.pth"
* In python file "models/HG_model.py", in init function, change to "model_parameters = self.load_network(model, 'G', 'best_generalization')"
* run demo code
```bash
python demo.py
```
You should see an inverse depth prediction saved as demo.png from an original photo demo.jpg. If you want to use RGB maps for visualization, like the figures in our paper, you have to install/run semantic segmentation from https://github.com/kazuto1011/pspnet-pytorch trained on ADE20K to mask out sky, because inconsistent depth prediction of unmasked sky will not make the RGB visualization reasonable.
#### Evaluation on the MegaDepth test splits:
* Download MegaDepth V1 dataset from project website: http://www.cs.cornell.edu/projects/megadepth/.
* Download pretrained model (specific for MD dataset) from http://www.cs.cornell.edu/projects/megadepth/dataset/models/best_vanila_net_G.pth and put it in "checkpoints/test_local/best_vanila_net_G.pth"
* Download test list files from http://www.cs.cornell.edu/projects/megadepth/dataset/data_lists/test_lists.tar.gz, it should include two folders corresponding to images with landscape and portrait orientations.
* To compute scale-invariant RMSE on the MD test set, change the variable "dataset_root" in python file "rmse_error_main.py" to the root directory of MegaDepth_v1 folder, and change variable "test_list_dir_l" and "test_list_dir_p" to corresponding folder paths of test lists, and run:
```bash
python rmse_error_main.py
```
* To compute Structure from Motion Disagreement Rate (SDR), change the variable "dataset_root" in python file "SDR_compute.py" to the root directory of MegaDepth_v1 folder, and change variable "test_list_dir_l" and "test_list_dir_p" to corresponding folder paths of test lists, and run:
```bash
python SDR_compute.py
```
* If you want to run our model on arbitrary Internet photos, please download pretrained model from http://www.cs.cornell.edu/projects/megadepth/dataset/models/best_generalization_net_G.pth, which has much better generalization ability (qualitatively speaking) to completely unknown scenes.
================================================
FILE: MegaDepth/SDR_compute.py
================================================
import time
import torch
import sys
from options.train_options import TrainOptions
opt = TrainOptions().parse() # set CUDA_VISIBLE_DEVICES before import torch
from data.data_loader import CreateDataLoader_TEST
from models.models import create_model
# Machine-specific root path used both for the dataset and for the test-list
# folders below; edit it before running this script elsewhere.
dataset_root = "/phoenix/S6/zl548/"
# Landscape split: list folder plus the height/width handed to the test loader.
test_list_dir_l = dataset_root + '/MegaDpeth_code/test_list/landscape/'
input_height = 240
input_width = 320
test_data_loader_l = CreateDataLoader_TEST(dataset_root, test_list_dir_l, input_height, input_width)
test_dataset_l = test_data_loader_l.load_data()
test_dataset_size_l = len(test_data_loader_l)
print('========================= test L images = %d' % test_dataset_size_l)
# Portrait split: same setup with height and width swapped.
test_list_dir_p = dataset_root + '/MegaDpeth_code/test_list/portrait/'
input_height = 320
input_width = 240
test_data_loader_p = CreateDataLoader_TEST(dataset_root, test_list_dir_p, input_height, input_width)
test_dataset_p = test_data_loader_p.load_data()
test_dataset_size_p = len(test_data_loader_p)
print('========================= test P images = %d' % test_dataset_size_p)
# Model built from the options parsed at the top of the script.
model = create_model(opt)
# NOTE(review): batch_size, diw_index, total_steps and best_loss are not read
# anywhere in the visible part of this script.
batch_size = 32
diw_index = 0
total_steps = 0
best_loss = 100
# Running tallies updated by test_SDR; slots 0/1/2 are reported there as
# equal / unequal / total.
error_list = [0 , 0, 0]
total_list = [0 , 0, 0]
# NOTE(review): list_l and list_p are likewise unused in the visible code.
list_l = range(test_dataset_size_l)
list_p = range(test_dataset_size_p)
def _accumulate_SDR(model, dataset):
    """Evaluate ``model`` on every batch of ``dataset`` and update the SDR tallies.

    Adds each batch's result to the module-level ``error_list`` and
    ``total_list`` and prints the running ratios after every batch.
    """
    for data in dataset:
        stacked_img = data['img_1']
        targets = data['target_1']
        error, samples = model.evaluate_SDR(stacked_img, targets)
        for j in range(3):
            error_list[j] += error[j]
            total_list[j] += samples[j]
        # Running ratios over everything evaluated so far.
        print("EQUAL ", error_list[0]/float(total_list[0]))
        print("INEQUAL ", error_list[1]/float(total_list[1]))
        print("TOTAL ",error_list[2]/float(total_list[2]))


def test_SDR(model):
    """Run SDR evaluation over the landscape and then the portrait test set.

    Args:
        model: object providing ``switch_to_eval()`` and
            ``evaluate_SDR(images, targets)``; the latter must return two
            length-3 sequences (errors, sample counts).

    Results accumulate in the module-level ``error_list`` / ``total_list``
    and a summary of the three ratios is printed at the end.
    """
    print("============================= TEST SDR============================")
    model.switch_to_eval()

    # Both splits are processed identically; landscape first, as before.
    _accumulate_SDR(model, test_dataset_l)
    _accumulate_SDR(model, test_dataset_p)

    print("=========================================================SDR Summary =====================")
    print("Equal SDR:\t" , float(error_list[0])/ float(total_list[0]))
    print("Unequal SDR:\t" , float(error_list[1])/ float(total_list[1]))
    print("SDR:\t" , float(error_list[2])/ float(total_list[2]))
# Script entry point: announce the run, then evaluate the model built above.
print("WE ARE TESTING SDR!!!!")
test_SDR(model)
================================================
FILE: MegaDepth/__init__.py
================================================
from .MegaDepth_model import *
================================================
FILE: MegaDepth/data/__init__.py
================================================
================================================
FILE: MegaDepth/data/aligned_data_loader.py
================================================
import random
import numpy as np
import torch.utils.data
from data.base_data_loader import BaseDataLoader
from data.image_folder import ImageFolder
from data.image_folder import ImageFolder_TEST
from builtins import object
import sys
import h5py
class PairedData(object):
    """Iterator wrapper that turns ``(image, target)`` batches into dictionaries.

    Each item produced is ``{'img_1': image, 'target_1': target}``, where the
    pair comes from the wrapped ``data_loader``.
    """

    def __init__(self, data_loader, flip):
        self.data_loader = data_loader
        # Stored but not consulted anywhere inside this class.
        self.flip = flip
        self._restart()

    def _restart(self):
        """Open a fresh iterator over the loader and reset the step counter."""
        self.data_loader_iter = iter(self.data_loader)
        self.iter = 0

    def __iter__(self):
        self._restart()
        return self

    def __next__(self):
        # The counter is bumped before fetching, so it also counts the final
        # call that ends the iteration.
        self.iter += 1
        image_batch, target_batch = next(self.data_loader_iter)
        return {'img_1': image_batch, 'target_1': target_batch}
class AlignedDataLoader(BaseDataLoader):
    """Wrap an ``ImageFolder`` dataset in a batched loader exposed as ``PairedData``.

    Batches hold 16 samples and are fetched by 3 worker processes.
    """

    def __init__(self,_root, _list_dir, _input_height, _input_width, _is_flip, _shuffle):
        self.dataset = ImageFolder(
            root=_root,
            list_dir=_list_dir,
            input_height=_input_height,
            input_width=_input_width,
            transform=None,
            is_flip=_is_flip,
        )
        batch_loader = torch.utils.data.DataLoader(
            self.dataset,
            batch_size=16,
            shuffle=_shuffle,
            num_workers=3,
        )
        # PairedData always receives flip=False; _is_flip only reaches the dataset.
        self.paired_data = PairedData(batch_loader, False)

    def name(self):
        """Return the fixed identifier of this loader."""
        return 'RMSEDataLoader'

    def load_data(self):
        """Return the ``PairedData`` iterable built in the constructor."""
        return self.paired_data

    def __len__(self):
        """Return the number of samples in the underlying dataset."""
        return len(self.dataset)
class AlignedDataLoader_TEST(BaseDataLoader):
    """Wrap an ``ImageFolder_TEST`` dataset in an unshuffled, one-sample-per-batch loader."""

    def __init__(self,_root, _list_dir, _input_height, _input_width):
        self.dataset = ImageFolder_TEST(
            root=_root,
            list_dir=_list_dir,
            _input_height=_input_height,
            _input_width=_input_width,
        )
        batch_loader = torch.utils.data.DataLoader(
            self.dataset,
            batch_size=1,
            shuffle=False,
            num_workers=3,
        )
        # No flipping is requested for the test iterator.
        self.paired_data = PairedData(batch_loader, False)

    def name(self):
        """Return the fixed identifier of this loader."""
        return 'TestSDRDataLoader'

    def load_data(self):
        """Return the ``PairedData`` iterable built in the constructor."""
        return self.paired_data

    def __len__(self):
        """Return the number of samples in the underlying dataset."""
        return len(self.dataset)
================================================
FILE: MegaDepth/data/base_data_loader.py
================================================
class BaseDataLoader():
    """Minimal base class for the data-loader wrappers in this package."""

    def __init__(self):
        pass

    def load_data(self):
        """Return the iterable of batches; the base class has none.

        Returns:
            None. Subclasses override this to return their data iterable.
        """
        # `self` was missing from the signature, which made the method
        # uncallable on an instance (TypeError).
        return None
================================================
FILE: MegaDepth/data/data_loader.py
================================================
def CreateDataLoader(_root, _list_dir, _input_height, _input_width, is_flip = True, shuffle = True):
    """Build and return an ``AlignedDataLoader`` for the given dataset location.

    All arguments are forwarded positionally to ``AlignedDataLoader`` in the
    order (root, list dir, height, width, is_flip, shuffle).
    """
    # Imported inside the function, as in the original.
    from data.aligned_data_loader import AlignedDataLoader
    return AlignedDataLoader(_root, _list_dir, _input_height, _input_width, is_flip, shuffle)
def CreateDataLoader_TEST(_root, _list_dir, _input_height, _input_width):
    """Build and return an ``AlignedDataLoader_TEST`` for the given dataset location.

    All arguments are forwarded positionally to ``AlignedDataLoader_TEST`` in
    the order (root, list dir, height, width).
    """
    # Imported inside the function, as in the original.
    from data.aligned_data_loader import AlignedDataLoader_TEST
    return AlignedDataLoader_TEST(_root, _list_dir, _input_height, _input_width)
================================================
FILE: MegaDepth/data/image_folder.py
================================================
################################################################################
# Code from
# https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
# Modified the original code so that it also loads images from the current
# directory as well as the subdirectories
################################################################################
import h5py
import torch.utils.data as data
import pickle
import numpy as np
import torch
import os, os.path
import math, random
import sys
from skimage.transform import resize
from skimage import io
def make_dataset(list_dir):
    """Load the image and target lists pickled inside ``list_dir``.

    Args:
        list_dir: Directory prefix holding ``imgs_MD.p`` and ``targets_MD.p``.
            The file names are appended by plain string concatenation, so the
            prefix must already end with a path separator.

    Returns:
        A tuple ``(images_list, targets_list)`` with the unpickled contents of
        the two files, in that order.
    """
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # point this at list files from a trusted source.
    # Context managers close each file even if unpickling raises.
    with open(list_dir + "imgs_MD.p", "rb") as images_file:
        images_list = pickle.load(images_file)
    with open(list_dir + "targets_MD.p", "rb") as targets_file:
        targets_list = pickle.load(targets_file)
    return images_list, targets_list
# test for si-RMSE
class ImageFolder(data.Dataset):
    """Dataset of image / depth-map pairs listed by ``make_dataset``.

    Each item is ``(image, targets)`` where ``image`` is a float tensor in
    channel-first layout and ``targets`` is a dict with the keys ``'path'``,
    ``'gt_0'`` (depth) and ``'mask_0'`` (1.0 where depth is valid).
    """

    def __init__(self, root, list_dir, input_height, input_width, transform=None,
                 loader=None, is_flip = True):
        """Read the image and target lists and remember the resize geometry.

        Args:
            root: dataset root; ``"/MegaDepth_v1/"`` plus the list entry is
                appended to it to form each file path.
            list_dir: string prefix handed to ``make_dataset``.
            input_height: height every image and depth map is resized to.
            input_width: width every image and depth map is resized to.
            transform: stored on the instance; not used by this class.
            loader: accepted for interface compatibility; not used.
            is_flip: stored on the instance; not used by this class.

        Raises:
            RuntimeError: if the image list is empty.
        """
        img_list, targets_list = make_dataset(list_dir)
        if len(img_list) == 0:
            # Only names that exist in this module are used in the message, so
            # the intended RuntimeError is what actually gets raised.
            raise RuntimeError("Found 0 images in: " + root + "\n"
                               "Image list was loaded from: " + list_dir)

        self.root = root
        self.list_dir = list_dir
        self.img_list = img_list
        self.targets_list = targets_list
        self.transform = transform
        self.input_height = input_height
        self.input_width = input_width
        self.is_flip = is_flip

    def load_MD(self, img_path, depth_path):
        """Load one image and its depth map and resize both.

        Args:
            img_path: path of the image file, read with ``io.imread``.
            depth_path: path of the HDF5 file whose ``/depth`` dataset holds
                the depth map.

        Returns:
            ``(color_rgb, gt, mask)`` as contiguous numpy arrays: the resized
            image scaled by 1/255, the resized depth map, and a float32 mask
            that is 1.0 where the resized depth exceeds 1e-8.

        Raises:
            AssertionError: if the depth map and the image differ in height
                or width before resizing.
        """
        MD_img = np.float32(io.imread(img_path)) / 255.0

        # np.array() materialises the dataset in memory, so the file can be
        # closed right away; the context manager also closes it when one of
        # the shape checks below would otherwise abort before a manual close.
        with h5py.File(depth_path, 'r') as hdf5_file_read:
            gt = np.array(hdf5_file_read.get('/depth'))

        assert(gt.shape[0] == MD_img.shape[0])
        assert(gt.shape[1] == MD_img.shape[1])

        color_rgb = np.zeros((self.input_height, self.input_width, 3))
        MD_img = resize(MD_img, (self.input_height, self.input_width), order = 1)

        if len(MD_img.shape) == 2:
            # Grayscale input: replicate the single channel three times.
            color_rgb[:, :, 0] = MD_img.copy()
            color_rgb[:, :, 1] = MD_img.copy()
            color_rgb[:, :, 2] = MD_img.copy()
        else:
            color_rgb = MD_img.copy()

        # With enough valid pixels, zero out depth outliers: first everything
        # above the 98th percentile, then everything below the 1st percentile
        # of the depths that are still valid.
        if np.sum(gt > 1e-8) > 10:
            gt[gt > np.percentile(gt[gt > 1e-8], 98)] = 0
            gt[gt < np.percentile(gt[gt > 1e-8], 1)] = 0

        # Normalise by the maximum before resizing and scale back afterwards;
        # order=0 is nearest-neighbour, so depth values are not blended.
        max_depth = np.max(gt) + 1e-9
        gt = gt / max_depth
        gt = resize(gt, (self.input_height, self.input_width), order = 0)
        gt = gt * max_depth

        mask = np.float32(gt > 1e-8)

        color_rgb = np.ascontiguousarray(color_rgb)
        gt = np.ascontiguousarray(gt)
        mask = np.ascontiguousarray(mask)

        return color_rgb, gt, mask

    def __getitem__(self, index):
        """Return ``(image_tensor, targets)`` for the entry at ``index``."""
        img_path_suff = self.img_list[index]
        targets_path_suff = self.targets_list[index]

        img_path = self.root + "/MegaDepth_v1/" + img_path_suff
        depth_path = self.root + "/MegaDepth_v1/" + targets_path_suff

        img, gt, mask = self.load_MD(img_path, depth_path)

        # Invalid pixels get depth 1.0 (presumably so that a later log() of
        # the depth stays finite; confirm against the consumer).
        gt[mask < 0.1] = 1.0

        targets_1 = {
            'path': targets_path_suff,
            'gt_0': torch.from_numpy(gt).float(),
            'mask_0': torch.from_numpy(mask).float(),
        }

        # Move the channel axis first: (H, W, C) -> (C, H, W).
        final_img = torch.from_numpy(np.transpose(img, (2, 0, 1))).contiguous().float()

        return final_img, targets_1

    def __len__(self):
        """Return the number of listed images."""
        return len(self.img_list)
# Test for SDR
class ImageFolder_TEST(data.Dataset):
    """Dataset pairing each listed image with its sparse SfM ordinal features.

    Each item is ``(image, targets)``; ``targets`` carries the point-pair
    coordinates (``'sdr_xA'``, ``'sdr_yA'``, ``'sdr_xB'``, ``'sdr_yB'``), the
    ordinal labels (``'sdr_gt'``) and a ``'has_SfM_feature'`` flag.
    """

    def __init__(self, root, list_dir, _input_height, _input_width):
        """Read the image list and remember the resize geometry.

        Args:
            root: dataset root used to build the image and feature paths.
            list_dir: string prefix handed to ``make_dataset``.
            _input_height: height every image is resized to.
            _input_width: width every image is resized to.

        Raises:
            RuntimeError: if the image list is empty.
        """
        img_list, targets_list = make_dataset(list_dir)
        if len(img_list) == 0:
            # Only names that exist in this module are used in the message, so
            # the intended RuntimeError is what actually gets raised.
            raise RuntimeError("Found 0 images in: " + root + "\n"
                               "Image list was loaded from: " + list_dir)

        self.root = root
        self.list_dir = list_dir
        self.img_list = img_list
        self.input_height = _input_height
        self.input_width = _input_width
        self.half_window = 1

    def load_SfM_ORD(self, img_path, targets_path):
        """Load one image and its ordinal point pairs.

        Args:
            img_path: path of the image file, read with ``io.imread``.
            targets_path: path of the HDF5 file whose ``/SfM_features``
                dataset stores rows ``y_A, x_A, y_B, x_B, ordinal``.

        Returns:
            ``(color_rgb, y_A, x_A, y_B, x_B, ord_)``. The coordinate rows are
            multiplied by the target height/width and rounded. When the
            resized image has four channels, the five trailing values are the
            integer ``0`` and the feature file is never opened.
        """
        sfm_image = np.float32(io.imread(img_path)) / 255.0
        resized_sfm_img = resize(sfm_image, (self.input_height, self.input_width), order = 1)

        color_rgb = np.zeros((self.input_height, self.input_width, 3))

        if len(sfm_image.shape) == 2:
            # Grayscale input: replicate the single channel three times.
            color_rgb[:, :, 0] = resized_sfm_img.copy()
            color_rgb[:, :, 1] = resized_sfm_img.copy()
            color_rgb[:, :, 2] = resized_sfm_img.copy()
        else:
            color_rgb = resized_sfm_img.copy()

        # NOTE(review): for 4-channel images plain ints are returned, which
        # __getitem__ then passes to torch.from_numpy; confirm how such images
        # are meant to be handled.
        if color_rgb.shape[2] == 4:
            return color_rgb, 0, 0, 0, 0, 0

        # np.array() materialises the dataset in memory, so the file only
        # needs to stay open for this read; ``with`` closes it on errors too.
        with h5py.File(targets_path, 'r') as hdf5_file_read:
            gt = np.array(hdf5_file_read.get('/SfM_features'))

        y_A = np.round(gt[0, :] * float(self.input_height))
        x_A = np.round(gt[1, :] * float(self.input_width))
        y_B = np.round(gt[2, :] * float(self.input_height))
        x_B = np.round(gt[3, :] * float(self.input_width))
        ord_ = gt[4, :]

        return color_rgb, y_A, x_A, y_B, x_B, ord_

    def __getitem__(self, index):
        """Return ``(image_tensor, targets)`` for the entry at ``index``.

        When either the image or its feature file is missing, the image is
        all zeros, the coordinate lists stay empty and
        ``targets['has_SfM_feature']`` is ``False``.
        """
        targets_1 = {
            'path': [],
            'sdr_xA': [],
            'sdr_yA': [],
            'sdr_xB': [],
            'sdr_yB': [],
            'sdr_gt': [],
        }

        img_path_suff = self.img_list[index]
        img_path = self.root + "/MegaDepth_v1/" + img_path_suff

        # The feature file sits under <root>/sparse_features/<folder>/<image>.h5,
        # where <folder> is the fourth-from-last component of the list entry.
        path_parts = img_path_suff.split('/')
        folder_name = path_parts[-4]
        img_name = path_parts[-1]
        sparse_sift_path = self.root + "/sparse_features/" + folder_name + "/" + img_name + ".h5"

        if not os.path.isfile(sparse_sift_path) or not os.path.isfile(img_path):
            # No sparse features (or no image) available for this entry.
            img = np.zeros((self.input_height, self.input_width, 3))
            targets_1['has_SfM_feature'] = False
        else:
            img, y_A, x_A, y_B, x_B, ordinal = self.load_SfM_ORD(img_path, sparse_sift_path)

            targets_1['sdr_xA'].append(torch.from_numpy(x_A).long())
            targets_1['sdr_yA'].append(torch.from_numpy(y_A).long())
            targets_1['sdr_xB'].append(torch.from_numpy(x_B).long())
            targets_1['sdr_yB'].append(torch.from_numpy(y_B).long())
            targets_1['sdr_gt'].append(torch.from_numpy(ordinal).float())
            targets_1['has_SfM_feature'] = True

        # Move the channel axis first: (H, W, C) -> (C, H, W).
        final_img = torch.from_numpy(np.transpose(img, (2, 0, 1))).contiguous().float()

        return final_img, targets_1

    def __len__(self):
        """Return the number of listed images."""
        return len(self.img_list)
================================================
FILE: MegaDepth/models/HG_model.py
================================================
import numpy as np
import torch
import os
from torch.autograd import Variable
from .base_model import BaseModel
import sys
# import pytorch_DIW_scratch
import MegaDepth.pytorch_DIW_scratch as pytorch_DIW_scratch
class HGModel(BaseModel):
    """Hourglass depth network with SDR and scale-invariant RMSE evaluation.

    ``self.netG`` is the module-level ``pytorch_DIW_scratch.pytorch_DIW_scratch``
    network object itself (not a copy), optionally loaded with saved weights.
    """

    def name(self):
        """Return the model's display name."""
        return 'HGModel'

    def __init__(self, opt, pretrained=None):
        """Set up the base-model fields and, optionally, load saved weights.

        Args:
            opt: options object handed to ``BaseModel.initialize``.
            pretrained: optional checkpoint (anything ``torch.load`` accepts)
                containing a state dict; ``None`` leaves the weights as is.
        """
        BaseModel.initialize(self, opt)

        model = pytorch_DIW_scratch.pytorch_DIW_scratch

        if pretrained is not None:
            pretrained_dict = torch.load(pretrained)
            model_dict = model.state_dict()

            # Checkpoints written from a DataParallel wrapper prefix every key
            # with 'module.'. Strip it only where it is present so that keys
            # saved without the wrapper are left intact.
            prefix = 'module.'
            pretrained_dict = {
                (key[len(prefix):] if key.startswith(prefix) else key): value
                for key, value in pretrained_dict.items()
            }

            # Overwrite the matching entries, then load the merged dict.
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            pretrained_dict = None

        self.netG = model

    def batch_classify(self, z_A_arr, z_B_arr, ground_truth):
        """Compare predicted depth ordering of point pairs with the labels.

        A pair is labelled 1 when ``z_A / z_B`` exceeds 1.1, -1 when it is
        below ``1 / 1.1`` and 0 otherwise. ``ground_truth`` is only read.

        Returns:
            ``(error_list, count_list)``, each ordered as
            ``[equal, inequal, total]``: the number of misclassified pairs and
            the number of pairs per category, where "inequal" means a
            non-zero ground-truth label.
        """
        threshold = 1.1
        depth_ratio = torch.div(z_A_arr, z_B_arr).cpu()

        estimated_labels = torch.zeros(depth_ratio.size(0))
        estimated_labels[depth_ratio > threshold] = 1
        estimated_labels[depth_ratio < (1 / threshold)] = -1

        # 1 wherever the estimated label disagrees with the ground truth.
        diff = estimated_labels - ground_truth
        diff[diff != 0] = 1

        inequal_mask = ground_truth != 0

        inequal_error_count = torch.sum(diff[inequal_mask])
        error_count = torch.sum(diff)
        equal_error_count = error_count - inequal_error_count

        total_count = depth_ratio.size(0)
        # Count the non-zero labels without writing into the caller's tensor.
        inequal_count_total = torch.sum(inequal_mask.type_as(ground_truth))
        equal_total_count = total_count - inequal_count_total

        error_list = [equal_error_count, inequal_error_count, error_count]
        count_list = [equal_total_count, inequal_count_total, total_count]

        return error_list, count_list

    def computeSDR(self, prediction_d, targets):
        """Accumulate ordinal-relation errors over a batch of predictions.

        Args:
            prediction_d: batch of predictions; ``exp`` is applied to each
                entry before depths are compared (assumes each entry squeezes
                to a 2-D map; TODO confirm).
            targets: dict with ``'has_SfM_feature'``, ``'sdr_xA'``,
                ``'sdr_xB'``, ``'sdr_yA'``, ``'sdr_yB'`` and ``'sdr_gt'``.

        Returns:
            ``(total_error, total_samples)``: two 3-element lists ordered as
            ``[equal, inequal, total]``, summed over the processed entries.
        """
        total_error = [0, 0, 0]
        total_samples = [0, 0, 0]

        for i in range(0, prediction_d.size(0)):
            if not targets['has_SfM_feature'][i]:
                continue

            x_A_arr = targets["sdr_xA"][i].squeeze(0)
            x_B_arr = targets["sdr_xB"][i].squeeze(0)
            y_A_arr = targets["sdr_yA"][i].squeeze(0)
            y_B_arr = targets["sdr_yB"][i].squeeze(0)

            predict_depth = torch.exp(prediction_d[i, :, :]).squeeze(0)
            ground_truth = targets["sdr_gt"][i]

            # Index tensors follow the prediction's device instead of being
            # forced onto the default CUDA device.
            device = predict_depth.device

            # index_select picks column x[j]; gather then picks row y[j] in
            # that column, i.e. z[j] = predict_depth[y[j], x[j]].
            z_A_arr = torch.gather(
                torch.index_select(predict_depth, 1, x_A_arr.to(device)),
                0, y_A_arr.view(1, -1).to(device))
            z_B_arr = torch.gather(
                torch.index_select(predict_depth, 1, x_B_arr.to(device)),
                0, y_B_arr.view(1, -1).to(device))

            z_A_arr = z_A_arr.squeeze(0)
            z_B_arr = z_B_arr.squeeze(0)

            error_list, count_list = self.batch_classify(z_A_arr, z_B_arr, ground_truth)

            for j in range(0, 3):
                total_error[j] += error_list[j]
                total_samples[j] += count_list[j]

        return total_error, total_samples

    def evaluate_SDR(self, input_, targets):
        """Run the network on ``input_`` (moved to CUDA) and score it with ``computeSDR``."""
        input_images = Variable(input_.cuda())
        prediction_d = self.netG.forward(input_images)

        total_error, total_samples = self.computeSDR(prediction_d.data, targets)

        return total_error, total_samples

    def rmse_Loss(self, log_prediction_d, mask, log_gt):
        """Return ``sqrt(sum(d^2)/N - (sum(d))^2/N^2)`` over the masked pixels.

        ``d`` is ``(log_prediction_d - log_gt) * mask`` and ``N`` is
        ``sum(mask)``.
        """
        N = torch.sum(mask)
        log_d_diff = torch.mul(log_prediction_d - log_gt, mask)

        s1 = torch.sum(torch.pow(log_d_diff, 2)) / N
        s2 = torch.pow(torch.sum(log_d_diff), 2) / (N * N)

        return torch.sqrt(s1 - s2)

    def evaluate_RMSE(self, input_images, prediction_d, targets):
        """Sum ``rmse_Loss`` over the batch.

        ``targets['gt_0']`` is log-transformed before the comparison;
        ``input_images`` is accepted but not used.

        Returns:
            ``(total_loss, count)``: the summed loss and the number of batch
            entries it covers.
        """
        count = 0
        total_loss = Variable(torch.cuda.FloatTensor(1))
        total_loss[0] = 0

        mask_0 = Variable(targets['mask_0'].cuda(), requires_grad = False)
        d_gt_0 = torch.log(Variable(targets['gt_0'].cuda(), requires_grad = False))

        for i in range(0, mask_0.size(0)):
            total_loss += self.rmse_Loss(prediction_d[i, :, :], mask_0[i, :, :], d_gt_0[i, :, :])
            count += 1

        return total_loss.data[0], count

    def evaluate_sc_inv(self, input_, targets):
        """Run the network on ``input_`` (moved to CUDA) and score it with ``evaluate_RMSE``."""
        input_images = Variable(input_.cuda())
        prediction_d = self.netG.forward(input_images)
        rmse_loss, count = self.evaluate_RMSE(input_images, prediction_d, targets)

        return rmse_loss, count

    def switch_to_train(self):
        """Put the network into training mode."""
        self.netG.train()

    def switch_to_eval(self):
        """Put the network into evaluation mode."""
        self.netG.eval()
================================================
FILE: MegaDepth/models/__init__.py
================================================
================================================
FILE: MegaDepth/models/base_model.py
================================================
import os
import torch
class BaseModel():
    """Base class holding the shared options and checkpoint save/load helpers.

    Most hooks (``forward``, ``test``, ``optimize_parameters`` and so on) are
    no-ops here.
    """

    def name(self):
        """Return the model's display name."""
        return 'BaseModel'

    def initialize(self, opt):
        """Copy the relevant fields from ``opt`` onto the instance.

        Reads ``opt.gpu_ids``, ``opt.isTrain``, ``opt.checkpoints_dir`` and
        ``opt.name``; the checkpoint directory becomes
        ``<checkpoints_dir>/<name>``.
        """
        self.opt = opt
        self.gpu_ids = opt.gpu_ids
        self.isTrain = opt.isTrain
        # CUDA tensor type when any GPU id is configured, CPU type otherwise.
        self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
        self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)

    def set_input(self, input):
        """Store ``input`` on the instance."""
        self.input = input

    def forward(self):
        pass

    # used in test time, no backprop
    def test(self):
        pass

    def get_image_paths(self):
        pass

    def optimize_parameters(self):
        pass

    def get_current_visuals(self):
        """Return the stored input."""
        return self.input

    def get_current_errors(self):
        """Return an empty dict."""
        return {}

    def save(self, label):
        pass

    # helper saving function that can be used by subclasses
    def save_network(self, network, network_label, epoch_label, gpu_ids):
        """Save ``network``'s state dict to ``self.save_dir``.

        The network is moved to the CPU for saving and moved back to the
        first listed GPU afterwards when ``gpu_ids`` is non-empty and CUDA is
        available.
        """
        # NOTE(review): this file name starts with an underscore while
        # load_network looks for one without it; confirm which is intended.
        save_filename = '_%s_net_%s.pth' % (epoch_label, network_label)
        save_path = os.path.join(self.save_dir, save_filename)
        torch.save(network.cpu().state_dict(), save_path)
        if len(gpu_ids) and torch.cuda.is_available():
            # Passed positionally: the keyword for this argument differs
            # between PyTorch releases (``device_id`` vs ``device``).
            network.cuda(gpu_ids[0])

    # helper loading function that can be used by subclasses
    def load_network(self, network, network_label, epoch_label):
        """Load and return the saved object for the given labels.

        The path is printed before loading. ``network`` is not modified; the
        caller has to apply the returned object itself.
        """
        save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
        save_path = os.path.join(self.save_dir, save_filename)
        print(save_path)
        model = torch.load(save_path)
        return model

    def update_learning_rate(self):
        pass
================================================
FILE: MegaDepth/models/models.py
================================================
def create_model(opt,pretrained=None):
    """Instantiate and return an ``HGModel``.

    ``opt`` and ``pretrained`` are forwarded unchanged to the ``HGModel``
    constructor.
    """
    # The model class is imported at call time rather than at module import.
    from .HG_model import HGModel

    return HGModel(opt, pretrained)
================================================
FILE: MegaDepth/options/__init__.py
================================================
================================================
FILE: MegaDepth/options/base_options.py
================================================
import argparse
import os
from ..util import util
class BaseOptions():
    """Command-line options shared by the train and test option sets.

    ``parse`` reads ``self.isTrain``, which this class does not set itself;
    the subclasses assign it in their ``initialize``.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.initialized = False

    def initialize(self):
        """Register the shared arguments on ``self.parser``."""
        self.parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
        self.parser.add_argument('--loadSize', type=int, default=286, help='scale images to this size')
        self.parser.add_argument('--fineSize', type=int, default=256, help='then crop to this size')
        self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels')
        self.parser.add_argument('--output_nc', type=int, default=3, help='# of output image channels')
        self.parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer')
        self.parser.add_argument('--ndf', type=int, default=64, help='# of discrim filters in first conv layer')
        self.parser.add_argument('--which_model_netG', type=str, default='unet_256', help='selects model to use for netG')
        self.parser.add_argument('--gpu_ids', type=str, default='0,1', help='gpu ids: e.g. 0 0,1,2, 0,2')
        self.parser.add_argument('--name', type=str, default='test_local', help='name of the experiment. It decides where to store samples and models')
        self.parser.add_argument('--model', type=str, default='pix2pix',
                                 help='chooses which model to use. cycle_gan, one_direction_test, pix2pix, ...')
        self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data')
        self.parser.add_argument('--checkpoints_dir', type=str, default='./checkpoints/', help='models are saved here')
        self.parser.add_argument('--norm', type=str, default='instance', help='instance normalization or batch normalization')
        self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly')
        self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size')
        self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display')
        self.parser.add_argument('--identity', type=float, default=0.0, help='use identity mapping. Setting identity other than 1 has an effect of scaling the weight of the identity mapping loss. For example, if the weight of the identity loss should be 10 times smaller than the weight of the reconstruction loss, please set optidentity = 0.1')
        self.parser.add_argument('--use_dropout', action='store_true', help='use dropout for the generator')
        self.parser.add_argument('--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')
        self.initialized = True

    @staticmethod
    def _parse_gpu_ids(gpu_ids):
        """Turn a comma-separated id string into a list of non-negative ints.

        Negative ids are dropped (so ``'-1'`` yields an empty list) and blank
        tokens, such as the one after a trailing comma, are skipped.
        """
        parsed_ids = []
        for token in gpu_ids.split(','):
            token = token.strip()
            if not token:
                continue
            gpu_id = int(token)
            if gpu_id >= 0:
                parsed_ids.append(gpu_id)
        return parsed_ids

    def parse(self):
        """Parse the command line, post-process the result and save it.

        Unknown arguments are ignored. ``gpu_ids`` is converted from its
        string form to a list of ints, ``isTrain`` is copied from the
        instance, and every option is written to
        ``<checkpoints_dir>/<name>/opt.txt``.

        Returns:
            The populated options namespace (also stored as ``self.opt``).
        """
        if not self.initialized:
            self.initialize()
        self.opt = self.parser.parse_known_args()[0]
        self.opt.isTrain = self.isTrain   # train or test

        self.opt.gpu_ids = self._parse_gpu_ids(self.opt.gpu_ids)

        args = vars(self.opt)

        # save to the disk
        expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
        util.mkdirs(expr_dir)
        file_name = os.path.join(expr_dir, 'opt.txt')
        with open(file_name, 'wt') as opt_file:
            opt_file.write('------------ Options -------------\n')
            for k, v in sorted(args.items()):
                opt_file.write('%s: %s\n' % (str(k), str(v)))
            opt_file.write('-------------- End ----------------\n')
        return self.opt
================================================
FILE: MegaDepth/options/test_options.py
================================================
from .base_options import BaseOptions
class TestOptions(BaseOptions):
    """Option set for test runs: the shared options plus test-only ones."""

    def initialize(self):
        """Register the shared arguments, then the test-specific ones, and mark the run as not training."""
        BaseOptions.initialize(self)

        add_option = self.parser.add_argument
        add_option('--ntest', type=int, default=float("inf"), help='# of test examples.')
        add_option('--results_dir', type=str, default='./results/', help='saves results here.')
        add_option('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
        add_option('--phase', type=str, default='test', help='train, val, test, etc')
        add_option('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
        add_option('--how_many', type=int, default=50, help='how many test images to run')

        self.isTrain = False
================================================
FILE: MegaDepth/options/train_options.py
================================================
from .base_options import BaseOptions
class TrainOptions(BaseOptions):
    """Option set for training runs: the shared options plus training-only ones."""

    def initialize(self):
        """Register the shared arguments, then the training-specific ones, and mark the run as training."""
        BaseOptions.initialize(self)

        add_option = self.parser.add_argument
        add_option('--display_freq', type=int, default=100, help='frequency of showing training results on screen')
        add_option('--print_freq', type=int, default=100, help='frequency of showing training results on console')
        add_option('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results')
        add_option('--save_epoch_freq', type=int, default=5, help='frequency of saving checkpoints at the end of epochs')
        add_option('--continue_train', action='store_true', help='continue training: load the latest model')
        add_option('--phase', type=str, default='train', help='train, val, test, etc')
        add_option('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
        add_option('--niter', type=int, default=100, help='# of iter at starting learning rate')
        add_option('--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero')
        add_option('--beta1', type=float, default=0.5, help='momentum term of adam')
        add_option('--lr', type=float, default=0.0002, help='initial learning rate for adam')
        add_option('--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN')
        add_option('--lambda_A', type=float, default=10.0, help='weight for cycle loss (A -> B -> A)')
        add_option('--lambda_B', type=float, default=10.0, help='weight for cycle loss (B -> A -> B)')
        add_option('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images')
        add_option('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
        add_option('--no_flip', action='store_true', help='if specified, do not flip the images for data argumentation')
        # NOT-IMPLEMENTED self.parser.add_argument('--preprocessing', type=str, default='resize_and_crop', help='resizing/cropping strategy')

        self.isTrain = True
================================================
FILE: MegaDepth/pytorch_DIW_scratch.py
================================================
import torch
import torch.nn as nn
from torch.autograd import Variable
from functools import reduce
class LambdaBase(nn.Sequential):
    """``nn.Sequential`` container that additionally carries a callable.

    The callable is stored as ``lambda_func``; the subclasses decide how it
    is applied to the outputs of the child modules.
    """

    def __init__(self, fn, *args):
        super(LambdaBase, self).__init__(*args)
        self.lambda_func = fn

    def forward_prepare(self, input):
        """Feed ``input`` to every child module independently.

        Returns:
            The list of per-module outputs in registration order, or
            ``input`` itself when that list is empty.
        """
        branch_outputs = [child(input) for child in self._modules.values()]
        if branch_outputs:
            return branch_outputs
        return input
class Lambda(LambdaBase):
    """Applies ``lambda_func`` once to the whole result of ``forward_prepare``."""

    def forward(self, input):
        prepared = self.forward_prepare(input)
        return self.lambda_func(prepared)
class LambdaMap(LambdaBase):
    """Applies ``lambda_func`` to each element of the ``forward_prepare`` result."""

    def forward(self, input):
        fn = self.lambda_func
        prepared = self.forward_prepare(input)
        # One output per element, collected into a list.
        return [fn(item) for item in prepared]
class LambdaReduce(LambdaBase):
    """Folds the ``forward_prepare`` result left to right with ``lambda_func``."""

    def forward(self, input):
        fn = self.lambda_func
        prepared = self.forward_prepare(input)
        return reduce(fn, prepared)
pytorch_DIW_scratch = nn.Sequential( # Sequential,
nn.Conv2d(3,128,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Sequential( # Sequential,
LambdaMap(lambda x: x, # ConcatTable,
nn.Sequential( # Sequential,
nn.MaxPool2d((2, 2),(2, 2)),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.Sequential( # Sequential,
LambdaMap(lambda x: x, # ConcatTable,
nn.Sequential( # Sequential,
nn.MaxPool2d((2, 2),(2, 2)),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.Sequential( # Sequential,
LambdaMap(lambda x: x, # ConcatTable,
nn.Sequential( # Sequential,
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(11, 11),(1, 1),(5, 5)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
),
nn.Sequential( # Sequential,
nn.AvgPool2d((2, 2),(2, 2)),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.Sequential( # Sequential,
LambdaMap(lambda x: x, # ConcatTable,
nn.Sequential( # Sequential,
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
),
nn.Sequential( # Sequential,
nn.AvgPool2d((2, 2),(2, 2)),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.UpsamplingNearest2d(scale_factor=2),
),
),
LambdaReduce(lambda x,y: x+y), # CAddTable,
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,64,(11, 11),(1, 1),(5, 5)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.UpsamplingNearest2d(scale_factor=2),
),
),
LambdaReduce(lambda x,y: x+y), # CAddTable,
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,64,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(256,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.UpsamplingNearest2d(scale_factor=2),
),
nn.Sequential( # Sequential,
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(11, 11),(1, 1),(5, 5)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
),
),
LambdaReduce(lambda x,y: x+y), # CAddTable,
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(5, 5),(1, 1),(2, 2)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,32,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
),
),
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,16,(1, 1)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,16,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,16,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,32,(1, 1)),
nn.BatchNorm2d(32,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(32,16,(11, 11),(1, 1),(5, 5)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
),
nn.UpsamplingNearest2d(scale_factor=2),
),
nn.Sequential( # Sequential,
LambdaReduce(lambda x,y,dim=1: torch.cat((x,y),dim), # Concat,
nn.Sequential( # Sequential,
nn.Conv2d(128,16,(1, 1)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,16,(3, 3),(1, 1),(1, 1)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,16,(7, 7),(1, 1),(3, 3)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
nn.Sequential( # Sequential,
nn.Conv2d(128,64,(1, 1)),
nn.BatchNorm2d(64,1e-05,0.1,False),
nn.ReLU(),
nn.Conv2d(64,16,(11, 11),(1, 1),(5, 5)),
nn.BatchNorm2d(16,1e-05,0.1,False),
nn.ReLU(),
),
),
),
),
LambdaReduce(lambda x,y: x+y), # CAddTable,
),
nn.Conv2d(64,1,(3, 3),(1, 1),(1, 1)),
)
================================================
FILE: MegaDepth/rmse_error_main.py
================================================
import time
import torch
import sys
from options.train_options import TrainOptions
opt = TrainOptions().parse() # set CUDA_VISIBLE_DEVICES before import torch
from data.data_loader import CreateDataLoader
from models.models import create_model
# Machine-specific absolute paths: dataset root and the directory that holds
# the landscape test lists.
dataset_root = "/phoenix/S6/zl548/"
test_list_dir_l = '/phoenix/S6/zl548/MegaDpeth_code/test_list/landscape/'
# Landscape images are loaded with height 240 and width 320.
input_height = 240
input_width = 320
# NOTE(review): presumably these switch off flip augmentation and shuffling
# for evaluation -- confirm against CreateDataLoader.
is_flipped = False
shuffle = False
# Landscape test loader, the dataset object it yields, and its reported size.
test_data_loader_l = CreateDataLoader(dataset_root, test_list_dir_l, input_height, input_width, is_flipped, shuffle)
test_dataset_l = test_data_loader_l.load_data()
test_dataset_size_l = len(test_data_loader_l)
print('========================= test images = %d' % test_dataset_size_l)
# Portrait test lists use the transposed resolution; input_height and
# input_width are rebound here, so the order of these statements matters.
test_list_dir_p = '/phoenix/S6/zl548/MegaDpeth_code/test_list/portrait/'
input_height = 320
input_width = 240
test_data_loader_p = CreateDataLoader(dataset_root, test_list_dir_p, input_height, input_width, is_flipped, shuffle)
test_dataset_p = test_data_loader_p.load_data()
test_dataset_size_p = len(test_data_loader_p)
print('========================= test images = %d' % test_dataset_size_p)
# Build the model from the options parsed at the top of the script.
model = create_model(opt)
def test(model):
    """Evaluate ``model`` on the landscape and then the portrait test set.

    For every batch the loss and sample count returned by
    ``model.evaluate_sc_inv`` are accumulated, and the running average is
    printed; the overall average is printed at the end.

    The datasets are read from the module-level ``test_dataset_l`` and
    ``test_dataset_p``.

    Args:
        model: object exposing ``switch_to_eval()`` and
            ``evaluate_sc_inv(images, targets)`` returning ``(loss, count)``.
    """

    def mean_loss(loss_sum, sample_count):
        # Guard the division: an empty dataset (or batches that report a
        # count of zero) would otherwise raise ZeroDivisionError.
        if sample_count == 0:
            return float('nan')
        return loss_sum / float(sample_count)

    total_loss = 0
    total_count = 0
    print("============================= TEST ============================")
    model.switch_to_eval()

    # Both splits are evaluated identically and share the accumulators, so
    # the running value printed for the portrait split includes the
    # landscape results.
    for dataset in (test_dataset_l, test_dataset_p):
        for data in dataset:
            stacked_img = data['img_1']
            targets = data['target_1']
            rmse_loss, count = model.evaluate_sc_inv(stacked_img, targets)
            total_loss += rmse_loss
            total_count += count
            print('RMSE loss is', mean_loss(total_loss, total_count))

    print('average RMSE loss is', mean_loss(total_loss, total_count))
# Script entry: run the evaluation once over both test splits, with progress
# banners printed before and after.
print("WE ARE IN TESTING RMSE!!!!")
test(model)
print("WE ARE DONE TESTING!!!")
print("We are done")
================================================
FILE: MegaDepth/util/__init__.py
================================================
================================================
FILE: MegaDepth/util/html.py
================================================
import dominate
from dominate.tags import *
import os
class HTML:
    """Small helper that builds an ``index.html`` gallery with ``dominate``.

    The page lives in ``web_dir``; images referenced by the page are expected
    in the ``images`` sub-directory, which is created on construction.
    """

    def __init__(self, web_dir, title, reflesh=0):
        """Create the output directories and an empty document.

        Args:
            web_dir: directory that will receive ``index.html``.
            title: title of the HTML document.
            reflesh: if greater than zero, the page asks the browser to
                reload itself every ``reflesh`` seconds.  The (misspelt)
                parameter name is kept for backward compatibility.
        """
        self.title = title
        self.web_dir = web_dir
        self.img_dir = os.path.join(self.web_dir, 'images')
        if not os.path.exists(self.web_dir):
            os.makedirs(self.web_dir)
        if not os.path.exists(self.img_dir):
            os.makedirs(self.img_dir)

        self.doc = dominate.document(title=title)
        if reflesh > 0:
            with self.doc.head:
                # The http-equiv keyword must be spelt "refresh"; the former
                # value "reflesh" is not a recognised directive, so the page
                # never reloaded.
                meta(http_equiv="refresh", content=str(reflesh))

    def get_image_dir(self):
        """Return the directory in which the page's images are stored."""
        return self.img_dir

    def add_header(self, str):
        """Append an ``<h3>`` heading with the given text to the document."""
        with self.doc:
            h3(str)

    def add_table(self, border=1):
        """Append a new fixed-layout table and remember it as ``self.t``."""
        self.t = table(border=border, style="table-layout: fixed;")
        self.doc.add(self.t)

    def add_images(self, ims, txts, links, width=400):
        """Append one table row of linked images with captions.

        Args:
            ims: image file names, relative to the ``images`` directory.
            txts: caption for each image.
            links: link target for each image, relative to ``images``.
            width: displayed image width in pixels.
        """
        self.add_table()
        with self.t:
            with tr():
                for im, txt, link in zip(ims, txts, links):
                    with td(style="word-wrap: break-word;", halign="center", valign="top"):
                        with p():
                            with a(href=os.path.join('images', link)):
                                img(style="width:%dpx" % width, src=os.path.join('images', im))
                            br()
                            p(txt)

    def save(self):
        """Render the document and write it to ``<web_dir>/index.html``."""
        html_file = '%s/index.html' % self.web_dir
        # Context manager guarantees the handle is closed even if rendering
        # or writing fails.
        with open(html_file, 'wt') as f:
            f.write(self.doc.render())
if __name__ == '__main__':
    # Manual smoke test: write a page with one header and a row of four
    # image cells into the local 'web/' directory.
    html = HTML('web/', 'test_html')
    html.add_header('hello world')

    indices = range(4)
    ims = ['image_%d.png' % n for n in indices]
    txts = ['text_%d' % n for n in indices]
    links = ['image_%d.png' % n for n in indices]

    html.add_images(ims, txts, links)
    html.save()
================================================
FILE: MegaDepth/util/image_pool.py
================================================
import random
import numpy as np
import torch
from pdb import set_trace as st
from torch.autograd import Variable
class ImagePool():
    """Fixed-capacity buffer of previously seen images.

    ``query`` returns, for each incoming image, either that image or (with
    probability one half once the buffer is full) a stored one, which is then
    replaced by the incoming image.
    """

    def __init__(self, pool_size):
        """Remember the capacity; a capacity of 0 disables the pool."""
        self.pool_size = pool_size
        if self.pool_size > 0:
            self.num_imgs = 0
            self.images = []

    def query(self, images):
        """Return a batch assembled from ``images`` and the stored history.

        With ``pool_size == 0`` the input is returned untouched.  Otherwise
        the result is the concatenation along dimension 0 of one image per
        input image.
        """
        if self.pool_size == 0:
            return images

        selected = []
        for sample in images.data:
            sample = torch.unsqueeze(sample, 0)

            if self.num_imgs < self.pool_size:
                # Pool not full yet: store the image and pass it through.
                self.num_imgs += 1
                self.images.append(sample)
                selected.append(sample)
                continue

            if random.uniform(0, 1) > 0.5:
                # Hand out a stored image and put the new one in its slot.
                slot = random.randint(0, self.pool_size - 1)
                previous = self.images[slot].clone()
                self.images[slot] = sample
                selected.append(previous)
            else:
                selected.append(sample)

        return Variable(torch.cat(selected, 0))
================================================
FILE: MegaDepth/util/png.py
================================================
import struct
import zlib
def encode(buf, width, height):
    """Encode raw 8-bit RGB pixel data as a PNG byte string.

    ``buf`` must be bytes or a bytearray in py3 (a regular string in py2),
    laid out as RGBRGB... with exactly ``width * height * 3`` entries.  Rows
    are written to the PNG in the reverse of their order in ``buf``.
    """
    bytes_per_pixel = 3
    assert (width * height * 3 == len(buf))

    def make_chunk(tag, payload):
        # A PNG chunk: big-endian length, tag, payload, CRC over tag+payload.
        checksum = 0xFFFFFFFF & zlib.crc32(payload, zlib.crc32(tag))
        return [
            struct.pack("!I", len(payload)),
            tag,
            payload,
            struct.pack("!I", checksum),
        ]

    # Header: dimensions, bit depth 8, colour type 2 (RGB), then three zero
    # bytes for compression, filter and interlace method.
    pieces = [b'\x89PNG\r\n\x1a\n']
    pieces += make_chunk(b'IHDR', struct.pack("!2I5B", width, height, 8, 2, 0, 0, 0))

    # Walk the rows from the last one to the first, prefixing each scanline
    # with a null filter-type byte.
    stride = width * bytes_per_pixel
    scanlines = []
    for offset in range((height - 1) * width * bytes_per_pixel, -1, -stride):
        scanlines.append(b'\x00')
        scanlines.append(buf[offset:offset + stride])

    pieces += make_chunk(b'IDAT', zlib.compress(b''.join(scanlines), 9))
    pieces += make_chunk(b'IEND', b'')
    return b''.join(pieces)
================================================
FILE: MegaDepth/util/util.py
================================================
from __future__ import print_function
import torch
import numpy as np
from PIL import Image
import inspect, re
import numpy as np
import os
import collections
def tensor2im(image_tensor, imtype=np.uint8):
    """Convert the first element of a tensor batch into a numpy image.

    The first entry along dimension 0 is moved to the CPU, cast to float,
    transposed from (C, H, W) to (H, W, C) and mapped with
    ``(v + 1) / 2 * 255``.

    |imtype|: the desired type of the converted numpy array
    """
    first = image_tensor[0].cpu().float().numpy()
    channels_last = np.transpose(first, (1, 2, 0))
    scaled = (channels_last + 1) / 2.0 * 255.0
    return scaled.astype(imtype)
def diagnose_network(net, name='network'):
    """Print ``name`` followed by the mean absolute gradient of ``net``.

    For every parameter that has a gradient, the mean of the absolute
    gradient values is taken; the printed number is the average of those
    per-parameter means (0.0 when no parameter has a gradient).
    """
    per_param = [
        torch.mean(torch.abs(param.grad.data))
        for param in net.parameters()
        if param.grad is not None
    ]
    mean = sum(per_param, 0.0)
    if per_param:
        mean = mean / len(per_param)
    print(name)
    print(mean)
def save_image(image_numpy, image_path):
    """Write a numpy image array to ``image_path`` using PIL."""
    Image.fromarray(image_numpy).save(image_path)
def info(object, spacing=10, collapse=1):
    """Print methods and doc strings.

    Takes module, class, list, dictionary, or string.

    Args:
        object: the object whose callable attributes are listed.
        spacing: column width to which each attribute name is left-justified.
        collapse: if truthy, runs of whitespace inside each doc string are
            collapsed to single spaces.
    """
    # The builtin callable() replaces isinstance(..., collections.Callable):
    # that alias was removed from the collections module in Python 3.10.
    method_names = [name for name in dir(object) if callable(getattr(object, name))]

    if collapse:
        def process(text):
            return " ".join(text.split())
    else:
        def process(text):
            return text

    rows = [
        "%s %s" % (name.ljust(spacing), process(str(getattr(object, name).__doc__)))
        for name in method_names
    ]
    print("\n".join(rows))
def varname(p):
    """Return the name of the variable passed to ``varname`` by the caller.

    The caller's current source line is searched for a call of the form
    ``varname(<identifier>)`` and the identifier is returned.  ``None`` is
    returned when no such call is found or when the caller's source is not
    available (for example code run through ``exec``).
    """
    caller = inspect.currentframe().f_back
    context = inspect.getframeinfo(caller)[3]
    # code_context is None when the source cannot be retrieved; treat it as
    # "no lines" instead of failing with a TypeError on iteration.
    for line in context or ():
        m = re.search(r'\bvarname\s*\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)', line)
        if m:
            return m.group(1)
    return None
def print_numpy(x, val=True, shp=False):
    """Print summary information about a numpy array.

    The array is converted to float64 first.  With ``shp`` its shape is
    printed; with ``val`` the mean, min, max, median and standard deviation
    of the flattened values are printed.
    """
    as_double = x.astype(np.float64)
    if shp:
        print('shape,', as_double.shape)
    if not val:
        return
    flat = as_double.flatten()
    stats = (np.mean(flat), np.min(flat), np.max(flat), np.median(flat), np.std(flat))
    print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % stats)
def mkdirs(paths):
    """Create one directory, or every directory in a list of paths."""
    is_path_list = isinstance(paths, list) and not isinstance(paths, str)
    targets = paths if is_path_list else [paths]
    for target in targets:
        mkdir(target)


def mkdir(path):
    """Create ``path`` (including parents) unless it already exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)
================================================
FILE: MegaDepth/util/visualizer.py
================================================
import numpy as np
import os
import ntpath
import time
from . import util
from . import html
class Visualizer():
    """Reports images and error values through visdom and/or an HTML page.

    visdom is used when ``opt.display_id`` is positive; an HTML gallery under
    ``<checkpoints_dir>/<name>/web`` is used when training and HTML output
    has not been disabled.
    """

    def __init__(self, opt):
        """Read the display options and prepare the selected back ends."""
        self.display_id = opt.display_id
        self.use_html = opt.isTrain and not opt.no_html
        self.win_size = opt.display_winsize
        self.name = opt.name

        if self.display_id > 0:
            # Imported lazily so visdom is only needed when it is used.
            import visdom
            self.vis = visdom.Visdom()

        if self.use_html:
            self.web_dir = os.path.join(opt.checkpoints_dir, opt.name, 'web')
            self.img_dir = os.path.join(self.web_dir, 'images')
            print('create web directory %s...' % self.web_dir)
            util.mkdirs([self.web_dir, self.img_dir])

    # |visuals|: dictionary of images to display or save
    def display_current_results(self, visuals, epoch):
        """Show the current images in visdom and/or add them to the web page."""
        if self.display_id > 0:
            # One visdom window per image, numbered after the loss window.
            for offset, (label, image_numpy) in enumerate(visuals.items(), 1):
                self.vis.image(image_numpy.transpose([2, 0, 1]),
                               opts=dict(title=label),
                               win=self.display_id + offset)

        if not self.use_html:
            return

        # Save this epoch's images to disk.
        for label, image_numpy in visuals.items():
            file_name = 'epoch%.3d_%s.png' % (epoch, label)
            util.save_image(image_numpy, os.path.join(self.img_dir, file_name))

        # Rebuild the whole page, newest epoch first.
        webpage = html.HTML(self.web_dir, 'Experiment name = %s' % self.name, reflesh=1)
        for n in range(epoch, 0, -1):
            webpage.add_header('epoch [%d]' % n)
            txts = [label for label, _ in visuals.items()]
            ims = ['epoch%.3d_%s.png' % (n, label) for label in txts]
            links = list(ims)
            webpage.add_images(ims, txts, links, width=self.win_size)
        webpage.save()

    # errors: dictionary of error labels and values
    def plot_current_errors(self, epoch, counter_ratio, opt, errors):
        """Append the current errors to the history and redraw the loss plot."""
        if not hasattr(self, 'plot_data'):
            # The legend is fixed by the keys of the first call.
            self.plot_data = {'X': [], 'Y': [], 'legend': list(errors.keys())}

        legend = self.plot_data['legend']
        self.plot_data['X'].append(epoch + counter_ratio)
        self.plot_data['Y'].append([errors[key] for key in legend])

        x_values = np.array(self.plot_data['X'])
        self.vis.line(
            X=np.stack([x_values] * len(legend), 1),
            Y=np.array(self.plot_data['Y']),
            opts={
                'title': self.name + ' loss over time',
                'legend': legend,
                'xlabel': 'epoch',
                'ylabel': 'loss'},
            win=self.display_id)

    # errors: same format as |errors| of plotCurrentErrors
    def print_current_errors(self, epoch, i, errors, t):
        """Print one line with the epoch, iteration, time and every error."""
        prefix = '(epoch: %d, iters: %d, time: %.3f) ' % (epoch, i, t)
        details = ''.join('%s: %.3f ' % (key, value) for key, value in errors.items())
        print(prefix + details)

    # save image to the disk
    def save_images(self, webpage, visuals, image_path):
        """Save every image in ``visuals`` and add them as a row to ``webpage``.

        File names are derived from the base name of ``image_path[0]``.
        """
        image_dir = webpage.get_image_dir()
        name = os.path.splitext(ntpath.basename(image_path[0]))[0]
        webpage.add_header(name)

        ims, txts, links = [], [], []
        for label, image_numpy in visuals.items():
            image_name = '%s_%s.png' % (name, label)
            util.save_image(image_numpy, os.path.join(image_dir, image_name))
            ims.append(image_name)
            txts.append(label)
            links.append(image_name)
        webpage.add_images(ims, txts, links, width=self.win_size)
================================================
FILE: PWCNet/PWCNet.py
================================================
"""
implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018
Jinwei Gu and Zhile Ren
"""
import torch
import torch.nn as nn
from torch.autograd import Variable
import os
os.environ['PYTHON_EGG_CACHE'] = 'tmp/' # a writable directory
#from .correlation_package.modules.corr import Correlation
# from PWCNet.correlation_package_pytorch0_4.correlation import Correlation #pytorch0.4 version
from PWCNet.correlation_package_pytorch1_0.correlation import Correlation #pytorch0.4 version
import numpy as np
# Names exported by ``from ... import *``.
# NOTE(review): presumably both factories are defined further down in this
# file -- confirm.
__all__ = [
    'pwc_dc_net', 'pwc_dc_net_old'
    ]
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """Return a biased 2-D convolution followed by LeakyReLU(0.1)."""
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.LeakyReLU(0.1)
    return nn.Sequential(convolution, activation)
def predict_flow(in_planes):
    """Return a biased 3x3 convolution mapping ``in_planes`` channels to 2."""
    flow_head = nn.Conv2d(in_planes, 2, kernel_size=3, stride=1, padding=1, bias=True)
    return flow_head
def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    """Return a biased 2-D transposed convolution with the given geometry."""
    return nn.ConvTranspose2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=True,
    )
import time
class PWCDCNet(nn.Module):
    """
    PWC-DC net. add dilation convolution and densenet connections

    The network builds a six-level feature pyramid for each of the two input
    images, then estimates flow from the coarsest level (6) down to level 2.
    At each level the second image's features are warped with the upsampled
    flow, correlated with the first image's features, and passed through a
    densely connected stack of convolutions.  A dilated-convolution context
    network refines the level-2 flow.

    The constructor allocates a coordinate grid on the GPU, so a CUDA device
    is required.
    """

    def __init__(self, md=4):
        """
        input: md --- maximum displacement (for correlation. default: 4), after warpping
        """
        super(PWCDCNet, self).__init__()

        # Feature pyramid: each level halves the resolution (stride 2) and is
        # followed by two stride-1 convolutions.
        self.conv1a = conv(3, 16, kernel_size=3, stride=2)
        self.conv1aa = conv(16, 16, kernel_size=3, stride=1)
        self.conv1b = conv(16, 16, kernel_size=3, stride=1)
        self.conv2a = conv(16, 32, kernel_size=3, stride=2)
        self.conv2aa = conv(32, 32, kernel_size=3, stride=1)
        self.conv2b = conv(32, 32, kernel_size=3, stride=1)
        self.conv3a = conv(32, 64, kernel_size=3, stride=2)
        self.conv3aa = conv(64, 64, kernel_size=3, stride=1)
        self.conv3b = conv(64, 64, kernel_size=3, stride=1)
        self.conv4a = conv(64, 96, kernel_size=3, stride=2)
        self.conv4aa = conv(96, 96, kernel_size=3, stride=1)
        self.conv4b = conv(96, 96, kernel_size=3, stride=1)
        self.conv5a = conv(96, 128, kernel_size=3, stride=2)
        self.conv5aa = conv(128, 128, kernel_size=3, stride=1)
        self.conv5b = conv(128, 128, kernel_size=3, stride=1)
        # Note the naming at level 6: conv6aa is the stride-2 layer.
        self.conv6aa = conv(128, 196, kernel_size=3, stride=2)
        self.conv6a = conv(196, 196, kernel_size=3, stride=1)
        self.conv6b = conv(196, 196, kernel_size=3, stride=1)

        self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
        self.leakyRELU = nn.LeakyReLU(0.1)

        # Number of correlation channels: one per displacement in a
        # (2*md+1) x (2*md+1) window.
        nd = (2*md+1)**2
        # Cumulative channel growth of the dense blocks, as plain Python
        # ints.  (The former ``.astype(np.int)`` used an alias that no longer
        # exists in current NumPy and was redundant with the int() cast.)
        dd = [int(d) for d in np.cumsum([128, 128, 96, 64, 32], dtype=np.int32)]

        # Level 6 decoder: input is the correlation volume only.
        od = nd
        self.conv6_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv6_1 = conv(od+dd[0], 128, kernel_size=3, stride=1)
        self.conv6_2 = conv(od+dd[1], 96, kernel_size=3, stride=1)
        self.conv6_3 = conv(od+dd[2], 64, kernel_size=3, stride=1)
        self.conv6_4 = conv(od+dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow6 = predict_flow(od+dd[4])
        self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        # Levels 5..2: input is correlation + features of image 1 +
        # upsampled flow (2 channels) + upsampled features (2 channels).
        od = nd+128+4
        self.conv5_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv5_1 = conv(od+dd[0], 128, kernel_size=3, stride=1)
        self.conv5_2 = conv(od+dd[1], 96, kernel_size=3, stride=1)
        self.conv5_3 = conv(od+dd[2], 64, kernel_size=3, stride=1)
        self.conv5_4 = conv(od+dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow5 = predict_flow(od+dd[4])
        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+96+4
        self.conv4_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv4_1 = conv(od+dd[0], 128, kernel_size=3, stride=1)
        self.conv4_2 = conv(od+dd[1], 96, kernel_size=3, stride=1)
        self.conv4_3 = conv(od+dd[2], 64, kernel_size=3, stride=1)
        self.conv4_4 = conv(od+dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow4 = predict_flow(od+dd[4])
        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+64+4
        self.conv3_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv3_1 = conv(od+dd[0], 128, kernel_size=3, stride=1)
        self.conv3_2 = conv(od+dd[1], 96, kernel_size=3, stride=1)
        self.conv3_3 = conv(od+dd[2], 64, kernel_size=3, stride=1)
        self.conv3_4 = conv(od+dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow3 = predict_flow(od+dd[4])
        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+32+4
        self.conv2_0 = conv(od, 128, kernel_size=3, stride=1)
        self.conv2_1 = conv(od+dd[0], 128, kernel_size=3, stride=1)
        self.conv2_2 = conv(od+dd[1], 96, kernel_size=3, stride=1)
        self.conv2_3 = conv(od+dd[2], 64, kernel_size=3, stride=1)
        self.conv2_4 = conv(od+dd[3], 32, kernel_size=3, stride=1)
        self.predict_flow2 = predict_flow(od+dd[4])
        self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)

        # Context network: dilated convolutions refining the level-2 flow.
        self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1)
        self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
        self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
        self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
        self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
        self.dc_conv7 = predict_flow(32)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()

        # Pre-compute a pixel-coordinate grid once so that warp() does not
        # have to allocate one on every call.  warp() slices it down to the
        # actual input size, which therefore must not exceed these limits.
        W_MAX = 2048
        H_MAX = 1024
        B_MAX = 3
        xx = torch.arange(0, W_MAX).view(1, -1).cuda().repeat(H_MAX, 1)
        yy = torch.arange(0, H_MAX).view(-1, 1).cuda().repeat(1, W_MAX)
        xx = xx.view(1, 1, H_MAX, W_MAX).repeat(B_MAX, 1, 1, 1)
        yy = yy.view(1, 1, H_MAX, W_MAX).repeat(B_MAX, 1, 1, 1)
        grid = torch.cat((xx, yy), 1).float()
        ## for saving time on allocating a grid in forward
        self.W_MAX = W_MAX
        self.H_MAX = H_MAX
        self.B_MAX = B_MAX
        self.grid = Variable(grid, requires_grad=False)

    def warp(self, x, flo):
        """
        warp an image/tensor (im2) back to im1, according to the optical flow

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow

        Returns the warped tensor with positions whose sampling mask is
        below 0.9999 set to zero.  The input must fit inside the
        pre-allocated grid (B <= B_MAX, H <= H_MAX, W <= W_MAX).
        """
        B, C, H, W = x.size()
        assert(B <= self.B_MAX and H <= self.H_MAX and W <= self.W_MAX)
        # Absolute sampling positions: pixel coordinates plus flow.
        vgrid = self.grid[:B, :, :H, :W] + flo

        # scale grid to [-1,1]
        # NOTE(review): this normalisation maps pixel 0 to -1 and pixel W-1
        # to +1; on PyTorch versions where grid_sample defaults to
        # align_corners=False that mapping is slightly off -- confirm against
        # the PyTorch version in use.
        vgrid[:, 0, :, :] = 2.0*vgrid[:, 0, :, :].clone()/max(W-1, 1)-1.0
        vgrid[:, 1, :, :] = 2.0*vgrid[:, 1, :, :].clone()/max(H-1, 1)-1.0

        vgrid = vgrid.permute(0, 2, 3, 1)
        output = nn.functional.grid_sample(x, vgrid)
        # Warp an all-ones tensor with the same grid to find the positions
        # that were sampled fully inside the image.
        mask = torch.autograd.Variable(torch.cuda.FloatTensor().resize_(x.size()).zero_() + 1, requires_grad = False)
        mask = nn.functional.grid_sample(mask, vgrid)

        mask[mask<0.9999] = 0
        mask[mask>0] = 1

        return output*mask

    def forward(self, x, output_more = False):
        """Estimate optical flow for an image pair.

        x: [B, C, H, W]; the first three channels are taken as image 1 and
            the remaining channels as image 2.
        output_more: if true, return the flows of all levels as the list
            [flow2, flow3, flow4, flow5, flow6]; otherwise return only the
            refined level-2 flow.
        """
        im1 = x[:, :3, :, :]
        im2 = x[:, 3:, :, :]

        # Feature pyramids; the same layers are applied to both images.
        c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
        c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
        c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
        c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
        c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
        c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
        c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
        c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
        c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
        c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
        c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
        c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))

        # Level 6: no warping, correlation of the raw features.
        corr6 = self.corr(c16, c26)
        corr6 = self.leakyRELU(corr6)

        x = torch.cat((self.conv6_0(corr6), corr6), 1)
        x = torch.cat((self.conv6_1(x), x), 1)
        x = torch.cat((self.conv6_2(x), x), 1)
        x = torch.cat((self.conv6_3(x), x), 1)
        x = torch.cat((self.conv6_4(x), x), 1)
        flow6 = self.predict_flow6(x)
        up_flow6 = self.deconv6(flow6)
        up_feat6 = self.upfeat6(x)

        # Level 5.  NOTE(review): the factors 0.625 / 1.25 / 2.5 / 5.0 double
        # per level; presumably they convert the predicted flow into pixel
        # units at each resolution -- confirm against the training setup.
        warp5 = self.warp(c25, up_flow6*0.625)
        corr5 = self.corr(c15, warp5)
        corr5 = self.leakyRELU(corr5)
        x = torch.cat((corr5, c15, up_flow6, up_feat6), 1)
        x = torch.cat((self.conv5_0(x), x), 1)
        x = torch.cat((self.conv5_1(x), x), 1)
        x = torch.cat((self.conv5_2(x), x), 1)
        x = torch.cat((self.conv5_3(x), x), 1)
        x = torch.cat((self.conv5_4(x), x), 1)
        flow5 = self.predict_flow5(x)
        up_flow5 = self.deconv5(flow5)
        up_feat5 = self.upfeat5(x)

        # Level 4.
        warp4 = self.warp(c24, up_flow5*1.25)
        corr4 = self.corr(c14, warp4)
        corr4 = self.leakyRELU(corr4)
        x = torch.cat((corr4, c14, up_flow5, up_feat5), 1)
        x = torch.cat((self.conv4_0(x), x), 1)
        x = torch.cat((self.conv4_1(x), x), 1)
        x = torch.cat((self.conv4_2(x), x), 1)
        x = torch.cat((self.conv4_3(x), x), 1)
        x = torch.cat((self.conv4_4(x), x), 1)
        flow4 = self.predict_flow4(x)
        up_flow4 = self.deconv4(flow4)
        up_feat4 = self.upfeat4(x)

        # Level 3.
        warp3 = self.warp(c23, up_flow4*2.5)
        corr3 = self.corr(c13, warp3)
        corr3 = self.leakyRELU(corr3)
        x = torch.cat((corr3, c13, up_flow4, up_feat4), 1)
        x = torch.cat((self.conv3_0(x), x), 1)
        x = torch.cat((self.conv3_1(x), x), 1)
        x = torch.cat((self.conv3_2(x), x), 1)
        x = torch.cat((self.conv3_3(x), x), 1)
        x = torch.cat((self.conv3_4(x), x), 1)
        flow3 = self.predict_flow3(x)
        up_flow3 = self.deconv3(flow3)
        up_feat3 = self.upfeat3(x)

        # Level 2.
        warp2 = self.warp(c22, up_flow3*5.0)
        corr2 = self.corr(c12, warp2)
        corr2 = self.leakyRELU(corr2)
        x = torch.cat((corr2, c12, up_flow3, up_feat3), 1)
        x = torch.cat((self.conv2_0(x), x), 1)
        x = torch.cat((self.conv2_1(x), x), 1)
        x = torch.cat((self.conv2_2(x), x), 1)
        x = torch.cat((self.conv2_3(x), x), 1)
        x = torch.cat((self.conv2_4(x), x), 1)
        flow2 = self.predict_flow2(x)

        # Context network: add a residual refinement to the level-2 flow.
        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
        flow2 += self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))

        # we don't have the gt for flow, we just fine tune it on flownets
        if not output_more:
            return flow2
        else:
            return [flow2, flow3, flow4, flow5, flow6]
class PWCDCNet_old(nn.Module):
"""
PWC-DC net. add dilation convolution and densenet connections
"""
def __init__(self, md=4):
"""
input: md --- maximum displacement (for correlation. default: 4), after warpping
"""
super(PWCDCNet_old,self).__init__()
self.conv1a = conv(3, 16, kernel_size=3, stride=2)
self.conv1b = conv(16, 16, kernel_size=3, stride=1)
self.conv2a = conv(16, 32, kernel_size=3, stride=2)
self.conv2b = conv(32, 32, kernel_size=3, stride=1)
self.conv3a = conv(32, 64, kernel_size=3, stride=2)
self.conv3b = conv(64, 64, kernel_size=3, stride=1)
self.conv4a = conv(64, 96, kernel_size=3, stride=2)
self.conv4b = conv(96, 96, kernel_size=3, stride=1)
self.conv5a = conv(96, 128, kernel_size=3, stride=2)
self.conv5b = conv(128,128, kernel_size=3, stride=1)
self.conv6a = conv(128,196, kernel_size=3, stride=2)
self.conv6b = conv(196,196, kernel_size=3, stride=1)
self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
self.leakyRELU = nn.LeakyReLU(0.1)
nd = (2*md+1)**2
dd = np.cumsum([128,128,96,64,32])
od = nd
self.conv6_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv6_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv6_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv6_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow6 = predict_flow(od+dd[4])
self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+128+4
self.conv5_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv5_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv5_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv5_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow5 = predict_flow(od+dd[4])
self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+96+4
self.conv4_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv4_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv4_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv4_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow4 = predict_flow(od+dd[4])
self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+64+4
self.conv3_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv3_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv3_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv3_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow3 = predict_flow(od+dd[4])
self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)
od = nd+32+4
self.conv2_0 = conv(od, 128, kernel_size=3, stride=1)
self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
self.conv2_2 = conv(od+dd[1],96, kernel_size=3, stride=1)
self.conv2_3 = conv(od+dd[2],64, kernel_size=3, stride=1)
self.conv2_4 = conv(od+dd[3],32, kernel_size=3, stride=1)
self.predict_flow2 = predict_flow(od+dd[4])
self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
self.dc_conv7 = predict_flow(32)
for m in self.modules():
if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal(m.weight.data, mode='fan_in')
if m.bias is not None:
m.bias.data.zero_()
def warp(self, x, flo):
    """
    Warp an image/tensor (im2) back to im1, according to the optical flow.

    x: [B, C, H, W] (im2)
    flo: [B, 2, H, W] flow; channel 0 is added to the column (x) coordinate,
         channel 1 to the row (y) coordinate
    returns: [B, C, H, W] warped tensor; positions whose sampling footprint
             is not (almost) fully inside the input are set to zero
    """
    B, C, H, W = x.size()
    # mesh grid of absolute pixel coordinates (channel 0 = x, channel 1 = y)
    xx = torch.arange(0, W).view(1, -1).repeat(H, 1)
    yy = torch.arange(0, H).view(-1, 1).repeat(1, W)
    xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1)
    yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1)
    # Follow the device of x instead of calling .cuda(): works on CPU and on
    # a non-default GPU alike.
    grid = torch.cat((xx, yy), 1).float().to(x.device)
    vgrid = grid + flo

    # scale grid to [-1,1]
    # NOTE(review): this normalisation maps pixel i to 2*i/(W-1)-1; confirm it
    # matches the align_corners default of the installed grid_sample.
    vgrid[:, 0, :, :] = 2.0 * vgrid[:, 0, :, :] / max(W - 1, 1) - 1.0
    vgrid[:, 1, :, :] = 2.0 * vgrid[:, 1, :, :] / max(H - 1, 1) - 1.0

    vgrid = vgrid.permute(0, 2, 3, 1)
    output = nn.functional.grid_sample(x, vgrid)

    # Sampling an all-ones tensor gives the in-bounds coverage of every
    # output position; binarise it so partially covered samples are dropped.
    mask = nn.functional.grid_sample(torch.ones_like(x), vgrid)
    mask[mask < 0.999] = 0
    mask[mask > 0] = 1

    return output * mask
def forward(self, x):
    """
    Estimate optical flow for an image pair stacked along the channel axis.

    x: tensor whose first three channels are taken as im1 and whose remaining
       channels are taken as im2
    returns: (flow2, flow3, flow4, flow5, flow6) while self.training is set,
             otherwise only flow2
    """
    first = x[:, :3, :, :]
    second = x[:, 3:, :, :]

    # Feature pyramid: the same pair of conv layers is applied to both images
    # at every level.
    encoders = (
        (self.conv1a, self.conv1b),
        (self.conv2a, self.conv2b),
        (self.conv3a, self.conv3b),
        (self.conv4a, self.conv4b),
        (self.conv5a, self.conv5b),
        (self.conv6a, self.conv6b),
    )
    pyramid1, pyramid2 = [], []
    for conv_a, conv_b in encoders:
        first = conv_b(conv_a(first))
        second = conv_b(conv_a(second))
        pyramid1.append(first)
        pyramid2.append(second)
    _, c12, c13, c14, c15, c16 = pyramid1
    _, c22, c23, c24, c25, c26 = pyramid2

    def dense(feat, layer0, layer1, layer2, layer3, layer4):
        # Densely connected block. The concatenation order (append, prepend,
        # append, append, append) is reproduced exactly because it fixes the
        # channel layout seen by the following layers.
        feat = torch.cat((feat, layer0(feat)), 1)
        feat = torch.cat((layer1(feat), feat), 1)
        feat = torch.cat((feat, layer2(feat)), 1)
        feat = torch.cat((feat, layer3(feat)), 1)
        feat = torch.cat((feat, layer4(feat)), 1)
        return feat

    # Level 6: correlation of the raw features, no warping.
    corr6 = self.leakyRELU(self.corr(c16, c26))
    x = dense(corr6, self.conv6_0, self.conv6_1, self.conv6_2,
              self.conv6_3, self.conv6_4)
    flow6 = self.predict_flow6(x)
    up_flow6 = self.deconv6(flow6)
    up_feat6 = self.upfeat6(x)

    # Level 5: warp the second image's features with the upsampled flow.
    warp5 = self.warp(c25, up_flow6 * 0.625)
    corr5 = self.leakyRELU(self.corr(c15, warp5))
    x = dense(torch.cat((corr5, c15, up_flow6, up_feat6), 1),
              self.conv5_0, self.conv5_1, self.conv5_2,
              self.conv5_3, self.conv5_4)
    flow5 = self.predict_flow5(x)
    up_flow5 = self.deconv5(flow5)
    up_feat5 = self.upfeat5(x)

    # Level 4
    warp4 = self.warp(c24, up_flow5 * 1.25)
    corr4 = self.leakyRELU(self.corr(c14, warp4))
    x = dense(torch.cat((corr4, c14, up_flow5, up_feat5), 1),
              self.conv4_0, self.conv4_1, self.conv4_2,
              self.conv4_3, self.conv4_4)
    flow4 = self.predict_flow4(x)
    up_flow4 = self.deconv4(flow4)
    up_feat4 = self.upfeat4(x)

    # Level 3
    warp3 = self.warp(c23, up_flow4 * 2.5)
    corr3 = self.leakyRELU(self.corr(c13, warp3))
    x = dense(torch.cat((corr3, c13, up_flow4, up_feat4), 1),
              self.conv3_0, self.conv3_1, self.conv3_2,
              self.conv3_3, self.conv3_4)
    flow3 = self.predict_flow3(x)
    up_flow3 = self.deconv3(flow3)
    up_feat3 = self.upfeat3(x)

    # Level 2
    warp2 = self.warp(c22, up_flow3 * 5.0)
    corr2 = self.leakyRELU(self.corr(c12, warp2))
    x = dense(torch.cat((corr2, c12, up_flow3, up_feat3), 1),
              self.conv2_0, self.conv2_1, self.conv2_2,
              self.conv2_3, self.conv2_4)
    flow2 = self.predict_flow2(x)

    # Dilated-convolution stack whose output is added to the level-2 flow.
    x = self.dc_conv1(x)
    x = self.dc_conv2(x)
    x = self.dc_conv3(x)
    x = self.dc_conv4(x)
    flow2 += self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))

    if self.training:
        return flow2, flow3, flow4, flow5, flow6
    return flow2
def pwc_dc_net(path=None):
    """
    Build a PWCDCNet and optionally load weights from a checkpoint file.

    path: checkpoint file handed to torch.load, or None to keep the freshly
          initialised weights. The checkpoint may be a state dict itself or a
          dict holding the state dict under the 'state_dict' key.
    returns: the PWCDCNet instance
    """
    model = PWCDCNet()
    if path is not None:
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        # map_location='cpu' lets a checkpoint saved from GPU tensors load on
        # a CPU-only host; load_state_dict copies values into the model's own
        # parameters, so the resulting model is the same.
        data = torch.load(path, map_location='cpu')
        state_dict = data['state_dict'] if 'state_dict' in data else data
        model.load_state_dict(state_dict)
    return model
def pwc_dc_net_old(path=None):
    """
    Build a PWCDCNet_old and optionally load weights from a checkpoint file.

    path: checkpoint file handed to torch.load, or None to keep the freshly
          initialised weights. The checkpoint may be a state dict itself or a
          dict holding the state dict under the 'state_dict' key.
    returns: the PWCDCNet_old instance
    """
    model = PWCDCNet_old()
    if path is not None:
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        # map_location='cpu' lets a checkpoint saved from GPU tensors load on
        # a CPU-only host; load_state_dict copies values into the model's own
        # parameters, so the resulting model is the same.
        data = torch.load(path, map_location='cpu')
        state_dict = data['state_dict'] if 'state_dict' in data else data
        model.load_state_dict(state_dict)
    return model
================================================
FILE: PWCNet/__init__.py
================================================
from .PWCNet import *
================================================
FILE: PWCNet/correlation_package_pytorch1_0/__init__.py
================================================
================================================
FILE: PWCNet/correlation_package_pytorch1_0/build.sh
================================================
#!/usr/bin/env bash
# Rebuild and install the correlation_cuda extension from this directory.
echo "Need pytorch>=1.0.0"
# Activate the environment named pytorch1.0.0 (presumably a conda env; verify).
source activate pytorch1.0.0
# Make ../../my_package importable — presumably so setup.py can import
# compiler_args from there; verify against the repository layout.
export PYTHONPATH=$PYTHONPATH:$(pwd)/../../my_package
# Remove artefacts of any previous build before installing.
rm -rf build *.egg-info dist
python setup.py install
================================================
FILE: PWCNet/correlation_package_pytorch1_0/clean.sh
================================================
#!/usr/bin/env bash
# Remove the build artefacts of the correlation_cuda extension without rebuilding.
echo "Need pytorch>=1.0.0"
# Activate the environment named pytorch1.0.0 (presumably a conda env; verify).
source activate pytorch1.0.0
# Delete build output, egg metadata and distribution folders in this directory.
rm -rf build *.egg-info dist
# The install step is intentionally left disabled in the clean script.
#python setup.py install
================================================
FILE: PWCNet/correlation_package_pytorch1_0/correlation.py
================================================
import torch
from torch.nn.modules.module import Module
from torch.autograd import Function
import correlation_cuda
class CorrelationFunction(Function):
    """
    Autograd function that forwards to the compiled correlation_cuda extension.

    The instance stores the correlation hyper-parameters and passes them,
    unchanged, to correlation_cuda.forward / correlation_cuda.backward.
    """

    def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1):
        super(CorrelationFunction, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply

    def forward(self, input1, input2):
        """Return the correlation volume of input1 and input2."""
        self.save_for_backward(input1, input2)

        with torch.cuda.device_of(input1):
            # Empty tensors of the inputs' type; handed to the extension as
            # scratch buffers and as the output buffer.
            scratch1 = input1.new()
            scratch2 = input2.new()
            correlation = input1.new()

            correlation_cuda.forward(
                input1, input2, scratch1, scratch2, correlation,
                self.pad_size, self.kernel_size, self.max_displacement,
                self.stride1, self.stride2, self.corr_multiply,
            )

        return correlation

    def backward(self, grad_output):
        """Return the gradients with respect to input1 and input2."""
        input1, input2 = self.saved_tensors

        with torch.cuda.device_of(input1):
            scratch1 = input1.new()
            scratch2 = input2.new()
            grad1 = input1.new()
            grad2 = input2.new()

            correlation_cuda.backward(
                input1, input2, scratch1, scratch2, grad_output, grad1, grad2,
                self.pad_size, self.kernel_size, self.max_displacement,
                self.stride1, self.stride2, self.corr_multiply,
            )

        return grad1, grad2
class Correlation(Module):
    """
    Module wrapper around CorrelationFunction.

    Keeps the correlation hyper-parameters as attributes and creates a new
    CorrelationFunction with them on every forward call.
    """

    def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1):
        super(Correlation, self).__init__()
        self.pad_size = pad_size
        self.kernel_size = kernel_size
        self.max_displacement = max_displacement
        self.stride1 = stride1
        self.stride2 = stride2
        self.corr_multiply = corr_multiply

    def forward(self, input1, input2):
        """Return the correlation of input1 and input2."""
        correlate = CorrelationFunction(
            self.pad_size,
            self.kernel_size,
            self.max_displacement,
            self.stride1,
            self.stride2,
            self.corr_multiply,
        )
        return correlate(input1, input2)
================================================
FILE: PWCNet/correlation_package_pytorch1_0/correlation_cuda.cc
================================================
#include <torch/torch.h>
#include <ATen/ATen.h>
#include <stdio.h>
#include <iostream>
#include <ATen/cuda/CUDAContext.h> //works for 1.0.0
#include "correlation_cuda_kernel.cuh"
// Host entry point for the correlation forward pass (bound to Python as "forward").
//
// input1, input2 : indexed here as (batch, channels, height, width).
// rInput1/rInput2: scratch tensors; resized to
//                  (batch, paddedHeight, paddedWidth, channels) and zero-filled.
// output         : resized to (batch, nOutputChannels, outputHeight, outputWidth)
//                  and zero-filled before the kernels run.
// Returns 1 on success; raises through AT_ERROR when the launcher reports failure.
int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply)
{
int batchSize = input1.size(0);
int nInputChannels = input1.size(1);
int inputHeight = input1.size(2);
int inputWidth = input1.size(3);
int kernel_radius = (kernel_size - 1) / 2;
// Margin that cannot be used as a patch centre: kernel radius plus the search range.
int border_radius = kernel_radius + max_displacement;
int paddedInputHeight = inputHeight + 2 * pad_size;
int paddedInputWidth = inputWidth + 2 * pad_size;
// One output channel per displacement on a (2*(max_displacement/stride2)+1)^2 grid.
int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1);
int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1));
int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1));
rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth});
// Zero fill so the padding border of the scratch buffers reads as zero.
rInput1.fill_(0);
rInput2.fill_(0);
output.fill_(0);
// Argument order must match the declaration in correlation_cuda_kernel.cuh.
int success = correlation_forward_cuda_kernel(
output,
output.size(0),
output.size(1),
output.size(2),
output.size(3),
output.stride(0),
output.stride(1),
output.stride(2),
output.stride(3),
input1,
input1.size(1),
input1.size(2),
input1.size(3),
input1.stride(0),
input1.stride(1),
input1.stride(2),
input1.stride(3),
input2,
input2.size(1),
input2.stride(0),
input2.stride(1),
input2.stride(2),
input2.stride(3),
rInput1,
rInput2,
pad_size,
kernel_size,
max_displacement,
stride1,
stride2,
corr_type_multiply,
// at::globalContext().getCurrentCUDAStream() //works for 0.4.1
at::cuda::getCurrentCUDAStream() //works for 1.0.0
);
//check for errors
if (!success) {
AT_ERROR("CUDA call failed");
}
return 1;
}
// Host entry point for the correlation backward pass (bound to Python as "backward").
//
// input1, input2       : the forward inputs, indexed as (batch, channels, height, width).
// rInput1/rInput2      : scratch tensors; resized to
//                        (batch, paddedHeight, paddedWidth, channels) and zero-filled.
// gradOutput           : gradient with respect to the forward output.
// gradInput1/gradInput2: resized to input1's (batch, channels, height, width) and
//                        zero-filled; the kernels rely on that zero fill.
// Returns 1 on success; raises through AT_ERROR when the launcher reports failure.
int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput,
at::Tensor& gradInput1, at::Tensor& gradInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply)
{
int batchSize = input1.size(0);
int nInputChannels = input1.size(1);
int paddedInputHeight = input1.size(2)+ 2 * pad_size;
int paddedInputWidth = input1.size(3)+ 2 * pad_size;
int height = input1.size(2);
int width = input1.size(3);
rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels});
gradInput1.resize_({batchSize, nInputChannels, height, width});
gradInput2.resize_({batchSize, nInputChannels, height, width});
rInput1.fill_(0);
rInput2.fill_(0);
gradInput1.fill_(0);
gradInput2.fill_(0);
// Argument order must match the declaration in correlation_cuda_kernel.cuh.
int success = correlation_backward_cuda_kernel(gradOutput,
gradOutput.size(0),
gradOutput.size(1),
gradOutput.size(2),
gradOutput.size(3),
gradOutput.stride(0),
gradOutput.stride(1),
gradOutput.stride(2),
gradOutput.stride(3),
input1,
input1.size(1),
input1.size(2),
input1.size(3),
input1.stride(0),
input1.stride(1),
input1.stride(2),
input1.stride(3),
input2,
input2.stride(0),
input2.stride(1),
input2.stride(2),
input2.stride(3),
gradInput1,
gradInput1.stride(0),
gradInput1.stride(1),
gradInput1.stride(2),
gradInput1.stride(3),
gradInput2,
gradInput2.size(1),
gradInput2.stride(0),
gradInput2.stride(1),
gradInput2.stride(2),
gradInput2.stride(3),
rInput1,
rInput2,
pad_size,
kernel_size,
max_displacement,
stride1,
stride2,
corr_type_multiply,
// at::globalContext().getCurrentCUDAStream() //works for 0.4.1
at::cuda::getCurrentCUDAStream() //works for 1.0.0
);
// check for errors
if (!success) {
AT_ERROR("CUDA call failed");
}
return 1;
}
// Python bindings: exposes the two host entry points as module functions
// "forward" and "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)");
m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)");
}
================================================
FILE: PWCNet/correlation_package_pytorch1_0/correlation_cuda_kernel.cu
================================================
#include <stdio.h>
#include "correlation_cuda_kernel.cuh"
#define CUDA_NUM_THREADS 1024
#define THREADS_PER_BLOCK 32
#define FULL_MASK 0xffffffff
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Dispatch.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>
using at::Half;
// Adds up `val` across the lanes of a warp using __shfl_down_sync with
// offsets 16, 8, 4, 2, 1 under FULL_MASK. Callers in this file read the
// reduced value from thread 0 only.
template<typename scalar_t>
__forceinline__ __device__ scalar_t warpReduceSum(scalar_t val) {
for (int offset = 16; offset > 0; offset /= 2)
val += __shfl_down_sync(FULL_MASK, val, offset);
return val;
}
// Block-level sum built from two rounds of warpReduceSum: each warp reduces
// its own values, lane 0 of every warp stores the partial sum in shared
// memory, and after a barrier the first warp reduces those partial sums.
// Contains __syncthreads(), so every thread of the block must call it.
template<typename scalar_t>
__forceinline__ __device__ scalar_t blockReduceSum(scalar_t val) {
// One slot per warp.
static __shared__ scalar_t shared[32];
int lane = threadIdx.x % warpSize;
int wid = threadIdx.x / warpSize;
val = warpReduceSum(val);
if (lane == 0)
shared[wid] = val;
__syncthreads();
// Only as many threads as there are warps pick up a partial sum; the rest contribute 0.
val = (threadIdx.x < blockDim.x / warpSize) ? shared[lane] : 0;
if (wid == 0)
val = warpReduceSum(val);
return val;
}
// Copies `input`, read with (n, c, y, x) indexing, into the padded buffer
// `rinput`, written with (n, y + pad, x + pad, c) indexing, i.e. with the
// channel as the innermost index. Despite the kernel's name, the destination
// layout is therefore channels-last. The border of `rinput` is not written here.
//
// Launch layout as used in this file: one block per (n, y, x) taken from
// blockIdx.x/y/z, THREADS_PER_BLOCK threads that stride over the channels.
template <typename scalar_t>
__global__ void channels_first(const scalar_t* __restrict__ input, scalar_t* rinput, int channels, int height, int width, int pad_size)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = blockIdx.x;
int y = blockIdx.y;
int x = blockIdx.z;
int ch_off = threadIdx.x;
scalar_t value;
// Element counts per batch item / per channel plane of the unpadded input.
int dimcyx = channels * height * width;
int dimyx = height * width;
// Extents of the padded destination.
int p_dimx = (width + 2 * pad_size);
int p_dimy = (height + 2 * pad_size);
int p_dimyxc = channels * p_dimy * p_dimx;
int p_dimxc = p_dimx * channels;
for (int c = ch_off; c < channels; c += THREADS_PER_BLOCK) {
value = input[n * dimcyx + c * dimyx + y * width + x];
rinput[n * p_dimyxc + (y + pad_size) * p_dimxc + (x + pad_size) * channels + c] = value;
}
}
// Forward correlation kernel.
//
// Each block handles one output position (batch item blockIdx.x, output row
// blockIdx.y, output column blockIdx.z). For every displacement (tj, ti) on
// the search grid it sums the products of rInput1 around (y1, x1) and
// rInput2 around (y2, x2) over the kernel window and all channels, divides
// by the number of summed elements and stores the result in the output
// channel that corresponds to that displacement.
//
// rInput1 / rInput2 are indexed as padded (n, y, x, c) buffers.
template<typename scalar_t>
__global__ void correlation_forward(scalar_t* __restrict__ output, const int nOutputChannels,
const int outputHeight, const int outputWidth, const scalar_t* __restrict__ rInput1,
const int nInputChannels, const int inputHeight, const int inputWidth,
const scalar_t* __restrict__ rInput2, const int pad_size, const int kernel_size,
const int max_displacement, const int stride1, const int stride2) {
int32_t pInputWidth = inputWidth + 2 * pad_size;
int32_t pInputHeight = inputHeight + 2 * pad_size;
int32_t kernel_rad = (kernel_size - 1) / 2;
int32_t displacement_rad = max_displacement / stride2;
int32_t displacement_size = 2 * displacement_rad + 1;
int32_t n = blockIdx.x;
// Patch centre in padded coordinates, offset by the search range.
int32_t y1 = blockIdx.y * stride1 + max_displacement;
int32_t x1 = blockIdx.z * stride1 + max_displacement;
int32_t c = threadIdx.x;
// Element strides of the padded (n, y, x, c) inputs.
int32_t pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int32_t pdimxc = pInputWidth * nInputChannels;
int32_t pdimc = nInputChannels;
// Element strides of the (n, channel, y, x) output.
int32_t tdimcyx = nOutputChannels * outputHeight * outputWidth;
int32_t tdimyx = outputHeight * outputWidth;
int32_t tdimx = outputWidth;
// Normalisation factor: number of products summed per output value.
int32_t nelems = kernel_size * kernel_size * pdimc;
// element-wise product along channel axis
for (int tj = -displacement_rad; tj <= displacement_rad; ++tj) {
for (int ti = -displacement_rad; ti <= displacement_rad; ++ti) {
int x2 = x1 + ti * stride2;
int y2 = y1 + tj * stride2;
// Per-thread partial sum, accumulated in float whatever scalar_t is.
float acc0 = 0.0f;
for (int j = -kernel_rad; j <= kernel_rad; ++j) {
for (int i = -kernel_rad; i <= kernel_rad; ++i) {
// THREADS_PER_BLOCK
#pragma unroll
for (int ch = c; ch < pdimc; ch += blockDim.x) {
int indx1 = n * pdimyxc + (y1 + j) * pdimxc
+ (x1 + i) * pdimc + ch;
int indx2 = n * pdimyxc + (y2 + j) * pdimxc
+ (x2 + i) * pdimc + ch;
// The product is formed in scalar_t and only then converted to float.
acc0 += static_cast<float>(rInput1[indx1] * rInput2[indx2]);
}
}
}
// Combine the per-thread partial sums; a single-warp block needs no shared memory.
if (blockDim.x == warpSize) {
__syncwarp();
acc0 = warpReduceSum(acc0);
} else {
__syncthreads();
acc0 = blockReduceSum(acc0);
}
if (threadIdx.x == 0) {
// Output channel index of displacement (tj, ti), row-major over the search grid.
int tc = (tj + displacement_rad) * displacement_size
+ (ti + displacement_rad);
const int tindx = n * tdimcyx + tc * tdimyx + blockIdx.y * tdimx
+ blockIdx.z;
output[tindx] = static_cast<scalar_t>(acc0 / nelems);
}
}
}
}
// Backward kernel for the first input, launched once per batch item `item`.
//
// Each block computes one element of gradInput1: row from blockIdx.x,
// column from blockIdx.y, channel from blockIdx.z. The threads stride over
// the output channels (displacements); each accumulates gradOutput times the
// matching rInput2 value into its own prod_sum slot, and thread 0 adds the
// slots together and writes the normalised result.
//
// rInput2 is indexed as a padded (n, y, x, c) buffer; gradOutput and
// gradInput1 are indexed as (n, channel, y, x).
template <typename scalar_t>
__global__ void correlation_backward_input1(int item, scalar_t* gradInput1, int nInputChannels, int inputHeight, int inputWidth,
const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth,
const scalar_t* __restrict__ rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = item;
// Position in padded coordinates.
int y = blockIdx.x * stride1 + pad_size;
int x = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
// Range of output positions whose kernel window covers (y, x).
int xmin = (x - kernel_rad - max_displacement) / stride1;
int ymin = (y - kernel_rad - max_displacement) / stride1;
int xmax = (x + kernel_rad - max_displacement) / stride1;
int ymax = (y + kernel_rad - max_displacement) / stride1;
// Both early returns depend only on block-wide values, so the whole block
// leaves together before reaching __syncthreads().
if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) {
// assumes gradInput1 is pre-allocated and zero filled
return;
}
if (xmin > xmax || ymin > ymax) {
// assumes gradInput1 is pre-allocated and zero filled
return;
}
// Clamp the range to the valid output extent.
xmin = max(0,xmin);
xmax = min(outputWidth-1,xmax);
ymin = max(0,ymin);
ymax = min(outputHeight-1,ymax);
int pInputWidth = inputWidth + 2 * pad_size;
int pInputHeight = inputHeight + 2 * pad_size;
// Element strides of the padded input, the output gradient and the input gradient.
int pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int pdimxc = pInputWidth * nInputChannels;
int pdimc = nInputChannels;
int tdimcyx = nOutputChannels * outputHeight * outputWidth;
int tdimyx = outputHeight * outputWidth;
int tdimx = outputWidth;
int odimcyx = nInputChannels * inputHeight* inputWidth;
int odimyx = inputHeight * inputWidth;
int odimx = inputWidth;
// Same normalisation factor as in the forward kernel.
scalar_t nelems = kernel_size * kernel_size * nInputChannels;
// One partial sum per thread.
__shared__ scalar_t prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) {
// Displacement encoded by output channel tc.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
int indx2 = n * pdimyxc + (y + j2)* pdimxc + (x + i2) * pdimc + c;
scalar_t val2 = rInput2[indx2];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i;
prod_sum[tch_off] += gradOutput[tindx] * val2;
}
}
}
__syncthreads();
// Thread 0 serially adds the per-thread partial sums.
if(tch_off == 0) {
scalar_t reduce_sum = 0;
for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) {
reduce_sum += prod_sum[idx];
}
const int indx1 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size);
gradInput1[indx1] = reduce_sum / nelems;
}
}
// Backward kernel for the second input, launched once per batch item `item`.
//
// Same block/thread mapping as correlation_backward_input1: one block per
// (row, column, channel) element of gradInput2, threads striding over the
// output channels. Here the range of contributing output positions depends
// on the displacement, so it is recomputed for every output channel, and
// rInput1 is read at the position shifted by the negated displacement.
template <typename scalar_t>
__global__ void correlation_backward_input2(int item, scalar_t* gradInput2, int nInputChannels, int inputHeight, int inputWidth,
const scalar_t* __restrict__ gradOutput, int nOutputChannels, int outputHeight, int outputWidth,
const scalar_t* __restrict__ rInput1,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2)
{
// n (batch size), c (num of channels), y (height), x (width)
int n = item;
// Position in padded coordinates.
int y = blockIdx.x * stride1 + pad_size;
int x = blockIdx.y * stride1 + pad_size;
int c = blockIdx.z;
int tch_off = threadIdx.x;
int kernel_rad = (kernel_size - 1) / 2;
int displacement_rad = max_displacement / stride2;
int displacement_size = 2 * displacement_rad + 1;
int pInputWidth = inputWidth + 2 * pad_size;
int pInputHeight = inputHeight + 2 * pad_size;
// Element strides of the padded input, the output gradient and the input gradient.
int pdimyxc = pInputHeight * pInputWidth * nInputChannels;
int pdimxc = pInputWidth * nInputChannels;
int pdimc = nInputChannels;
int tdimcyx = nOutputChannels * outputHeight * outputWidth;
int tdimyx = outputHeight * outputWidth;
int tdimx = outputWidth;
int odimcyx = nInputChannels * inputHeight* inputWidth;
int odimyx = inputHeight * inputWidth;
int odimx = inputWidth;
// Same normalisation factor as in the forward kernel.
scalar_t nelems = kernel_size * kernel_size * nInputChannels;
// One partial sum per thread.
__shared__ scalar_t prod_sum[THREADS_PER_BLOCK];
prod_sum[tch_off] = 0;
for (int tc = tch_off; tc < nOutputChannels; tc += THREADS_PER_BLOCK) {
// Displacement encoded by output channel tc.
int i2 = (tc % displacement_size - displacement_rad) * stride2;
int j2 = (tc / displacement_size - displacement_rad) * stride2;
// Output positions whose displaced kernel window covers (y, x).
int xmin = (x - kernel_rad - max_displacement - i2) / stride1;
int ymin = (y - kernel_rad - max_displacement - j2) / stride1;
int xmax = (x + kernel_rad - max_displacement - i2) / stride1;
int ymax = (y + kernel_rad - max_displacement - j2) / stride1;
if (xmax < 0 || ymax < 0 || xmin >= outputWidth || ymin >= outputHeight) {
// assumes gradInput2 is pre-allocated and zero filled
continue;
}
if (xmin > xmax || ymin > ymax) {
// assumes gradInput2 is pre-allocated and zero filled
continue;
}
// Clamp the range to the valid output extent.
xmin = max(0,xmin);
xmax = min(outputWidth-1,xmax);
ymin = max(0,ymin);
ymax = min(outputHeight-1,ymax);
int indx1 = n * pdimyxc + (y - j2)* pdimxc + (x - i2) * pdimc + c;
scalar_t val1 = rInput1[indx1];
for (int j = ymin; j <= ymax; ++j) {
for (int i = xmin; i <= xmax; ++i) {
int tindx = n * tdimcyx + tc * tdimyx + j * tdimx + i;
prod_sum[tch_off] += gradOutput[tindx] * val1;
}
}
}
__syncthreads();
// Thread 0 serially adds the per-thread partial sums.
if(tch_off == 0) {
scalar_t reduce_sum = 0;
for(int idx = 0; idx < THREADS_PER_BLOCK; idx++) {
reduce_sum += prod_sum[idx];
}
const int indx2 = n * odimcyx + c * odimyx + (y - pad_size) * odimx + (x - pad_size);
gradInput2[indx2] = reduce_sum / nelems;
}
}
// Host-side launcher for the forward pass.
//
// Launches channels_first for input1 and input2 (filling rInput1 / rInput2)
// and then correlation_forward, all on `stream`.
//
// Only the sizes ob, oc, oh, ow, ic, ih, iw are used below; the stride
// arguments (os*, is*, gs*), gc and corr_type_multiply are accepted but not
// referenced in this function.
//
// Returns 1 on success, 0 (after printing the CUDA error string) when
// cudaGetLastError reports an error.
int correlation_forward_cuda_kernel(at::Tensor& output,
int ob,
int oc,
int oh,
int ow,
int osb,
int osc,
int osh,
int osw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gc,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream)
{
int batchSize = ob;
int nInputChannels = ic;
int inputWidth = iw;
int inputHeight = ih;
int nOutputChannels = oc;
int outputWidth = ow;
int outputHeight = oh;
// Layout conversion: one block per (batch item, input row, input column).
dim3 blocks_grid(batchSize, inputHeight, inputWidth);
dim3 threads_block(THREADS_PER_BLOCK);
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channels_first_fwd_1", ([&] {
channels_first<scalar_t><<<blocks_grid,threads_block, 0, stream>>>(
input1.data<scalar_t>(), rInput1.data<scalar_t>(), nInputChannels, inputHeight, inputWidth, pad_size);
}));
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "channels_first_fwd_2", ([&] {
channels_first<scalar_t><<<blocks_grid,threads_block, 0, stream>>> (
input2.data<scalar_t>(), rInput2.data<scalar_t>(), nInputChannels, inputHeight, inputWidth, pad_size);
}));
// Correlation: one block per (batch item, output row, output column).
dim3 threadsPerBlock(THREADS_PER_BLOCK);
dim3 totalBlocksCorr(batchSize, outputHeight, outputWidth);
AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "correlation_forward", ([&] {
correlation_forward<scalar_t><<<totalBlocksCorr, threadsPerBlock, 0, stream>>>
(output.data<scalar_t>(), nOutputChannels, outputHeight, outputWidth,
rInput1.data<scalar_t>(), nInputChannels, inputHeight, inputWidth,
rInput2.data<scalar_t>(),
pad_size,
kernel_size,
max_displacement,
stride1,
stride2);
}));
cudaError_t err = cudaGetLastError();
// check for errors
if (err != cudaSuccess) {
printf("error in correlation_forward_cuda_kernel: %s\n", cudaGetErrorString(err));
return 0;
}
return 1;
}
// Host-side launcher for the backward pass.
//
// Launches channels_first for input1 and input2 (filling rInput1 / rInput2),
// then correlation_backward_input1 and correlation_backward_input2 once per
// batch item, all on `stream`.
//
// Only the sizes gob, goc, goh, gow, ic, ih, iw are used below; the stride
// arguments, ggc and corr_type_multiply are accepted but not referenced in
// this function.
//
// Returns 1 on success, 0 (after printing the CUDA error string) when
// cudaGetLastError reports an error.
int correlation_backward_cuda_kernel(
    at::Tensor& gradOutput,
    int gob,
    int goc,
    int goh,
    int gow,
    int gosb,
    int gosc,
    int gosh,
    int gosw,
    at::Tensor& input1,
    int ic,
    int ih,
    int iw,
    int isb,
    int isc,
    int ish,
    int isw,
    at::Tensor& input2,
    int gsb,
    int gsc,
    int gsh,
    int gsw,
    at::Tensor& gradInput1,
    int gisb,
    int gisc,
    int gish,
    int gisw,
    at::Tensor& gradInput2,
    int ggc,
    int ggsb,
    int ggsc,
    int ggsh,
    int ggsw,
    at::Tensor& rInput1,
    at::Tensor& rInput2,
    int pad_size,
    int kernel_size,
    int max_displacement,
    int stride1,
    int stride2,
    int corr_type_multiply,
    cudaStream_t stream)
{
    int batchSize = gob;

    int nInputChannels = ic;
    int inputWidth = iw;
    int inputHeight = ih;

    int nOutputChannels = goc;
    int outputWidth = gow;
    int outputHeight = goh;

    // Layout conversion: one block per (batch item, input row, input column).
    dim3 blocks_grid(batchSize, inputHeight, inputWidth);
    dim3 threads_block(THREADS_PER_BLOCK);

    // The dispatch labels name the kernel being launched so that an
    // unsupported-dtype error points at the right place.
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channels_first_bwd_1", ([&] {
        channels_first<scalar_t><<<blocks_grid, threads_block, 0, stream>>>(
            input1.data<scalar_t>(),
            rInput1.data<scalar_t>(),
            nInputChannels,
            inputHeight,
            inputWidth,
            pad_size
        );
    }));

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "channels_first_bwd_2", ([&] {
        channels_first<scalar_t><<<blocks_grid, threads_block, 0, stream>>>(
            input2.data<scalar_t>(),
            rInput2.data<scalar_t>(),
            nInputChannels,
            inputHeight,
            inputWidth,
            pad_size
        );
    }));

    // Gradient kernels: one block per (input row, input column, channel);
    // the batch item is passed as a kernel argument, one launch per item.
    dim3 threadsPerBlock(THREADS_PER_BLOCK);
    dim3 totalBlocksCorr(inputHeight, inputWidth, nInputChannels);

    for (int n = 0; n < batchSize; ++n) {
        AT_DISPATCH_FLOATING_TYPES_AND_HALF(input2.type(), "correlation_backward_input1", ([&] {
            correlation_backward_input1<scalar_t><<<totalBlocksCorr, threadsPerBlock, 0, stream>>>(
                n, gradInput1.data<scalar_t>(), nInputChannels, inputHeight, inputWidth,
                gradOutput.data<scalar_t>(), nOutputChannels, outputHeight, outputWidth,
                rInput2.data<scalar_t>(),
                pad_size,
                kernel_size,
                max_displacement,
                stride1,
                stride2);
        }));
    }

    for (int n = 0; n < batchSize; n++) {
        AT_DISPATCH_FLOATING_TYPES_AND_HALF(rInput1.type(), "correlation_backward_input2", ([&] {
            correlation_backward_input2<scalar_t><<<totalBlocksCorr, threadsPerBlock, 0, stream>>>(
                n, gradInput2.data<scalar_t>(), nInputChannels, inputHeight, inputWidth,
                gradOutput.data<scalar_t>(), nOutputChannels, outputHeight, outputWidth,
                rInput1.data<scalar_t>(),
                pad_size,
                kernel_size,
                max_displacement,
                stride1,
                stride2);
        }));
    }

    // check for errors
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("error in correlation_backward_cuda_kernel: %s\n", cudaGetErrorString(err));
        return 0;
    }

    return 1;
}
================================================
FILE: PWCNet/correlation_package_pytorch1_0/correlation_cuda_kernel.cuh
================================================
#pragma once
#include <ATen/ATen.h>
#include <ATen/Context.h>
#include <cuda_runtime.h>
// Launcher for the forward correlation kernels.
// Argument groups, in order: output tensor with four sizes and four strides;
// input1 with three sizes (channels, height, width) and four strides;
// input2 with its channel count and four strides; the two scratch tensors;
// the correlation parameters; the CUDA stream to launch on.
int correlation_forward_cuda_kernel(at::Tensor& output,
int ob,
int oc,
int oh,
int ow,
int osb,
int osc,
int osh,
int osw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gc,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream);
// Launcher for the backward correlation kernels.
// Argument groups, in order: gradOutput with four sizes and four strides;
// input1 with three sizes (channels, height, width) and four strides;
// input2 with four strides; gradInput1 with four strides; gradInput2 with
// its channel count and four strides; the two scratch tensors; the
// correlation parameters; the CUDA stream to launch on.
int correlation_backward_cuda_kernel(
at::Tensor& gradOutput,
int gob,
int goc,
int goh,
int gow,
int gosb,
int gosc,
int gosh,
int gosw,
at::Tensor& input1,
int ic,
int ih,
int iw,
int isb,
int isc,
int ish,
int isw,
at::Tensor& input2,
int gsb,
int gsc,
int gsh,
int gsw,
at::Tensor& gradInput1,
int gisb,
int gisc,
int gish,
int gisw,
at::Tensor& gradInput2,
int ggc,
int ggsb,
int ggsc,
int ggsh,
int ggsw,
at::Tensor& rInput1,
at::Tensor& rInput2,
int pad_size,
int kernel_size,
int max_displacement,
int stride1,
int stride2,
int corr_type_multiply,
cudaStream_t stream);
================================================
FILE: PWCNet/correlation_package_pytorch1_0/setup.py
================================================
#!/usr/bin/env python3
import os
import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
from compiler_args import nvcc_args, cxx_args
# The extension is compiled from one C++ binding file and one CUDA kernel
# file, with compiler flags taken from the compiler_args module.
correlation_extension = CUDAExtension(
    'correlation_cuda',
    ['correlation_cuda.cc', 'correlation_cuda_kernel.cu'],
    extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args},
)

setup(
    name='correlation_cuda',
    ext_modules=[correlation_extension],
    cmdclass={'build_ext': BuildExtension},
)
================================================
FILE: PWCNet/models/PWCNet.py
================================================
"""
implementation of the PWC-DC network for optical flow estimation by Sun et al., 2018
Jinwei Gu and Zhile Ren
"""
import torch
import torch.nn as nn
from torch.autograd import Variable
import os
os.environ['PYTHON_EGG_CACHE'] = 'tmp/' # a writable directory
from correlation_package.modules.corr import Correlation
import numpy as np
__all__ = [
'pwc_dc_net', 'pwc_dc_net_old'
]
def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
    """
    Build a biased 2-D convolution followed by LeakyReLU(0.1).

    All arguments are forwarded to nn.Conv2d; returns the two layers wrapped
    in an nn.Sequential.
    """
    convolution = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        bias=True,
    )
    activation = nn.LeakyReLU(0.1)
    return nn.Sequential(convolution, activation)
def predict_flow(in_planes):
    """Build the flow head: a biased 3x3 convolution from in_planes to 2 channels."""
    flow_head = nn.Conv2d(in_planes, 2, kernel_size=3, stride=1, padding=1, bias=True)
    return flow_head
def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    """Build a biased transposed 2-D convolution with the given geometry."""
    upsampler = nn.ConvTranspose2d(
        in_planes,
        out_planes,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        bias=True,
    )
    return upsampler
class PWCDCNet(nn.Module):
"""
PWC-DC net. add dilation convolution and densenet connections
"""
def __init__(self, md=4):
    """
    Create all layers of the network and initialise their weights.

    input: md --- maximum displacement (for correlation. default: 4), after warping
    """
    super(PWCDCNet, self).__init__()

    # Feature pyramid: each level starts with a stride-2 convolution that is
    # followed by two stride-1 convolutions.
    self.conv1a = conv(3, 16, kernel_size=3, stride=2)
    self.conv1aa = conv(16, 16, kernel_size=3, stride=1)
    self.conv1b = conv(16, 16, kernel_size=3, stride=1)
    self.conv2a = conv(16, 32, kernel_size=3, stride=2)
    self.conv2aa = conv(32, 32, kernel_size=3, stride=1)
    self.conv2b = conv(32, 32, kernel_size=3, stride=1)
    self.conv3a = conv(32, 64, kernel_size=3, stride=2)
    self.conv3aa = conv(64, 64, kernel_size=3, stride=1)
    self.conv3b = conv(64, 64, kernel_size=3, stride=1)
    self.conv4a = conv(64, 96, kernel_size=3, stride=2)
    self.conv4aa = conv(96, 96, kernel_size=3, stride=1)
    self.conv4b = conv(96, 96, kernel_size=3, stride=1)
    self.conv5a = conv(96, 128, kernel_size=3, stride=2)
    self.conv5aa = conv(128, 128, kernel_size=3, stride=1)
    self.conv5b = conv(128, 128, kernel_size=3, stride=1)
    # At level 6 the stride-2 layer is the one named conv6aa (the other
    # levels use the "a" suffix for it). Attribute names are kept as they
    # are because they are the keys of the module's state dict.
    self.conv6aa = conv(128, 196, kernel_size=3, stride=2)
    self.conv6a = conv(196, 196, kernel_size=3, stride=1)
    self.conv6b = conv(196, 196, kernel_size=3, stride=1)

    self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
    self.leakyRELU = nn.LeakyReLU(0.1)

    # Number of correlation output channels.
    nd = (2 * md + 1) ** 2
    # Running channel totals added by the dense connections. tolist() yields
    # plain Python ints; the former np.int cast no longer exists in recent
    # NumPy releases.
    dd = np.cumsum([128, 128, 96, 64, 32]).tolist()

    # Level 6 decoder: input is the correlation volume only.
    od = nd
    self.conv6_0 = conv(od, 128, kernel_size=3, stride=1)
    self.conv6_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
    self.conv6_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
    self.conv6_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
    self.conv6_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
    self.predict_flow6 = predict_flow(od + dd[4])
    self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
    self.upfeat6 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1)

    # Levels 5-2: correlation + pyramid features + 2 upsampled flow channels
    # + 2 upsampled feature channels.
    od = nd + 128 + 4
    self.conv5_0 = conv(od, 128, kernel_size=3, stride=1)
    self.conv5_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
    self.conv5_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
    self.conv5_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
    self.conv5_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
    self.predict_flow5 = predict_flow(od + dd[4])
    self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
    self.upfeat5 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1)

    od = nd + 96 + 4
    self.conv4_0 = conv(od, 128, kernel_size=3, stride=1)
    self.conv4_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
    self.conv4_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
    self.conv4_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
    self.conv4_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
    self.predict_flow4 = predict_flow(od + dd[4])
    self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
    self.upfeat4 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1)

    od = nd + 64 + 4
    self.conv3_0 = conv(od, 128, kernel_size=3, stride=1)
    self.conv3_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
    self.conv3_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
    self.conv3_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
    self.conv3_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
    self.predict_flow3 = predict_flow(od + dd[4])
    self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
    self.upfeat3 = deconv(od + dd[4], 2, kernel_size=4, stride=2, padding=1)

    od = nd + 32 + 4
    self.conv2_0 = conv(od, 128, kernel_size=3, stride=1)
    self.conv2_1 = conv(od + dd[0], 128, kernel_size=3, stride=1)
    self.conv2_2 = conv(od + dd[1], 96, kernel_size=3, stride=1)
    self.conv2_3 = conv(od + dd[2], 64, kernel_size=3, stride=1)
    self.conv2_4 = conv(od + dd[3], 32, kernel_size=3, stride=1)
    self.predict_flow2 = predict_flow(od + dd[4])
    self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)

    # Stack of dilated convolutions applied to the level-2 features.
    self.dc_conv1 = conv(od + dd[4], 128, kernel_size=3, stride=1, padding=1, dilation=1)
    self.dc_conv2 = conv(128, 128, kernel_size=3, stride=1, padding=2, dilation=2)
    self.dc_conv3 = conv(128, 128, kernel_size=3, stride=1, padding=4, dilation=4)
    self.dc_conv4 = conv(128, 96, kernel_size=3, stride=1, padding=8, dilation=8)
    self.dc_conv5 = conv(96, 64, kernel_size=3, stride=1, padding=16, dilation=16)
    self.dc_conv6 = conv(64, 32, kernel_size=3, stride=1, padding=1, dilation=1)
    self.dc_conv7 = predict_flow(32)

    # Kaiming-normal weights and zero biases for every (transposed)
    # convolution; kaiming_normal_ is the in-place, non-deprecated spelling.
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
            if m.bias is not None:
                m.bias.data.zero_()
def warp(self, x, flo):
    """
    Warp an image/tensor (im2) back to im1, according to the optical flow.

    x: [B, C, H, W] (im2)
    flo: [B, 2, H, W] flow; channel 0 is added to the column (x) coordinate
         and channel 1 to the row (y) coordinate, both in pixels.

    Returns a [B, C, H, W] tensor: ``x`` bilinearly sampled at
    (pixel position + flow). Output locations whose sampling footprint is
    not (almost) completely inside the image are set to zero.

    Works for CPU and CUDA inputs alike: every helper tensor is created on
    ``x``'s device instead of being moved with an unconditional ``.cuda()``.
    """
    B, C, H, W = x.size()

    def sample(inp, grid):
        # The normalisation below divides by (W-1)/(H-1), i.e. -1/+1 address
        # the centres of the corner pixels (the align_corners=True
        # convention). Newer PyTorch releases default to align_corners=False,
        # so request the matching behaviour explicitly; releases that predate
        # the keyword reject it with TypeError and already behave this way.
        try:
            return nn.functional.grid_sample(inp, grid, align_corners=True)
        except TypeError:
            return nn.functional.grid_sample(inp, grid)

    # mesh grid of absolute pixel coordinates, built directly on x's device
    xx = torch.arange(0, W, device=x.device).view(1, -1).repeat(H, 1)
    yy = torch.arange(0, H, device=x.device).view(-1, 1).repeat(1, W)
    xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1)
    yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1)
    grid = torch.cat((xx, yy), 1).float()
    vgrid = grid + flo

    # scale grid to [-1,1]; max(.., 1) avoids dividing by zero for size-1 dims
    vx = 2.0 * vgrid[:, 0, :, :] / max(W - 1, 1) - 1.0
    vy = 2.0 * vgrid[:, 1, :, :] / max(H - 1, 1) - 1.0
    # grid_sample expects the coordinate pair in the last dimension: [B, H, W, 2]
    vgrid = torch.stack((vx, vy), dim=3)

    output = sample(x, vgrid)

    # Warping an all-ones tensor yields, per location, the fraction of the
    # bilinear footprint that fell inside the image; binarise it so that
    # partially out-of-range samples are discarded entirely.
    mask = sample(torch.ones_like(x), vgrid)
    mask = (mask >= 0.9999).to(output.dtype)
    return output * mask
def forward(self,x):
    """
    Coarse-to-fine flow estimation over six feature levels.

    x: tensor whose dim-1 slice [:3] is used as the first image and [3:]
       as the second image.

    Returns (flow2, flow3, flow4, flow5, flow6) when ``self.training`` is
    truthy, otherwise only flow2.
    """
    def densely_connect(feat, layers):
        # DenseNet-style growth: every layer's output is concatenated in
        # front of the tensor it was computed from.
        for layer in layers:
            feat = torch.cat((layer(feat), feat), 1)
        return feat

    def decode(first, second, up_flow, up_feat, flow_scale, layers):
        # Warp the second image's features with the scaled upsampled flow,
        # correlate them with the first image's features, then run the
        # level's dense stack on [cost, features, up_flow, up_feat].
        warped = self.warp(second, up_flow * flow_scale)
        cost = self.leakyRELU(self.corr(first, warped))
        stacked = torch.cat((cost, first, up_flow, up_feat), 1)
        return densely_connect(stacked, layers)

    im1 = x[:, :3, :, :]
    im2 = x[:, 3:, :, :]

    # Feature pyramids; both images go through the same layers, level by level.
    c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
    c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
    c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
    c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
    c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
    c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
    c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
    c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
    c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
    c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
    # Level 6 applies conv6aa before conv6a (levels 1-5 go a -> aa -> b);
    # this call order is reproduced exactly.
    c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
    c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))

    # Level 6: no previous flow, so the cost volume alone feeds the dense stack.
    x = densely_connect(
        self.leakyRELU(self.corr(c16, c26)),
        (self.conv6_0, self.conv6_1, self.conv6_2, self.conv6_3, self.conv6_4))
    flow6 = self.predict_flow6(x)
    up_flow6 = self.deconv6(flow6)
    up_feat6 = self.upfeat6(x)

    # Level 5
    x = decode(c15, c25, up_flow6, up_feat6, 0.625,
               (self.conv5_0, self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4))
    flow5 = self.predict_flow5(x)
    up_flow5 = self.deconv5(flow5)
    up_feat5 = self.upfeat5(x)

    # Level 4
    x = decode(c14, c24, up_flow5, up_feat5, 1.25,
               (self.conv4_0, self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4))
    flow4 = self.predict_flow4(x)
    up_flow4 = self.deconv4(flow4)
    up_feat4 = self.upfeat4(x)

    # Level 3
    x = decode(c13, c23, up_flow4, up_feat4, 2.5,
               (self.conv3_0, self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4))
    flow3 = self.predict_flow3(x)
    up_flow3 = self.deconv3(flow3)
    up_feat3 = self.upfeat3(x)

    # Level 2 (finest level that is estimated)
    x = decode(c12, c22, up_flow3, up_feat3, 5.0,
               (self.conv2_0, self.conv2_1, self.conv2_2, self.conv2_3, self.conv2_4))
    flow2 = self.predict_flow2(x)

    # The dc_conv chain produces a residual that is added in place to flow2.
    context = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
    flow2 += self.dc_conv7(self.dc_conv6(self.dc_conv5(context)))

    if not self.training:
        return flow2
    return flow2,flow3,flow4,flow5,flow6
class PWCDCNet_old(nn.Module):
    """
    PWC-DC net. add dilation convolution and densenet connections

    Older variant of the network: two convolutions per feature-pyramid level,
    and a dense-block concatenation order that mixes "append" and "prepend"
    (see ``forward``). The layer attribute names and the concatenation order
    determine how a saved state dict maps onto the layers, so neither should
    be changed.

    ``conv``, ``deconv``, ``predict_flow`` and ``Correlation`` are helpers
    defined elsewhere in this file/package.
    """
    def __init__(self, md=4):
        """
        input: md --- maximum displacement (for correlation. default: 4), after warping
        """
        super(PWCDCNet_old,self).__init__()

        # Feature pyramid: per level one stride-2 conv followed by one stride-1 conv.
        self.conv1a  = conv(3,   16, kernel_size=3, stride=2)
        self.conv1b  = conv(16,  16, kernel_size=3, stride=1)
        self.conv2a  = conv(16,  32, kernel_size=3, stride=2)
        self.conv2b  = conv(32,  32, kernel_size=3, stride=1)
        self.conv3a  = conv(32,  64, kernel_size=3, stride=2)
        self.conv3b  = conv(64,  64, kernel_size=3, stride=1)
        self.conv4a  = conv(64,  96, kernel_size=3, stride=2)
        self.conv4b  = conv(96,  96, kernel_size=3, stride=1)
        self.conv5a  = conv(96, 128, kernel_size=3, stride=2)
        self.conv5b  = conv(128,128, kernel_size=3, stride=1)
        self.conv6a  = conv(128,196, kernel_size=3, stride=2)
        self.conv6b  = conv(196,196, kernel_size=3, stride=1)

        self.corr    = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)
        self.leakyRELU = nn.LeakyReLU(0.1)

        # nd: number of correlation channels, (2*md+1)^2.
        nd = (2*md+1)**2
        # dd[i]: channels accumulated by the dense connections after layer i.
        dd = np.cumsum([128,128,96,64,32])

        # Level 6 decoder: input is the cost volume only.
        od = nd
        self.conv6_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv6_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv6_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv6_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv6_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow6 = predict_flow(od+dd[4])
        self.deconv6 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat6 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        # Levels 5..2: cost volume + pyramid features + 2 flow + 2 upsampled
        # feature channels (hence the "+4").
        od = nd+128+4
        self.conv5_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv5_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv5_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv5_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv5_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow5 = predict_flow(od+dd[4])
        self.deconv5 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat5 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+96+4
        self.conv4_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv4_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv4_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv4_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv4_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow4 = predict_flow(od+dd[4])
        self.deconv4 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat4 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+64+4
        self.conv3_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv3_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv3_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv3_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv3_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow3 = predict_flow(od+dd[4])
        self.deconv3 = deconv(2, 2, kernel_size=4, stride=2, padding=1)
        self.upfeat3 = deconv(od+dd[4], 2, kernel_size=4, stride=2, padding=1)

        od = nd+32+4
        self.conv2_0 = conv(od,      128, kernel_size=3, stride=1)
        self.conv2_1 = conv(od+dd[0],128, kernel_size=3, stride=1)
        self.conv2_2 = conv(od+dd[1],96,  kernel_size=3, stride=1)
        self.conv2_3 = conv(od+dd[2],64,  kernel_size=3, stride=1)
        self.conv2_4 = conv(od+dd[3],32,  kernel_size=3, stride=1)
        self.predict_flow2 = predict_flow(od+dd[4])
        # deconv2 is registered but not called in forward().
        self.deconv2 = deconv(2, 2, kernel_size=4, stride=2, padding=1)

        # Dilated-convolution chain applied on top of the level-2 features.
        self.dc_conv1 = conv(od+dd[4], 128, kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv2 = conv(128,      128, kernel_size=3, stride=1, padding=2,  dilation=2)
        self.dc_conv3 = conv(128,      128, kernel_size=3, stride=1, padding=4,  dilation=4)
        self.dc_conv4 = conv(128,      96,  kernel_size=3, stride=1, padding=8,  dilation=8)
        self.dc_conv5 = conv(96,       64,  kernel_size=3, stride=1, padding=16, dilation=16)
        self.dc_conv6 = conv(64,       32,  kernel_size=3, stride=1, padding=1,  dilation=1)
        self.dc_conv7 = predict_flow(32)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                # kaiming_normal_ is the in-place initialiser; the
                # underscore-less spelling is deprecated.
                nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
                if m.bias is not None:
                    m.bias.data.zero_()

    def warp(self, x, flo):
        """
        warp an image/tensor (im2) back to im1, according to the optical flow

        x: [B, C, H, W] (im2)
        flo: [B, 2, H, W] flow; channel 0 offsets the column (x) coordinate,
             channel 1 the row (y) coordinate, both in pixels.

        Returns a [B, C, H, W] tensor; locations whose bilinear footprint is
        not (almost) completely inside the image are zeroed. All helper
        tensors are created on ``x``'s device, so CPU input works as well.
        """
        B, C, H, W = x.size()

        def sample(inp, grid):
            # The (W-1)/(H-1) normalisation below is the align_corners=True
            # convention; newer PyTorch defaults to False, so ask for it
            # explicitly and fall back for releases without the keyword.
            try:
                return nn.functional.grid_sample(inp, grid, align_corners=True)
            except TypeError:
                return nn.functional.grid_sample(inp, grid)

        # mesh grid
        xx = torch.arange(0, W, device=x.device).view(1, -1).repeat(H, 1)
        yy = torch.arange(0, H, device=x.device).view(-1, 1).repeat(1, W)
        xx = xx.view(1, 1, H, W).repeat(B, 1, 1, 1)
        yy = yy.view(1, 1, H, W).repeat(B, 1, 1, 1)
        grid = torch.cat((xx, yy), 1).float()
        vgrid = grid + flo

        # scale grid to [-1,1]
        vx = 2.0 * vgrid[:, 0, :, :] / max(W - 1, 1) - 1.0
        vy = 2.0 * vgrid[:, 1, :, :] / max(H - 1, 1) - 1.0
        vgrid = torch.stack((vx, vy), dim=3)  # [B, H, W, 2]

        output = sample(x, vgrid)
        # Validity mask: warp an all-ones tensor and binarise at 0.999.
        mask = sample(torch.ones_like(x), vgrid)
        mask = (mask >= 0.999).to(output.dtype)
        return output * mask

    def forward(self,x):
        """
        x: tensor whose dim-1 slice [:3] is the first image and [3:] the second.

        Returns (flow2, flow3, flow4, flow5, flow6) when ``self.training`` is
        truthy, otherwise only flow2.

        NOTE: within each dense stack the first layer's output is appended,
        the second layer's output is prepended and the remaining ones are
        appended. This mixed order defines the input-channel layout of the
        following layers and is intentionally left untouched.
        """
        im1 = x[:,:3,:,:]
        im2 = x[:,3:,:,:]

        # Feature pyramids for both images (shared layers).
        c11 = self.conv1b(self.conv1a(im1))
        c21 = self.conv1b(self.conv1a(im2))
        c12 = self.conv2b(self.conv2a(c11))
        c22 = self.conv2b(self.conv2a(c21))
        c13 = self.conv3b(self.conv3a(c12))
        c23 = self.conv3b(self.conv3a(c22))
        c14 = self.conv4b(self.conv4a(c13))
        c24 = self.conv4b(self.conv4a(c23))
        c15 = self.conv5b(self.conv5a(c14))
        c25 = self.conv5b(self.conv5a(c24))
        c16 = self.conv6b(self.conv6a(c15))
        c26 = self.conv6b(self.conv6a(c25))

        # Level 6
        corr6 = self.corr(c16, c26)
        corr6 = self.leakyRELU(corr6)
        x = torch.cat((corr6, self.conv6_0(corr6)),1)
        x = torch.cat((self.conv6_1(x), x),1)
        x = torch.cat((x, self.conv6_2(x)),1)
        x = torch.cat((x, self.conv6_3(x)),1)
        x = torch.cat((x, self.conv6_4(x)),1)
        flow6 = self.predict_flow6(x)
        up_flow6 = self.deconv6(flow6)
        up_feat6 = self.upfeat6(x)

        # Level 5: warp with the upsampled flow scaled by 0.625.
        warp5 = self.warp(c25, up_flow6*0.625)
        corr5 = self.corr(c15, warp5)
        corr5 = self.leakyRELU(corr5)
        x = torch.cat((corr5, c15, up_flow6, up_feat6), 1)
        x = torch.cat((x, self.conv5_0(x)),1)
        x = torch.cat((self.conv5_1(x), x),1)
        x = torch.cat((x, self.conv5_2(x)),1)
        x = torch.cat((x, self.conv5_3(x)),1)
        x = torch.cat((x, self.conv5_4(x)),1)
        flow5 = self.predict_flow5(x)
        up_flow5 = self.deconv5(flow5)
        up_feat5 = self.upfeat5(x)

        # Level 4
        warp4 = self.warp(c24, up_flow5*1.25)
        corr4 = self.corr(c14, warp4)
        corr4 = self.leakyRELU(corr4)
        x = torch.cat((corr4, c14, up_flow5, up_feat5), 1)
        x = torch.cat((x, self.conv4_0(x)),1)
        x = torch.cat((self.conv4_1(x), x),1)
        x = torch.cat((x, self.conv4_2(x)),1)
        x = torch.cat((x, self.conv4_3(x)),1)
        x = torch.cat((x, self.conv4_4(x)),1)
        flow4 = self.predict_flow4(x)
        up_flow4 = self.deconv4(flow4)
        up_feat4 = self.upfeat4(x)

        # Level 3
        warp3 = self.warp(c23, up_flow4*2.5)
        corr3 = self.corr(c13, warp3)
        corr3 = self.leakyRELU(corr3)
        x = torch.cat((corr3, c13, up_flow4, up_feat4), 1)
        x = torch.cat((x, self.conv3_0(x)),1)
        x = torch.cat((self.conv3_1(x), x),1)
        x = torch.cat((x, self.conv3_2(x)),1)
        x = torch.cat((x, self.conv3_3(x)),1)
        x = torch.cat((x, self.conv3_4(x)),1)
        flow3 = self.predict_flow3(x)
        up_flow3 = self.deconv3(flow3)
        up_feat3 = self.upfeat3(x)

        # Level 2
        warp2 = self.warp(c22, up_flow3*5.0)
        corr2 = self.corr(c12, warp2)
        corr2 = self.leakyRELU(corr2)
        x = torch.cat((corr2, c12, up_flow3, up_feat3), 1)
        x = torch.cat((x, self.conv2_0(x)),1)
        x = torch.cat((self.conv2_1(x), x),1)
        x = torch.cat((x, self.conv2_2(x)),1)
        x = torch.cat((x, self.conv2_3(x)),1)
        x = torch.cat((x, self.conv2_4(x)),1)
        flow2 = self.predict_flow2(x)

        # Dilated-conv chain: its output is added in place to flow2.
        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
        flow2 += self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))

        if self.training:
            return flow2,flow3,flow4,flow5,flow6
        else:
            return flow2
def pwc_dc_net(path=None):
    """Build a PWCDCNet and optionally restore its weights.

    path: checkpoint file to load, or None to keep the freshly initialised
          weights. The checkpoint may be either a bare state dict or a dict
          holding the state dict under the key 'state_dict'.

    Returns the model (not moved to any device by this function).
    """
    model = PWCDCNet()
    if path is not None:
        # map_location='cpu' lets checkpoints that were saved from a GPU be
        # restored on hosts without CUDA; load_state_dict copies the values
        # into the model's own parameters afterwards.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        data = torch.load(path, map_location='cpu')
        state = data['state_dict'] if 'state_dict' in data else data
        model.load_state_dict(state)
    return model
def pwc_dc_net_old(path=None):
    """Build a PWCDCNet_old and optionally restore its weights.

    path: checkpoint file to load, or None to keep the freshly initialised
          weights. The checkpoint may be either a bare state dict or a dict
          holding the state dict under the key 'state_dict'.

    Returns the model (not moved to any device by this function).
    """
    model = PWCDCNet_old()
    if path is not None:
        # map_location='cpu' lets checkpoints that were saved from a GPU be
        # restored on hosts without CUDA; load_state_dict copies the values
        # into the model's own parameters afterwards.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        data = torch.load(path, map_location='cpu')
        state = data['state_dict'] if 'state_dict' in data else data
        model.load_state_dict(state)
    return model
================================================
FILE: PWCNet/models/__init__.py
================================================
from .PWCNet import *
================================================
FILE: README.md
================================================
# DAIN (Depth-Aware Video Frame Interpolation)
[Project](https://sites.google.com/view/wenbobao/dain) **|** [Paper](http://arxiv.org/abs/1904.00830)
[Wenbo Bao](https://sites.google.com/view/wenbobao/home),
[Wei-Sheng Lai](http://graduatestudents.ucmerced.edu/wlai24/),
[Chao Ma](https://sites.google.com/site/chaoma99/),
Xiaoyun Zhang,
Zhiyong Gao,
and [Ming-Hsuan Yang](http://faculty.ucmerced.edu/mhyang/)
IEEE Conference on Computer Vision and Pattern Recognition, Long Beach, CVPR 2019
This work is developed based on our TPAMI work [MEMC-Net](https://github.com/baowenbo/MEMC-Net), where we propose the adaptive warping layer. Please also consider referring to it.
### Table of Contents
1. [Introduction](#introduction)
1. [Citation](#citation)
1. [Requirements and Dependencies](#requirements-and-dependencies)
1. [Installation](#installation)
1. [Testing Pre-trained Models](#testing-pre-trained-models)
1. [Downloading Results](#downloading-results)
1. [Slow-motion Generation](#slow-motion-generation)
1. [Training New Models](#training-new-models)
1. [Google Colab Demo](#google-colab-demo)
### Introduction
We propose the **D**epth-**A**ware video frame **IN**terpolation (**DAIN**) model to explicitly detect the occlusion by exploring the depth cue.
We develop a depth-aware flow projection layer to synthesize intermediate flows that preferably sample closer objects than farther ones.
Our method achieves state-of-the-art performance on the Middlebury dataset.
We provide videos [here](https://www.youtube.com/watch?v=-f8f0igQi5I&t=5s).
<!---->
<!--[](https://www.youtube.com/watch?v=icJ0WbPsE20&feature=youtu.be)
<!--<iframe width="560" height="315" src="https://www.youtube.com/embed/icJ0WbPsE20" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>

<!-- This is a comment; it is not displayed in the browser.
Beanbags
https://drive.google.com/open?id=170vdxANGoNKO5_8MYOuiDvoIXzucv7HW
Dimentrodon
https://drive.google.com/open?id=14n7xvb9hjTKqfcr7ZpEFyfMvx6E8NhD_
DogDance
https://drive.google.com/open?id=1YWAyAJ3T48fMFv2K8j8wIVcmQm39cRof
Grove2
https://drive.google.com/open?id=1sJLwdQdL6JYXSQo_Bev0aQMleWacxCsN
Grove3
https://drive.google.com/open?id=1jGj3UdGppoJO02Of8ZaNXqDH4fnXuQ8O
Hydrangea
https://drive.google.com/open?id=1_4kVlhvrmCv54aXi7vZMk3-FtRQF7s0s
MiniCooper
https://drive.google.com/open?id=1pWHtyBSZsOTC7NTVdHTrv1W-dxa95BLo
RubberWhale
https://drive.google.com/open?id=1korbXsGpSgJn7THBHkLRVrJMtCt5YZPB
Urban2
https://drive.google.com/open?id=1v57RMm9x5vM36mCgPy5hresXDZWtw3Vs
Urban3
https://drive.google.com/open?id=1LMwSU0PrG4_GaDjWRI2v9hvWpYwzRKca
Venus
https://drive.google.com/open?id=1piPnEexuHaiAr4ZzWSAxGi1u1Xo_6vPp
Walking
https://drive.google.com/open?id=1CgCLmVC_WTVTAcA_IdWbLqR8MS18zHoa
-->
<p float="middle">
<img src="https://drive.google.com/uc?export=view&id=1YWAyAJ3T48fMFv2K8j8wIVcmQm39cRof" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1CgCLmVC_WTVTAcA_IdWbLqR8MS18zHoa" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1pWHtyBSZsOTC7NTVdHTrv1W-dxa95BLo" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=170vdxANGoNKO5_8MYOuiDvoIXzucv7HW" width="200"/>
</p>
<p float="middle">
<img src="https://drive.google.com/uc?export=view&id=1sJLwdQdL6JYXSQo_Bev0aQMleWacxCsN" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1jGj3UdGppoJO02Of8ZaNXqDH4fnXuQ8O" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1v57RMm9x5vM36mCgPy5hresXDZWtw3Vs" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1LMwSU0PrG4_GaDjWRI2v9hvWpYwzRKca" width="200"/>
</p>
<p float="middle">
<img src="https://drive.google.com/uc?export=view&id=1piPnEexuHaiAr4ZzWSAxGi1u1Xo_6vPp" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1korbXsGpSgJn7THBHkLRVrJMtCt5YZPB" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=1_4kVlhvrmCv54aXi7vZMk3-FtRQF7s0s" width="200"/>
<img src="https://drive.google.com/uc?export=view&id=14n7xvb9hjTKqfcr7ZpEFyfMvx6E8NhD_" width="200"/>
</p>
### Citation
If you find the code and datasets useful in your research, please cite:
@inproceedings{DAIN,
author = {Bao, Wenbo and Lai, Wei-Sheng and Ma, Chao and Zhang, Xiaoyun and Gao, Zhiyong and Yang, Ming-Hsuan},
title = {Depth-Aware Video Frame Interpolation},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition},
year = {2019}
}
@article{MEMC-Net,
title={MEMC-Net: Motion Estimation and Motion Compensation Driven Neural Network for Video Interpolation and Enhancement},
author={Bao, Wenbo and Lai, Wei-Sheng and Zhang, Xiaoyun and Gao, Zhiyong and Yang, Ming-Hsuan},
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
doi={10.1109/TPAMI.2019.2941941},
year={2018}
}
### Requirements and Dependencies
- Ubuntu (We test with Ubuntu = 16.04.5 LTS)
- Python (We test with Python = 3.6.8 in Anaconda3 = 4.1.1)
- Cuda & Cudnn (We test with Cuda = 9.0 and Cudnn = 7.0)
- PyTorch (The customized depth-aware flow projection and other layers require ATen API in PyTorch = 1.0.0)
- GCC (Compiling PyTorch 1.0.0 extension files (.c/.cu) requires gcc = 4.9.1 and nvcc = 9.0 compilers)
- NVIDIA GPU (We use Titan X (Pascal) with compute = 6.1, but we support compute_50/52/60/61 devices. Should you have devices with higher compute capability, please revise [this](https://github.com/baowenbo/DAIN/blob/master/my_package/DepthFlowProjection/setup.py).)
### Installation
Download repository:
$ git clone https://github.com/baowenbo/DAIN.git
Before building Pytorch extensions, be sure you have `pytorch >= 1.0.0`:
$ python -c "import torch; print(torch.__version__)"
Generate our PyTorch extensions:
$ cd DAIN
$ cd my_package
$ ./build.sh
Generate the Correlation package required by [PWCNet](https://github.com/NVlabs/PWC-Net/tree/master/PyTorch/external_packages/correlation-pytorch-master):
$ cd ../PWCNet/correlation_package_pytorch1_0
$ ./build.sh
### Testing Pre-trained Models
Make model weights dir and Middlebury dataset dir:
$ cd DAIN
$ mkdir model_weights
$ mkdir MiddleBurySet
Download pretrained models,
$ cd model_weights
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/best.pth
and Middlebury dataset:
$ cd ../MiddleBurySet
$ wget http://vision.middlebury.edu/flow/data/comp/zip/other-color-allframes.zip
$ unzip other-color-allframes.zip
$ wget http://vision.middlebury.edu/flow/data/comp/zip/other-gt-interp.zip
$ unzip other-gt-interp.zip
$ cd ..
Pre-installation (build the extensions):
$ cd PWCNet/correlation_package_pytorch1_0
$ sh build.sh
$ cd ../../my_package
$ sh build.sh
$ cd ..
We are good to go by:
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury.py
The interpolated results are under `MiddleBurySet/other-result-author/[random number]/`, where the `random number` is used to distinguish different runs.
### Downloading Results
Our DAIN model achieves the state-of-the-art performance on the UCF101, Vimeo90K, and Middlebury ([*eval*](http://vision.middlebury.edu/flow/eval/results/results-n1.php) and *other*).
Download our interpolated results with:
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/UCF101_DAIN.zip
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/Vimeo90K_interp_DAIN.zip
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/Middlebury_eval_DAIN.zip
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/Middlebury_other_DAIN.zip
### Slow-motion Generation
Our model is fully capable of generating slow-motion effect with minor modification on the network architecture.
Run the following code by specifying `time_step = 0.25` to generate x4 slow-motion effect:
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury_slowmotion.py --netName DAIN_slowmotion --time_step 0.25
or set `time_step` to `0.125` or `0.1` as follows
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury_slowmotion.py --netName DAIN_slowmotion --time_step 0.125
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury_slowmotion.py --netName DAIN_slowmotion --time_step 0.1
to generate x8 and x10 slow-motion respectively. Or, for a little fun, generate x100 slow-motion:
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury_slowmotion.py --netName DAIN_slowmotion --time_step 0.01
You may also want to create gif animations by:
$ cd MiddleBurySet/other-result-author/[random number]/Beanbags
$ convert -delay 1 *.png -loop 0 Beanbags.gif //1*10ms delay
Have fun and enjoy yourself!
### Training New Models
Download the Vimeo90K triplet dataset for the video frame interpolation task, also see [here](https://github.com/anchen1011/toflow/blob/master/download_dataset.sh) by [Xue et al., IJCV19](https://arxiv.org/abs/1711.09078).
$ cd DAIN
$ mkdir /path/to/your/dataset && cd /path/to/your/dataset
$ wget http://data.csail.mit.edu/tofu/dataset/vimeo_triplet.zip
$ unzip vimeo_triplet.zip
$ rm vimeo_triplet.zip
Download the pretrained MegaDepth and PWCNet models
$ cd MegaDepth/checkpoints/test_local
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/best_generalization_net_G.pth
$ cd ../../../PWCNet
$ wget http://vllab1.ucmerced.edu/~wenbobao/DAIN/pwc_net.pth.tar
$ cd ..
Run the training script:
$ CUDA_VISIBLE_DEVICES=0 python train.py --datasetPath /path/to/your/dataset --batch_size 1 --save_which 1 --lr 0.0005 --rectify_lr 0.0005 --flow_lr_coe 0.01 --occ_lr_coe 0.0 --filter_lr_coe 1.0 --ctx_lr_coe 1.0 --alpha 0.0 1.0 --patience 4 --factor 0.2
The optimized models will be saved to the `model_weights/[random number]` directory, where [random number] is generated for different runs.
Replace the pre-trained `model_weights/best.pth` model with the newly trained `model_weights/[random number]/best.pth` model.
Then test the new model by executing:
$ CUDA_VISIBLE_DEVICES=0 python demo_MiddleBury.py
### Google Colab Demo
This is a modification of DAIN that allows the usage of Google Colab and is able to do a full demo interpolation from a source video to a target video.
Original Notebook File by btahir can be found [here](https://github.com/baowenbo/DAIN/issues/44).
To use the Colab, follow these steps:
- Download the `Colab_DAIN.ipynb` file ([link](https://raw.githubusercontent.com/baowenbo/DAIN/master/Colab_DAIN.ipynb)).
- Visit Google Colaboratory ([link](https://colab.research.google.com/))
- Select the "Upload" option, and upload the `.ipynb` file
- Start running the cells one by one, following the instructions.
Colab file authors: [Styler00Dollar](https://github.com/styler00dollar) and [Alpha](https://github.com/AlphaGit).
### Contact
[Wenbo Bao](mailto:bwb0813@gmail.com); [Wei-Sheng (Jason) Lai](mailto:phoenix104104@gmail.com)
### License
See [MIT License](https://github.com/baowenbo/DAIN/blob/master/LICENSE)
================================================
FILE: Resblock/BasicBlock.py
================================================
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch.nn.init as weight_init
import torch
__all__ = ['MultipleBasicBlock','MultipleBasicBlock_4']
def conv3x3(in_planes, out_planes, dilation = 1, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d``.

    The padding is ``int(dilation*(3-1)/2)``, which for stride 1 keeps the
    spatial size of the input.
    """
    kernel = 3
    pad = int(dilation * (kernel - 1) / 2)
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=kernel,
        stride=stride,
        padding=pad,
        dilation=dilation,
        bias=False,
    )
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, without batch norm.

    inplanes/planes: input and output channel counts of the first conv.
    dilation, stride: applied to the first conv only; the second conv uses
        the defaults of ``conv3x3``.
    downsample: optional callable applied to the input to produce the
        shortcut branch; ``None`` uses the input itself.
    """
    expansion = 1

    def __init__(self, inplanes, planes, dilation = 1, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, dilation, stride)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride

        # Re-initialise every conv with N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm layers (if any were passed in) get weight 1 and bias 0.
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                kh, kw = layer.kernel_size[0], layer.kernel_size[1]
                fan = kh * kw * layer.out_channels
                layer.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()

    def forward(self, x):
        """Return relu(conv2(relu(conv1(x))) + shortcut(x))."""
        out = self.conv2(self.relu(self.conv1(x)))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class MultipleBasicBlock(nn.Module):
    """A 7x7 conv + ReLU stem, up to three residual blocks and a 3x3 conv
    that maps to 3 output channels.

    input_feature: channel count of the input tensor.
    block: callable ``block(in_channels, out_channels, dilation=...)`` that
        builds one intermediate block.
    num_blocks: total number of stages including the stem; values >= 2, 3, 4
        enable block2, block3, block4 respectively.
    intermediate_feature: channel count used between the stages.
    dense: stored on the instance; not used by the code in this class.
    """
    def __init__(self,input_feature,
                 block, num_blocks,
                 intermediate_feature = 64, dense = True):
        super(MultipleBasicBlock, self).__init__()
        self.dense = dense
        self.num_block = num_blocks
        self.intermediate_feature = intermediate_feature

        self.block1= nn.Sequential(*[
            nn.Conv2d(input_feature, intermediate_feature,
                      kernel_size=7, stride=1, padding=3, bias=True),
            nn.ReLU(inplace=True)
        ])

        self.block2 = block(intermediate_feature, intermediate_feature, dilation = 1) if num_blocks>=2 else None
        self.block3 = block(intermediate_feature, intermediate_feature, dilation = 1) if num_blocks>=3 else None
        self.block4 = block(intermediate_feature, intermediate_feature, dilation = 1) if num_blocks>=4 else None
        self.block5 = nn.Sequential(*[nn.Conv2d(intermediate_feature, 3 , (3, 3), 1, (1, 1))])

        # Re-initialise conv weights with N(0, sqrt(2 / (kh * kw * out_channels))).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x):
        """Run the stem, every intermediate block that was built, and the output conv."""
        x = self.block1(x)
        # Apply exactly the blocks __init__ created. The previous check
        # `num_block == 4` silently skipped block4 for num_blocks > 4 even
        # though the block had been constructed.
        for stage in (self.block2, self.block3, self.block4):
            if stage is not None:
                x = stage(x)
        x = self.block5(x)
        return x
def MultipleBasicBlock_4(input_feature,intermediate_feature = 64):
    """Build a ``MultipleBasicBlock`` with 4 stages made of ``BasicBlock``."""
    return MultipleBasicBlock(input_feature, BasicBlock, 4, intermediate_feature)
if __name__ == '__main__':
    # Construction-only smoke test: build both module types, then exit.
    # No forward pass is run here.
    model = MultipleBasicBlock(input_feature=200, block=BasicBlock, num_blocks=4)
    model = BasicBlock(inplanes=64, planes=64, dilation=1)
    exit(0)
================================================
FILE: Resblock/__init__.py
================================================
from .BasicBlock import *
================================================
FILE: S2D_models/S2DF.py
================================================
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch
# __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
# 'resnet152','resnet18_conv1']
__all__ = ['S2DF','S2DF_3dense','S2DF_3dense_nodilation',
'S2DF_3last','S2DF_2dense', 'BasicBlock']
# Download URLs of ResNet checkpoints, keyed by architecture name.
# NOTE(review): nothing in this module reads this table -- presumably a
# leftover from the ResNet code this file was derived from; confirm before
# removing.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, dilation = 1, stride=1):
    """Create a 3x3 convolution without bias.

    Padding is ``int(dilation*(3-1)/2)``, so with stride 1 the output has the
    same spatial size as the input.
    """
    padding = int(dilation * (3 - 1) / 2)
    layer = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                      padding=padding, dilation=dilation, bias=False)
    return layer
class BasicBlock(nn.Module):
    """Two-convolution residual block without batch normalisation.

    inplanes/planes: input and output channel counts of the first conv.
    dilation, stride: forwarded to the first conv only.
    downsample: optional callable producing the shortcut from the input;
        when ``None`` the input itself is the shortcut.
    """
    expansion = 1

    def __init__(self, inplanes, planes, dilation = 1, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, dilation, stride)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(conv2(relu(conv1(x))) + shortcut(x))."""
        out = self.conv2(self.relu(self.conv1(x)))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck without batch normalisation.

    The last convolution outputs ``planes * 4`` channels, so the shortcut
    (the input, or ``downsample(input)`` when given) must have that many
    channels for the residual addition to work.
    """
    expansion = 4

    def __init__(self, inplanes, planes, dilation = 1, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        pad = int(dilation * (3 - 1) / 2)
        # Channel reduction.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        # Spatial convolution; carries the stride and the dilation.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=pad, dilation=dilation, bias=False)
        # Channel expansion.
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return relu(conv3(relu(conv2(relu(conv1(x))))) + shortcut(x))."""
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.conv3(out)
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class S2DF(nn.Module):
    """Feature extractor that concatenates the raw input with stage outputs.

    block: callable ``block(in_channels, out_channels, dilation=...)`` used
        for stages 2-4.
    num_blocks: number of stages to run, between 1 and 4 (asserted).
    dense: when truthy the output of every stage is kept; otherwise only the
        output of the last stage is kept.
    dilation: when truthy stages 2/3/4 are built with dilation 4/8/16,
        otherwise with dilation 1.
    """
    def __init__(self, block, num_blocks,dense = True,dilation=True):
        super(S2DF, self).__init__()
        self.inplanes = 64
        self.dense = dense
        self.num_block = num_blocks
        assert 1 <= num_blocks <= 4

        # Stage 1: 7x7 convolution (3 -> 64 channels, no bias) followed by ReLU.
        stem = [
            nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False),
            nn.ReLU(inplace=True),
        ]
        self.block1 = nn.Sequential(*stem)
        self.dilation = dilation

        # Stages 2-4 are only built when requested; unused ones stay None.
        for stage, rate in ((2, 4), (3, 8), (4, 16)):
            if num_blocks >= stage:
                layer = block(self.inplanes, 64, dilation=rate if dilation else 1)
            else:
                layer = None
            setattr(self, 'block%d' % stage, layer)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels))).
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                fan = layer.kernel_size[0] * layer.kernel_size[1] * layer.out_channels
                layer.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()

    def forward(self, x):
        """Return the channel-wise concatenation of x and the kept stage outputs."""
        kept = [x]  # raw feature
        stages = (self.block1, self.block2, self.block3, self.block4)
        for position, stage in enumerate(stages[:self.num_block], start=1):
            x = stage(x)
            # Keep every stage in dense mode, otherwise only the final one.
            if self.dense or position == self.num_block:
                kept.append(x)
        return torch.cat(kept, dim=1)
class S2DFsim(nn.Module):
    """Variant of the feature extractor built from plain conv-ReLU-conv stages.

    block: accepted for signature compatibility; not used by this class.
    num_blocks: number of stages to run, between 1 and 4 (asserted).
    dense: when truthy the output of every stage is kept; otherwise only the
        output of the last stage is kept.
    dilation: stored on the instance; every convolution here is undilated.

    The layers keep PyTorch's default initialisation.
    """
    def __init__(self, block, num_blocks,dense = True,dilation=True):
        super(S2DFsim, self).__init__()
        self.inplanes = 64
        self.dense = dense
        self.num_block = num_blocks
        assert 1 <= num_blocks <= 4

        def conv_pair():
            # 3x3 conv -> ReLU -> 3x3 conv, 64 channels throughout, no bias.
            return nn.Sequential(
                nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
                nn.ReLU(inplace=True),
                nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            )

        # Stage 1 lifts the 3 input channels to 64 with a 7x7 convolution.
        self.block1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=1, padding=3, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
        )
        self.dilation = dilation

        # Later stages are only built when requested; unused ones stay None.
        self.block2 = conv_pair() if num_blocks >= 2 else None
        self.block3 = conv_pair() if num_blocks >= 3 else None
        self.block4 = conv_pair() if num_blocks >= 4 else None

    def forward(self, x):
        """Return the channel-wise concatenation of x and the kept stage outputs."""
        kept = [x]  # raw feature
        stages = (self.block1, self.block2, self.block3, self.block4)
        for position, stage in enumerate(stages[:self.num_block], start=1):
            x = stage(x)
            # Keep every stage in dense mode, otherwise only the final one.
            if self.dense or position == self.num_block:
                kept.append(x)
        return torch.cat(kept, dim=1)
def S2DF_3dense_nodilation():
    """Build a 3-stage dense ``S2DFsim`` (no block class, dilation disabled)."""
    return S2DFsim(None, 3, dense=True, dilation=False)
def S2DF_3dense():
    """Build an ``S2DF`` from ``BasicBlock`` with three blocks and ``dense=True``."""
    return S2DF(BasicBlock, 3, dense=True)
def S2DF_3last():
    """Build an ``S2DF`` from ``BasicBlock`` with three blocks and ``dense=False``."""
    return S2DF(BasicBlock, 3, dense=False)
def S2DF_2dense():
    """Build an ``S2DF`` from ``BasicBlock`` with two blocks and ``dense=True``."""
    return S2DF(BasicBlock, 2, dense=True)
from torch.autograd import Variable
if __name__ == '__main__':
    # Smoke test: push one random batch through a 4-block, non-dense S2DF
    # and exit successfully if the forward pass completes.
    sample = Variable(torch.randn(2, 3, 224, 448))
    net = S2DF(BasicBlock, 4, False)
    out = net(sample)
    exit(0)
================================================
FILE: S2D_models/__init__.py
================================================
from .S2DF import *
================================================
FILE: Stack.py
================================================
class Stack:
    """Minimal last-in, first-out container backed by a Python list."""

    def __init__(self):
        # Top of the stack is the end of the list.
        self.stack = []

    def pop(self):
        """Remove and return the top item, or ``None`` if the stack is empty."""
        return None if self.is_empty() else self.stack.pop()

    def push(self, val):
        """Place ``val`` on top of the stack; returns what ``list.append`` returns."""
        return self.stack.append(val)

    def peak(self):
        """Return the top item without removing it, or ``None`` if empty."""
        return None if self.is_empty() else self.stack[-1]

    def size(self):
        """Return the number of stored items."""
        return len(self.stack)

    def is_empty(self):
        """Return ``True`` when no items are stored."""
        return not self.size()
================================================
FILE: balancedsampler.py
================================================
from torch.utils.data.sampler import Sampler
import torch
class RandomBalancedSampler(Sampler):
    """Samples elements randomly, with an arbitrary size, independent from dataset length.

    This is a balanced sampling that will sample the whole dataset with a
    random permutation: every ``data_size`` consecutive draws visit each index
    exactly once, after which a fresh permutation is drawn.

    The sampler is its own iterator and, for a non-empty dataset, never raises
    ``StopIteration``; ``__len__`` only reports the nominal epoch length, so
    the consumer is responsible for bounding the iteration. For an empty
    dataset iteration ends immediately.

    Arguments:
        data_source (Dataset): dataset to sample from
        epoch_size (int): nominal number of samples per epoch; values <= 0
            mean "use the dataset length"
    """

    def __init__(self, data_source, epoch_size):
        self.data_size = len(data_source)
        self.epoch_size = epoch_size
        self.index = 0

    def __next__(self):
        # An empty dataset has nothing to yield; without this guard the
        # modulo below would raise ZeroDivisionError.
        if self.data_size == 0:
            raise StopIteration
        if self.index == 0:
            # re-shuffle the sampler at the start of every pass
            self.indices = torch.randperm(self.data_size)
        # The position is advanced *before* reading, so each pass reads
        # positions 1..data_size-1 and then 0 of the current permutation.
        self.index = (self.index + 1) % self.data_size
        return self.indices[self.index]

    def next(self):
        """Python 2 style alias for ``__next__``."""
        return self.__next__()

    def __iter__(self):
        return self

    def __len__(self):
        """Return ``min(data_size, epoch_size)``, or ``data_size`` if ``epoch_size`` <= 0."""
        if self.epoch_size > 0:
            return min(self.data_size, self.epoch_size)
        return self.data_size
class SequentialBalancedSampler(Sampler):
    """Samples elements sequentially, with an arbitrary size, independent from dataset length.

    This is a balanced sampling that will sample the whole dataset before
    resetting it: indices are produced in increasing order and wrap around to
    0 after ``data_size - 1``. Because the position is advanced before it is
    returned, the very first index produced is 1 (or 0 for a one-element
    dataset).

    The sampler is its own iterator and, for a non-empty dataset, never raises
    ``StopIteration``; ``__len__`` only reports the nominal epoch length, so
    the consumer is responsible for bounding the iteration. For an empty
    dataset iteration ends immediately.

    Arguments:
        data_source (Dataset): dataset to sample from
        epoch_size (int): nominal number of samples per epoch; values <= 0
            mean "use the dataset length"
    """

    def __init__(self, data_source, epoch_size):
        self.data_size = len(data_source)
        self.epoch_size = epoch_size
        self.index = 0

    def __next__(self):
        # An empty dataset has nothing to yield; without this guard the
        # modulo below would raise ZeroDivisionError.
        if self.data_size == 0:
            raise StopIteration
        self.index = (self.index + 1) % self.data_size
        return self.index

    def next(self):
        """Python 2 style alias for ``__next__``."""
        return self.__next__()

    def __iter__(self):
        return self

    def __len__(self):
        """Return ``min(data_size, epoch_size)``, or ``data_size`` if ``epoch_size`` <= 0."""
        if self.epoch_size > 0:
            return min(self.data_size, self.epoch_size)
        return self.data_size
================================================
FILE: colab_interpolate.py
================================================
import time
import os
from torch.autograd import Variable
import torch
import numpy as np
import numpy
import networks
from my_args import args
from imageio import imread, imsave
from AverageMeter import *
import shutil
import datetime
# Script setup: build the network, load './model_weights/best.pth' into it,
# and derive the frame range / time offsets used by the interpolation loop.

# Turn on cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True
# Look up the network constructor by name in the `networks` package namespace
# and build it with training=False.
model = networks.__dict__[args.netName](
    channel = args.channels,
    filter_size = args.filter_size,
    timestep = args.time_step,
    training = False)
if args.use_cuda:
    model = model.cuda()
model_path = './model_weights/best.pth'
# The script cannot run without weights: report and exit with status 1.
if not os.path.exists(model_path):
    print("*****************************************************************")
    print("**** We couldn't load any trained weights ***********************")
    print("*****************************************************************")
    exit(1)
if args.use_cuda:
    pretrained_dict = torch.load(model_path)
else:
    # Identity map_location keeps every storage where deserialisation put it
    # instead of moving it to the device it was saved from.
    pretrained_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
model_dict = model.state_dict()
# 1. filter out unnecessary keys
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
# 3. load the new state dict
model.load_state_dict(model_dict)
# 4. release the pretrained dict for saving memory
pretrained_dict = []
model = model.eval() # deploy mode
frames_dir = args.frame_input_dir
output_dir = args.frame_output_dir
timestep = args.time_step
# Multiples of `timestep` for kk = 1 .. int(1/timestep)-1,
# e.g. timestep=0.25 gives [0.25, 0.5, 0.75].
time_offsets = [kk * timestep for kk in range(1, int(1.0 / timestep))]
# Start one below start_frame because the loop increments before reading.
input_frame = args.start_frame - 1
loop_timer = AverageMeter()
final_frame = args.end_frame
# Disable autograd globally for the rest of the script.
torch.set_grad_enabled(False)
# we want to have input_frame between (start_frame-1) and (end_frame-2)
# this is because at each step we read (frame) and (frame+1)
# so the last iteration will actually be (end_frame-1) and (end_frame)
while input_frame < final_frame - 1:
input_frame += 1
start_time = time.time()
filename_frame_1 = os.path.join(frames_dir, f'{input_frame:0>5d}.png')
filename_frame_2 = os.path.join(frames_dir, f'{input_frame+1:0>5d}.png')
X0 = torch.from_numpy(np.transpose(imread(filename_frame_1), (2,0,1)).astype("float32") / 255.0).type(args.dtype)
X1 = torch.from_numpy(np.transpose(imread(filename_frame_2), (2,0,1)).astype("float32") / 255.0).type(args.dtype)
assert (X0.size(1) == X1.size(1))
assert (X0.size(2) == X1.size(2))
intWidth = X0.size(2)
intHeight = X0.size(1)
channels = X0.size(0)
if not channels == 3:
print(f"Skipping {filename_frame_1}-{filename_frame_2} -- expected 3 color channels but found {channels}.")
continue
if intWidth != ((intWidth >> 7) << 7):
intWidth_pad = (((intWidth >> 7) + 1) << 7) # more than necessary
intPaddingLeft = int((intWidth_pad - intWidth) / 2)
intPaddingRight = intWidth_pad - intWidth - intPaddingLeft
else:
intPaddingLeft = 32
intPaddingRight= 32
if intHeight != ((intHeight >> 7) << 7):
intHeight_pad = (((intHeight >> 7) + 1) << 7) # more than necessary
intPaddingTop = int((intHeight_pad - intHeight) / 2)
intPaddingBottom = intHeight_pad - intHeight - intPaddingTop
else:
intPaddingTop = 32
intPaddingBottom = 32
pader = torch.nn.ReplicationPad2d([intPaddingLeft, intPaddingRight, intPaddingTop, intPaddingBottom])
X0 = Variable(torch.unsqueeze(X0,0))
X1 = Variable(torch.unsqueeze(X1,0))
X0 = pader(X0)
X1 = pader(X1)
if args.use_cuda:
X0 = X0.cuda()
X1 = X1.cuda()
y_s, offset, filter = mode
gitextract_7t87l58_/ ├── .gitignore ├── AverageMeter.py ├── Colab_DAIN.ipynb ├── LICENSE ├── MegaDepth/ │ ├── LICENSE │ ├── MegaDepth_model.py │ ├── README.md │ ├── SDR_compute.py │ ├── __init__.py │ ├── data/ │ │ ├── __init__.py │ │ ├── aligned_data_loader.py │ │ ├── base_data_loader.py │ │ ├── data_loader.py │ │ └── image_folder.py │ ├── models/ │ │ ├── HG_model.py │ │ ├── __init__.py │ │ ├── base_model.py │ │ └── models.py │ ├── options/ │ │ ├── __init__.py │ │ ├── base_options.py │ │ ├── test_options.py │ │ └── train_options.py │ ├── pytorch_DIW_scratch.py │ ├── rmse_error_main.py │ └── util/ │ ├── __init__.py │ ├── html.py │ ├── image_pool.py │ ├── png.py │ ├── util.py │ └── visualizer.py ├── PWCNet/ │ ├── PWCNet.py │ ├── __init__.py │ ├── correlation_package_pytorch1_0/ │ │ ├── __init__.py │ │ ├── build.sh │ │ ├── clean.sh │ │ ├── correlation.py │ │ ├── correlation_cuda.cc │ │ ├── correlation_cuda_kernel.cu │ │ ├── correlation_cuda_kernel.cuh │ │ └── setup.py │ └── models/ │ ├── PWCNet.py │ └── __init__.py ├── README.md ├── Resblock/ │ ├── BasicBlock.py │ └── __init__.py ├── S2D_models/ │ ├── S2DF.py │ └── __init__.py ├── Stack.py ├── balancedsampler.py ├── colab_interpolate.py ├── datasets/ │ ├── Vimeo_90K_interp.py │ ├── __init__.py │ └── listdatasets.py ├── demo_MiddleBury.py ├── demo_MiddleBury_slowmotion.py ├── environment.yaml ├── loss_function.py ├── lr_scheduler.py ├── my_args.py ├── my_package/ │ ├── DepthFlowProjection/ │ │ ├── DepthFlowProjectionLayer.py │ │ ├── DepthFlowProjectionModule.py │ │ ├── __init__.py │ │ ├── depthflowprojection_cuda.cc │ │ ├── depthflowprojection_cuda_kernel.cu │ │ ├── depthflowprojection_cuda_kernel.cuh │ │ └── setup.py │ ├── FilterInterpolation/ │ │ ├── FilterInterpolationLayer.py │ │ ├── FilterInterpolationModule.py │ │ ├── __init__.py │ │ ├── filterinterpolation_cuda.cc │ │ ├── filterinterpolation_cuda_kernel.cu │ │ ├── filterinterpolation_cuda_kernel.cuh │ │ └── setup.py │ ├── FlowProjection/ │ │ ├── 
FlowProjectionLayer.py │ │ ├── FlowProjectionModule.py │ │ ├── __init__.py │ │ ├── flowprojection_cuda.cc │ │ ├── flowprojection_cuda_kernel.cu │ │ ├── flowprojection_cuda_kernel.cuh │ │ └── setup.py │ ├── Interpolation/ │ │ ├── InterpolationLayer.py │ │ ├── InterpolationModule.py │ │ ├── __init__.py │ │ ├── interpolation_cuda.cc │ │ ├── interpolation_cuda_kernel.cu │ │ ├── interpolation_cuda_kernel.cuh │ │ └── setup.py │ ├── InterpolationCh/ │ │ ├── InterpolationChLayer.py │ │ ├── InterpolationChModule.py │ │ ├── __init__.py │ │ ├── interpolationch_cuda.cc │ │ ├── interpolationch_cuda_kernel.cu │ │ ├── interpolationch_cuda_kernel.cuh │ │ └── setup.py │ ├── MinDepthFlowProjection/ │ │ ├── __init__.py │ │ ├── minDepthFlowProjectionLayer.py │ │ ├── minDepthFlowProjectionModule.py │ │ ├── mindepthflowprojection_cuda.cc │ │ ├── mindepthflowprojection_cuda_kernel.cu │ │ ├── mindepthflowprojection_cuda_kernel.cuh │ │ └── setup.py │ ├── SeparableConv/ │ │ ├── SeparableConvLayer.py │ │ ├── SeparableConvModule.py │ │ ├── __init__.py │ │ ├── separableconv_cuda.cc │ │ ├── separableconv_cuda_kernel.cu │ │ ├── separableconv_cuda_kernel.cuh │ │ └── setup.py │ ├── SeparableConvFlow/ │ │ ├── SeparableConvFlowLayer.py │ │ ├── SeparableConvFlowModule.py │ │ ├── __init__.py │ │ ├── separableconvflow_cuda.cc │ │ ├── separableconvflow_cuda_kernel.cu │ │ ├── separableconvflow_cuda_kernel.cuh │ │ └── setup.py │ ├── build.sh │ ├── clean.sh │ ├── compiler_args.py │ └── test_module.py ├── networks/ │ ├── DAIN.py │ ├── DAIN_slowmotion.py │ └── __init__.py └── train.py
SYMBOL INDEX (362 symbols across 60 files)
FILE: AverageMeter.py
class AverageMeter (line 3) | class AverageMeter(object):
method __init__ (line 5) | def __init__(self):
method reset (line 8) | def reset(self):
method update (line 14) | def update(self, val, n=1):
FILE: MegaDepth/MegaDepth_model.py
function HourGlass (line 11) | def HourGlass(pretrained=None):
FILE: MegaDepth/SDR_compute.py
function test_SDR (line 41) | def test_SDR(model):
FILE: MegaDepth/data/aligned_data_loader.py
class PairedData (line 12) | class PairedData(object):
method __init__ (line 13) | def __init__(self, data_loader, flip):
method __iter__ (line 22) | def __iter__(self):
method __next__ (line 27) | def __next__(self):
class AlignedDataLoader (line 35) | class AlignedDataLoader(BaseDataLoader):
method __init__ (line 36) | def __init__(self,_root, _list_dir, _input_height, _input_width, _is_f...
method name (line 47) | def name(self):
method load_data (line 50) | def load_data(self):
method __len__ (line 53) | def __len__(self):
class AlignedDataLoader_TEST (line 58) | class AlignedDataLoader_TEST(BaseDataLoader):
method __init__ (line 59) | def __init__(self,_root, _list_dir, _input_height, _input_width):
method name (line 69) | def name(self):
method load_data (line 72) | def load_data(self):
method __len__ (line 76) | def __len__(self):
FILE: MegaDepth/data/base_data_loader.py
class BaseDataLoader (line 2) | class BaseDataLoader():
method __init__ (line 3) | def __init__(self):
method load_data (line 10) | def load_data():
FILE: MegaDepth/data/data_loader.py
function CreateDataLoader (line 2) | def CreateDataLoader(_root, _list_dir, _input_height, _input_width, is_f...
function CreateDataLoader_TEST (line 8) | def CreateDataLoader_TEST(_root, _list_dir, _input_height, _input_width):
FILE: MegaDepth/data/image_folder.py
function make_dataset (line 20) | def make_dataset(list_dir):
class ImageFolder (line 34) | class ImageFolder(data.Dataset):
method __init__ (line 36) | def __init__(self, root, list_dir, input_height, input_width, transfor...
method load_MD (line 55) | def load_MD(self, img_path, depth_path):
method __getitem__ (line 95) | def __getitem__(self, index):
method __len__ (line 119) | def __len__(self):
class ImageFolder_TEST (line 124) | class ImageFolder_TEST(data.Dataset):
method __init__ (line 126) | def __init__(self, root, list_dir, _input_height, _input_width):
method load_SfM_ORD (line 139) | def load_SfM_ORD(self, img_path, targets_path):
method __getitem__ (line 170) | def __getitem__(self, index):
method __len__ (line 211) | def __len__(self):
FILE: MegaDepth/models/HG_model.py
class HGModel (line 10) | class HGModel(BaseModel):
method name (line 11) | def name(self):
method __init__ (line 14) | def __init__(self, opt,pretrained=None):
method batch_classify (line 48) | def batch_classify(self, z_A_arr, z_B_arr, ground_truth ):
method computeSDR (line 86) | def computeSDR(self, prediction_d, targets):
method evaluate_SDR (line 123) | def evaluate_SDR(self, input_, targets):
method rmse_Loss (line 131) | def rmse_Loss(self, log_prediction_d, mask, log_gt):
method evaluate_RMSE (line 144) | def evaluate_RMSE(self, input_images, prediction_d, targets):
method evaluate_sc_inv (line 159) | def evaluate_sc_inv(self, input_, targets):
method switch_to_train (line 167) | def switch_to_train(self):
method switch_to_eval (line 170) | def switch_to_eval(self):
FILE: MegaDepth/models/base_model.py
class BaseModel (line 4) | class BaseModel():
method name (line 5) | def name(self):
method initialize (line 8) | def initialize(self, opt):
method set_input (line 15) | def set_input(self, input):
method forward (line 18) | def forward(self):
method test (line 22) | def test(self):
method get_image_paths (line 25) | def get_image_paths(self):
method optimize_parameters (line 28) | def optimize_parameters(self):
method get_current_visuals (line 31) | def get_current_visuals(self):
method get_current_errors (line 34) | def get_current_errors(self):
method save (line 37) | def save(self, label):
method save_network (line 41) | def save_network(self, network, network_label, epoch_label, gpu_ids):
method load_network (line 49) | def load_network(self, network, network_label, epoch_label):
method update_learning_rate (line 57) | def update_learning_rate():
FILE: MegaDepth/models/models.py
function create_model (line 2) | def create_model(opt,pretrained=None):
FILE: MegaDepth/options/base_options.py
class BaseOptions (line 5) | class BaseOptions():
method __init__ (line 6) | def __init__(self):
method initialize (line 10) | def initialize(self):
method parse (line 41) | def parse(self):
FILE: MegaDepth/options/test_options.py
class TestOptions (line 3) | class TestOptions(BaseOptions):
method initialize (line 4) | def initialize(self):
FILE: MegaDepth/options/train_options.py
class TrainOptions (line 3) | class TrainOptions(BaseOptions):
method initialize (line 4) | def initialize(self):
FILE: MegaDepth/pytorch_DIW_scratch.py
class LambdaBase (line 7) | class LambdaBase(nn.Sequential):
method __init__ (line 8) | def __init__(self, fn, *args):
method forward_prepare (line 12) | def forward_prepare(self, input):
class Lambda (line 18) | class Lambda(LambdaBase):
method forward (line 19) | def forward(self, input):
class LambdaMap (line 22) | class LambdaMap(LambdaBase):
method forward (line 23) | def forward(self, input):
class LambdaReduce (line 26) | class LambdaReduce(LambdaBase):
method forward (line 27) | def forward(self, input):
FILE: MegaDepth/rmse_error_main.py
function test (line 33) | def test(model):
FILE: MegaDepth/util/html.py
class HTML (line 6) | class HTML:
method __init__ (line 7) | def __init__(self, web_dir, title, reflesh=0):
method get_image_dir (line 22) | def get_image_dir(self):
method add_header (line 25) | def add_header(self, str):
method add_table (line 29) | def add_table(self, border=1):
method add_images (line 33) | def add_images(self, ims, txts, links, width=400):
method save (line 45) | def save(self):
FILE: MegaDepth/util/image_pool.py
class ImagePool (line 6) | class ImagePool():
method __init__ (line 7) | def __init__(self, pool_size):
method query (line 13) | def query(self, images):
FILE: MegaDepth/util/png.py
function encode (line 4) | def encode(buf, width, height):
FILE: MegaDepth/util/util.py
function tensor2im (line 12) | def tensor2im(image_tensor, imtype=np.uint8):
function diagnose_network (line 18) | def diagnose_network(net, name='network'):
function save_image (line 31) | def save_image(image_numpy, image_path):
function info (line 35) | def info(object, spacing=10, collapse=1):
function varname (line 45) | def varname(p):
function print_numpy (line 51) | def print_numpy(x, val=True, shp=False):
function mkdirs (line 61) | def mkdirs(paths):
function mkdir (line 69) | def mkdir(path):
FILE: MegaDepth/util/visualizer.py
class Visualizer (line 8) | class Visualizer():
method __init__ (line 9) | def __init__(self, opt):
method display_current_results (line 27) | def display_current_results(self, visuals, epoch):
method plot_current_errors (line 57) | def plot_current_errors(self, epoch, counter_ratio, opt, errors):
method print_current_errors (line 73) | def print_current_errors(self, epoch, i, errors, t):
method save_images (line 81) | def save_images(self, webpage, visuals, image_path):
FILE: PWCNet/PWCNet.py
function conv (line 27) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
function predict_flow (line 33) | def predict_flow(in_planes):
function deconv (line 36) | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
class PWCDCNet (line 41) | class PWCDCNet(nn.Module):
method __init__ (line 46) | def __init__(self, md=4):
method warp (line 159) | def warp(self, x, flo):
method forward (line 202) | def forward(self,x, output_more = False):
class PWCDCNet_old (line 322) | class PWCDCNet_old(nn.Module):
method __init__ (line 327) | def __init__(self, md=4):
method warp (line 417) | def warp(self, x, flo):
method forward (line 452) | def forward(self,x):
function pwc_dc_net (line 542) | def pwc_dc_net(path=None):
function pwc_dc_net_old (line 556) | def pwc_dc_net_old(path=None):
FILE: PWCNet/correlation_package_pytorch1_0/correlation.py
class CorrelationFunction (line 6) | class CorrelationFunction(Function):
method __init__ (line 8) | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, str...
method forward (line 18) | def forward(self, input1, input2):
method backward (line 31) | def backward(self, grad_output):
class Correlation (line 47) | class Correlation(Module):
method __init__ (line 48) | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stri...
method forward (line 57) | def forward(self, input1, input2):
FILE: PWCNet/correlation_package_pytorch1_0/correlation_cuda.cc
function correlation_forward_cuda (line 8) | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at:...
function correlation_backward_cuda (line 87) | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at...
function PYBIND11_MODULE (line 167) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: PWCNet/models/PWCNet.py
function conv (line 24) | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dila...
function predict_flow (line 30) | def predict_flow(in_planes):
function deconv (line 33) | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
class PWCDCNet (line 38) | class PWCDCNet(nn.Module):
method __init__ (line 43) | def __init__(self, md=4):
method warp (line 140) | def warp(self, x, flo):
method forward (line 179) | def forward(self,x):
class PWCDCNet_old (line 276) | class PWCDCNet_old(nn.Module):
method __init__ (line 281) | def __init__(self, md=4):
method warp (line 371) | def warp(self, x, flo):
method forward (line 406) | def forward(self,x):
function pwc_dc_net (line 496) | def pwc_dc_net(path=None):
function pwc_dc_net_old (line 510) | def pwc_dc_net_old(path=None):
FILE: Resblock/BasicBlock.py
function conv3x3 (line 7) | def conv3x3(in_planes, out_planes, dilation = 1, stride=1):
class BasicBlock (line 11) | class BasicBlock(nn.Module):
method __init__ (line 14) | def __init__(self, inplanes, planes, dilation = 1, stride=1, downsampl...
method forward (line 33) | def forward(self, x):
class MultipleBasicBlock (line 50) | class MultipleBasicBlock(nn.Module):
method __init__ (line 52) | def __init__(self,input_feature,
method forward (line 80) | def forward(self, x):
function MultipleBasicBlock_4 (line 88) | def MultipleBasicBlock_4(input_feature,intermediate_feature = 64):
FILE: S2D_models/S2DF.py
function conv3x3 (line 20) | def conv3x3(in_planes, out_planes, dilation = 1, stride=1):
class BasicBlock (line 26) | class BasicBlock(nn.Module):
method __init__ (line 29) | def __init__(self, inplanes, planes, dilation = 1, stride=1, downsampl...
method forward (line 39) | def forward(self, x):
class Bottleneck (line 58) | class Bottleneck(nn.Module):
method __init__ (line 61) | def __init__(self, inplanes, planes, dilation = 1, stride=1, downsampl...
method forward (line 74) | def forward(self, x):
class S2DF (line 97) | class S2DF(nn.Module):
method __init__ (line 99) | def __init__(self, block, num_blocks,dense = True,dilation=True):
method forward (line 124) | def forward(self, x):
class S2DFsim (line 147) | class S2DFsim(nn.Module):
method __init__ (line 149) | def __init__(self, block, num_blocks,dense = True,dilation=True):
method forward (line 187) | def forward(self, x):
function S2DF_3dense_nodilation (line 208) | def S2DF_3dense_nodilation():
function S2DF_3dense (line 211) | def S2DF_3dense():
function S2DF_3last (line 214) | def S2DF_3last():
function S2DF_2dense (line 217) | def S2DF_2dense():
FILE: Stack.py
class Stack (line 2) | class Stack:
method __init__ (line 3) | def __init__(self):
method pop (line 5) | def pop(self):
method push (line 10) | def push(self,val):
method peak (line 12) | def peak(self):
method size (line 17) | def size(self):
method is_empty (line 19) | def is_empty(self):
FILE: balancedsampler.py
class RandomBalancedSampler (line 4) | class RandomBalancedSampler(Sampler):
method __init__ (line 12) | def __init__(self, data_source, epoch_size):
method __next__ (line 17) | def __next__(self):
method next (line 24) | def next(self):
method __iter__ (line 27) | def __iter__(self):
method __len__ (line 30) | def __len__(self):
class SequentialBalancedSampler (line 33) | class SequentialBalancedSampler(Sampler):
method __init__ (line 41) | def __init__(self, data_source, epoch_size):
method __next__ (line 46) | def __next__(self):
method next (line 50) | def next(self):
method __iter__ (line 53) | def __iter__(self):
method __len__ (line 56) | def __len__(self):
FILE: datasets/Vimeo_90K_interp.py
function make_dataset (line 8) | def make_dataset(root, list_file):
function Vimeo_90K_interp (line 18) | def Vimeo_90K_interp(root, split=1.0, single=False, task = 'interp' ):
FILE: datasets/listdatasets.py
function Vimeo_90K_loader (line 8) | def Vimeo_90K_loader(root, im_path, input_frame_size = (3, 256, 448), ou...
class ListDataset (line 53) | class ListDataset(data.Dataset):
method __init__ (line 54) | def __init__(self, root, path_list, loader=Vimeo_90K_loader):
method __getitem__ (line 60) | def __getitem__(self, index):
method __len__ (line 66) | def __len__(self):
FILE: loss_function.py
function charbonier_loss (line 16) | def charbonier_loss(x,epsilon):
function negPSNR_loss (line 19) | def negPSNR_loss(x,epsilon):
function tv_loss (line 23) | def tv_loss(x,epsilon):
function gra_adap_tv_loss (line 32) | def gra_adap_tv_loss(flow, image, epsilon):
function smooth_loss (line 39) | def smooth_loss(x,epsilon):
function motion_sym_loss (line 49) | def motion_sym_loss(offset, epsilon, occlusion = None):
function part_loss (line 61) | def part_loss(diffs, offsets, occlusions, images, epsilon, use_negPSNR=F...
FILE: lr_scheduler.py
class _LRScheduler (line 5) | class _LRScheduler(object):
method __init__ (line 6) | def __init__(self, optimizer, last_epoch=-1):
method get_lr (line 23) | def get_lr(self):
method step (line 26) | def step(self, epoch=None):
class LambdaLR (line 34) | class LambdaLR(_LRScheduler):
method __init__ (line 55) | def __init__(self, optimizer, lr_lambda, last_epoch=-1):
method get_lr (line 67) | def get_lr(self):
class StepLR (line 73) | class StepLR(_LRScheduler):
method __init__ (line 98) | def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1):
method get_lr (line 103) | def get_lr(self):
class MultiStepLR (line 109) | class MultiStepLR(_LRScheduler):
method __init__ (line 133) | def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
method get_lr (line 141) | def get_lr(self):
class ExponentialLR (line 147) | class ExponentialLR(_LRScheduler):
method __init__ (line 157) | def __init__(self, optimizer, gamma, last_epoch=-1):
method get_lr (line 161) | def get_lr(self):
class ReduceLROnPlateau (line 167) | class ReduceLROnPlateau(object):
method __init__ (line 212) | def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
method _reset (line 250) | def _reset(self):
method step (line 256) | def step(self, metrics, epoch=None):
method _reduce_lr (line 277) | def _reduce_lr(self, epoch):
method in_cooldown (line 288) | def in_cooldown(self):
method _init_is_better (line 291) | def _init_is_better(self, mode, threshold, threshold_mode):
FILE: my_package/DepthFlowProjection/DepthFlowProjectionLayer.py
class DepthFlowProjectionLayer (line 7) | class DepthFlowProjectionLayer(Function):
method __init__ (line 8) | def __init__(self,requires_grad):
method forward (line 13) | def forward(ctx, input1, input2, requires_grad):
method backward (line 59) | def backward(ctx, gradoutput):
FILE: my_package/DepthFlowProjection/DepthFlowProjectionModule.py
class DepthFlowProjectionModule (line 7) | class DepthFlowProjectionModule(Module):
method __init__ (line 8) | def __init__(self, requires_grad = True):
method forward (line 13) | def forward(self, input1, input2):
FILE: my_package/DepthFlowProjection/depthflowprojection_cuda.cc
function DepthFlowProjectionLayer_gpu_forward (line 10) | int DepthFlowProjectionLayer_gpu_forward(
function DepthFlowProjectionLayer_gpu_backward (line 70) | int DepthFlowProjectionLayer_gpu_backward(
function PYBIND11_MODULE (line 143) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/FilterInterpolation/FilterInterpolationLayer.py
class FilterInterpolationLayer (line 10) | class FilterInterpolationLayer(Function):
method __init__ (line 11) | def __init__(self):
method forward (line 14) | def forward(ctx, input1,input2,input3):
method backward (line 44) | def backward(ctx, gradoutput):
class WeightLayer (line 82) | class WeightLayer(Function):
method __init__ (line 83) | def __init__(self, lambda_e = 10.0/255.0, lambda_v = 1.0, Nw = 3):
method forward (line 91) | def forward(self, input1,input2,input3):
method backward (line 129) | def backward(self, gradoutput):
class PixelValueLayer (line 172) | class PixelValueLayer(Function):
method __init__ (line 173) | def __init__(self, sigma_d = 3, tao_r = 0.05, Prowindow = 2 ):
method forward (line 180) | def forward(self, input1, input3, flow_weights):
method backward (line 218) | def backward(self, gradoutput):
class PixelWeightLayer (line 267) | class PixelWeightLayer(Function):
method __init__ (line 268) | def __init__(self,threshhold, sigma_d =3, tao_r =0.05, Prowindow = 2 ):
method forward (line 275) | def forward(self, input3, flow_weights):
method backward (line 313) | def backward(self, gradoutput):
class ReliableWeightLayer (line 441) | class ReliableWeightLayer(Function):
method __init__ (line 442) | def __init__(self, threshhold, sigma_d =3, tao_r =0.05, Prowindow = 2 ):
method forward (line 450) | def forward(self, input3):
method backward (line 490) | def backward(self, gradoutput):
FILE: my_package/FilterInterpolation/FilterInterpolationModule.py
class FilterInterpolationModule (line 8) | class FilterInterpolationModule(Module):
method __init__ (line 9) | def __init__(self):
method forward (line 13) | def forward(self, input1, input2, input3):
class AdaptiveWeightInterpolationModule (line 25) | class AdaptiveWeightInterpolationModule(Module):
method __init__ (line 26) | def __init__(self, training = False, threshhold = 1e-6,
method forward (line 63) | def forward(self, input1, input2, input3, input4):
FILE: my_package/FilterInterpolation/filterinterpolation_cuda.cc
function FilterInterpolationLayer_gpu_forward (line 11) | int FilterInterpolationLayer_gpu_forward(
function FilterInterpolationLayer_gpu_backward (line 83) | int FilterInterpolationLayer_gpu_backward(
function PYBIND11_MODULE (line 168) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/FlowProjection/FlowProjectionLayer.py
class FlowProjectionLayer (line 10) | class FlowProjectionLayer(Function):
method __init__ (line 11) | def __init__(self,requires_grad):
method forward (line 16) | def forward(ctx, input1, requires_grad):
method backward (line 55) | def backward(ctx, gradoutput):
class FlowFillholelayer (line 87) | class FlowFillholelayer(Function):
method __init__ (line 88) | def __init__(self):
method forward (line 91) | def forward(self, input1):
FILE: my_package/FlowProjection/FlowProjectionModule.py
class FlowProjectionModule (line 5) | class FlowProjectionModule(Module):
method __init__ (line 6) | def __init__(self, requires_grad = True):
method forward (line 11) | def forward(self, input1):
FILE: my_package/FlowProjection/flowprojection_cuda.cc
function FlowProjectionLayer_gpu_forward (line 9) | int FlowProjectionLayer_gpu_forward(
function FlowProjectionLayer_gpu_backward (line 59) | int FlowProjectionLayer_gpu_backward(
function PYBIND11_MODULE (line 117) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/Interpolation/InterpolationLayer.py
class InterpolationLayer (line 10) | class InterpolationLayer(Function):
method __init__ (line 11) | def __init__(self):
method forward (line 15) | def forward(ctx, input1,input2):
method backward (line 42) | def backward(ctx, gradoutput):
FILE: my_package/Interpolation/InterpolationModule.py
class InterpolationModule (line 5) | class InterpolationModule(Module):
method __init__ (line 6) | def __init__(self):
method forward (line 10) | def forward(self, input1, input2):
FILE: my_package/Interpolation/interpolation_cuda.cc
function InterpolationLayer_gpu_forward (line 10) | int InterpolationLayer_gpu_forward(
function InterpolationLayer_gpu_backward (line 63) | int InterpolationLayer_gpu_backward(
function PYBIND11_MODULE (line 125) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/InterpolationCh/InterpolationChLayer.py
class InterpolationChLayer (line 10) | class InterpolationChLayer(Function):
method __init__ (line 11) | def __init__(self,ch):
method forward (line 16) | def forward(ctx, input1,input2):
method backward (line 43) | def backward(ctx, gradoutput):
FILE: my_package/InterpolationCh/InterpolationChModule.py
class InterpolationChModule (line 5) | class InterpolationChModule(Module):
method __init__ (line 6) | def __init__(self,ch):
method forward (line 11) | def forward(self, input1, input2):
FILE: my_package/InterpolationCh/interpolationch_cuda.cc
function InterpolationChLayer_gpu_forward (line 10) | int InterpolationChLayer_gpu_forward(
function InterpolationChLayer_gpu_backward (line 63) | int InterpolationChLayer_gpu_backward(
function PYBIND11_MODULE (line 125) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/MinDepthFlowProjection/minDepthFlowProjectionLayer.py
class minDepthFlowProjectionLayer (line 7) | class minDepthFlowProjectionLayer(Function):
method __init__ (line 8) | def __init__(self,requires_grad):
method forward (line 13) | def forward(ctx, input1, input2, requires_grad):
method backward (line 59) | def backward(ctx, gradoutput):
FILE: my_package/MinDepthFlowProjection/minDepthFlowProjectionModule.py
class minDepthFlowProjectionModule (line 7) | class minDepthFlowProjectionModule(Module):
method __init__ (line 8) | def __init__(self, requires_grad = True):
method forward (line 13) | def forward(self, input1, input2):
FILE: my_package/MinDepthFlowProjection/mindepthflowprojection_cuda.cc
function minDepthFlowProjectionLayer_gpu_forward (line 10) | int minDepthFlowProjectionLayer_gpu_forward(
function minDepthFlowProjectionLayer_gpu_backward (line 70) | int minDepthFlowProjectionLayer_gpu_backward(
function PYBIND11_MODULE (line 143) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/SeparableConv/SeparableConvLayer.py
class SeparableConvLayer (line 10) | class SeparableConvLayer(Function):
method __init__ (line 11) | def __init__(self,filtersize):
method forward (line 15) | def forward(self, input1,input2,input3):
method backward (line 57) | def backward(self, gradoutput):
FILE: my_package/SeparableConv/SeparableConvModule.py
class SeparableConvModule (line 5) | class SeparableConvModule(Module):
method __init__ (line 6) | def __init__(self,filtersize):
method forward (line 10) | def forward(self, input1, input2, input3):
FILE: my_package/SeparableConv/separableconv_cuda.cc
function SeparableConvLayer_gpu_forward (line 10) | int SeparableConvLayer_gpu_forward(
function SeparableConvLayer_gpu_backward (line 88) | int SeparableConvLayer_gpu_backward(
function PYBIND11_MODULE (line 178) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/SeparableConvFlow/SeparableConvFlowLayer.py
class SeparableConvFlowLayer (line 10) | class SeparableConvFlowLayer(Function):
method __init__ (line 11) | def __init__(self,filtersize):
method forward (line 16) | def forward(self, input1,input2,input3):
method backward (line 60) | def backward(self, gradoutput):
FILE: my_package/SeparableConvFlow/SeparableConvFlowModule.py
class SeparableConvFlowModule (line 5) | class SeparableConvFlowModule(Module):
method __init__ (line 6) | def __init__(self,filtersize):
method forward (line 10) | def forward(self, input1, input2, input3):
FILE: my_package/SeparableConvFlow/separableconvflow_cuda.cc
function SeparableConvFlowLayer_gpu_forward (line 9) | int SeparableConvFlowLayer_gpu_forward(
function SeparableConvFlowLayer_gpu_backward (line 103) | int SeparableConvFlowLayer_gpu_backward(
function PYBIND11_MODULE (line 201) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: my_package/test_module.py
function test_SeparableConvFlowModule (line 20) | def test_SeparableConvFlowModule(input1, input2, input3,filtersize):
function test_SeparableConvModule (line 124) | def test_SeparableConvModule(input1, input2, input3,filtersize):
function test_FilterInterpolation (line 218) | def test_FilterInterpolation(input1,input2,input3):
function test_InterpolationModule (line 313) | def test_InterpolationModule(input1,input2):
function test_InterpolationChModule (line 397) | def test_InterpolationChModule(input1,input2):
function test_FlowProjectionModule (line 481) | def test_FlowProjectionModule(input1):
function test_DepthFlowProjectionModule (line 576) | def test_DepthFlowProjectionModule(input1,input2):
function test_WeightedFlowProjectionModule (line 671) | def test_WeightedFlowProjectionModule(input1 , input2, input3):
function test_AdaptiveWeightInterpolationModule (line 768) | def test_AdaptiveWeightInterpolationModule(input1, input2, input3, input4):
FILE: networks/DAIN.py
class DAIN (line 16) | class DAIN(torch.nn.Module):
method __init__ (line 17) | def __init__(self,
method _initialize_weights (line 57) | def _initialize_weights(self):
method forward (line 81) | def forward(self, input):
method forward_flownets (line 197) | def forward_flownets(self, model, input, time_offsets = None):
method forward_singlePath (line 212) | def forward_singlePath(self, modulelist, input, name):
method get_MonoNet5 (line 243) | def get_MonoNet5(self, channel_in, channel_out, name):
method FlowProject (line 294) | def FlowProject(inputs, depth = None):
method FilterInterpolate_ctx (line 304) | def FilterInterpolate_ctx(ctx0,ctx2,offset,filter):
method FilterInterpolate (line 317) | def FilterInterpolate(ref0, ref2, offset, filter,filter_size2):
method conv_relu_conv (line 324) | def conv_relu_conv(input_filter, output_filter, kernel_size,
method conv_relu (line 342) | def conv_relu(input_filter, output_filter, kernel_size,
method conv_relu_maxpool (line 353) | def conv_relu_maxpool(input_filter, output_filter, kernel_size,
method conv_relu_unpool (line 369) | def conv_relu_unpool(input_filter, output_filter, kernel_size,
FILE: networks/DAIN_slowmotion.py
class DAIN_slowmotion (line 16) | class DAIN_slowmotion(torch.nn.Module):
method __init__ (line 17) | def __init__(self,
method _initialize_weights (line 56) | def _initialize_weights(self):
method forward (line 80) | def forward(self, input):
method forward_flownets (line 204) | def forward_flownets(self, model, input, time_offsets = None):
method forward_singlePath (line 219) | def forward_singlePath(self, modulelist, input, name):
method get_MonoNet5 (line 250) | def get_MonoNet5(self, channel_in, channel_out, name):
method FlowProject (line 301) | def FlowProject(inputs, depth = None):
method FilterInterpolate_ctx (line 311) | def FilterInterpolate_ctx(ctx0,ctx2,offset,filter, timeoffset):
method FilterInterpolate (line 324) | def FilterInterpolate(ref0, ref2, offset, filter,filter_size2, time_of...
method conv_relu_conv (line 339) | def conv_relu_conv(input_filter, output_filter, kernel_size,
method conv_relu (line 357) | def conv_relu(input_filter, output_filter, kernel_size,
method conv_relu_maxpool (line 368) | def conv_relu_maxpool(input_filter, output_filter, kernel_size,
method conv_relu_unpool (line 384) | def conv_relu_unpool(input_filter, output_filter, kernel_size,
FILE: train.py
function train (line 20) | def train():
Condensed preview — 123 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (619K chars).
[
{
"path": ".gitignore",
"chars": 2416,
"preview": "# Ignore Git here\n.git\n\n# But not these files...\n# !.gitignore\n\ncheckpoints/test_local/opt.txt\nPWCNet/pwc_net.pth.tar\nMe"
},
{
"path": "AverageMeter.py",
"chars": 392,
"preview": "\n\nclass AverageMeter(object):\n \"\"\"Computes and stores the average and current value\"\"\"\n def __init__(self):\n "
},
{
"path": "Colab_DAIN.ipynb",
"chars": 12952,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"colab\": {\n \"name\": \"Colab_DAIN_new.ipynb\",\n \"pr"
},
{
"path": "LICENSE",
"chars": 1066,
"preview": "MIT License\n\nCopyright (c) 2019 Wenbo Bao\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\n"
},
{
"path": "MegaDepth/LICENSE",
"chars": 1067,
"preview": "MIT License\n\nCopyright (c) 2018 Zhengqi Li\n\nPermission is hereby granted, free of charge, to any person obtaining a copy"
},
{
"path": "MegaDepth/MegaDepth_model.py",
"chars": 535,
"preview": "import torch\nimport sys\nfrom torch.autograd import Variable\nimport numpy as np\nfrom .options.train_options import TrainO"
},
{
"path": "MegaDepth/README.md",
"chars": 4053,
"preview": "# MegaDepth: Learning Single-View Depth Prediction from Internet Photos\n\nThis is a code of the algorithm described in \"M"
},
{
"path": "MegaDepth/SDR_compute.py",
"chars": 2780,
"preview": "import time\nimport torch\nimport sys\n\nfrom options.train_options import TrainOptions\nopt = TrainOptions().parse() # set "
},
{
"path": "MegaDepth/__init__.py",
"chars": 34,
"preview": "from .MegaDepth_model import *\n"
},
{
"path": "MegaDepth/data/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "MegaDepth/data/aligned_data_loader.py",
"chars": 2226,
"preview": "import random\nimport numpy as np\nimport torch.utils.data\nfrom data.base_data_loader import BaseDataLoader\nfrom data.imag"
},
{
"path": "MegaDepth/data/base_data_loader.py",
"chars": 198,
"preview": "\nclass BaseDataLoader():\n def __init__(self):\n pass\n \n # def initialize(self):\n # # self.opt = op"
},
{
"path": "MegaDepth/data/data_loader.py",
"chars": 583,
"preview": "\ndef CreateDataLoader(_root, _list_dir, _input_height, _input_width, is_flip = True, shuffle = True):\n data_loader ="
},
{
"path": "MegaDepth/data/image_folder.py",
"chars": 7149,
"preview": "################################################################################\n# Code from\n# https://github.com/pytorc"
},
{
"path": "MegaDepth/models/HG_model.py",
"chars": 5957,
"preview": "import numpy as np\nimport torch\nimport os\nfrom torch.autograd import Variable\nfrom .base_model import BaseModel\nimport s"
},
{
"path": "MegaDepth/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "MegaDepth/models/base_model.py",
"chars": 1658,
"preview": "import os\nimport torch\n\nclass BaseModel():\n def name(self):\n return 'BaseModel'\n\n def initialize(self, opt)"
},
{
"path": "MegaDepth/models/models.py",
"chars": 199,
"preview": "\ndef create_model(opt,pretrained=None):\n model = None\n from .HG_model import HGModel\n model = HGModel(opt,pretr"
},
{
"path": "MegaDepth/options/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "MegaDepth/options/base_options.py",
"chars": 4792,
"preview": "import argparse\nimport os\nfrom ..util import util\n\nclass BaseOptions():\n def __init__(self):\n self.parser = ar"
},
{
"path": "MegaDepth/options/test_options.py",
"chars": 844,
"preview": "from .base_options import BaseOptions\n\nclass TestOptions(BaseOptions):\n def initialize(self):\n BaseOptions.ini"
},
{
"path": "MegaDepth/options/train_options.py",
"chars": 2453,
"preview": "from .base_options import BaseOptions\n\nclass TrainOptions(BaseOptions):\n def initialize(self):\n BaseOptions.in"
},
{
"path": "MegaDepth/pytorch_DIW_scratch.py",
"chars": 25569,
"preview": "\nimport torch\nimport torch.nn as nn\nfrom torch.autograd import Variable\nfrom functools import reduce\n\nclass LambdaBase(n"
},
{
"path": "MegaDepth/rmse_error_main.py",
"chars": 2101,
"preview": "import time\nimport torch\nimport sys\n\nfrom options.train_options import TrainOptions\nopt = TrainOptions().parse() # set "
},
{
"path": "MegaDepth/util/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "MegaDepth/util/html.py",
"chars": 1912,
"preview": "import dominate\nfrom dominate.tags import *\nimport os\n\n\nclass HTML:\n def __init__(self, web_dir, title, reflesh=0):\n "
},
{
"path": "MegaDepth/util/image_pool.py",
"chars": 1141,
"preview": "import random\nimport numpy as np\nimport torch\nfrom pdb import set_trace as st\nfrom torch.autograd import Variable\nclass "
},
{
"path": "MegaDepth/util/png.py",
"chars": 978,
"preview": "import struct\nimport zlib\n\ndef encode(buf, width, height):\n \"\"\" buf: must be bytes or a bytearray in py3, a regular str"
},
{
"path": "MegaDepth/util/util.py",
"chars": 2177,
"preview": "from __future__ import print_function\nimport torch\nimport numpy as np\nfrom PIL import Image\nimport inspect, re\nimport nu"
},
{
"path": "MegaDepth/util/visualizer.py",
"chars": 3864,
"preview": "import numpy as np\nimport os\nimport ntpath\nimport time\nfrom . import util\nfrom . import html\n\nclass Visualizer():\n de"
},
{
"path": "PWCNet/PWCNet.py",
"chars": 23264,
"preview": "\"\"\"\nimplementation of the PWC-DC network for optical flow estimation by Sun et al., 2018\n\nJinwei Gu and Zhile Ren\n\n\"\"\"\n\n"
},
{
"path": "PWCNet/__init__.py",
"chars": 21,
"preview": "from .PWCNet import *"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "PWCNet/correlation_package_pytorch1_0/build.sh",
"chars": 186,
"preview": "#!/usr/bin/env bash\n\necho \"Need pytorch>=1.0.0\"\nsource activate pytorch1.0.0\n\nexport PYTHONPATH=$PYTHONPATH:$(pwd)/../.."
},
{
"path": "PWCNet/correlation_package_pytorch1_0/clean.sh",
"chars": 133,
"preview": "#!/usr/bin/env bash\n\necho \"Need pytorch>=1.0.0\"\nsource activate pytorch1.0.0\n\n\nrm -rf build *.egg-info dist\n#python setu"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/correlation.py",
"chars": 2264,
"preview": "import torch\nfrom torch.nn.modules.module import Module\nfrom torch.autograd import Function\nimport correlation_cuda\n\ncla"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/correlation_cuda.cc",
"chars": 6637,
"preview": "#include <torch/torch.h>\n#include <ATen/ATen.h>\n#include <stdio.h>\n#include <iostream>\n#include <ATen/cuda/CUDAContext.h"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/correlation_cuda_kernel.cu",
"chars": 19919,
"preview": "#include <stdio.h>\n\n#include \"correlation_cuda_kernel.cuh\"\n\n#define CUDA_NUM_THREADS 1024\n#define THREADS_PER_BLOCK 32\n#"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/correlation_cuda_kernel.cuh",
"chars": 1409,
"preview": "#pragma once\n\n#include <ATen/ATen.h>\n#include <ATen/Context.h>\n#include <cuda_runtime.h>\n\nint correlation_forward_cuda_k"
},
{
"path": "PWCNet/correlation_package_pytorch1_0/setup.py",
"chars": 514,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "PWCNet/models/PWCNet.py",
"chars": 20998,
"preview": "\"\"\"\nimplementation of the PWC-DC network for optical flow estimation by Sun et al., 2018\n\nJinwei Gu and Zhile Ren\n\n\"\"\"\n\n"
},
{
"path": "PWCNet/models/__init__.py",
"chars": 22,
"preview": "from .PWCNet import *\n"
},
{
"path": "README.md",
"chars": 11257,
"preview": "# DAIN (Depth-Aware Video Frame Interpolation)\n[Project](https://sites.google.com/view/wenbobao/dain) **|** [Paper](http"
},
{
"path": "Resblock/BasicBlock.py",
"chars": 3744,
"preview": "import torch.nn as nn\nimport math\nimport torch.utils.model_zoo as model_zoo\nimport torch.nn.init as weight_init\nimport t"
},
{
"path": "Resblock/__init__.py",
"chars": 27,
"preview": "from .BasicBlock import *"
},
{
"path": "S2D_models/S2DF.py",
"chars": 7789,
"preview": "import torch.nn as nn\nimport math\nimport torch.utils.model_zoo as model_zoo\n\nimport torch\n# __all__ = ['ResNet', 'resnet"
},
{
"path": "S2D_models/__init__.py",
"chars": 19,
"preview": "from .S2DF import *"
},
{
"path": "Stack.py",
"chars": 471,
"preview": "\nclass Stack:\n def __init__(self):\n self.stack = []\n def pop(self):\n if self.is_empty():\n "
},
{
"path": "balancedsampler.py",
"chars": 1734,
"preview": "from torch.utils.data.sampler import Sampler\nimport torch\n\nclass RandomBalancedSampler(Sampler):\n \"\"\"Samples elements"
},
{
"path": "colab_interpolate.py",
"chars": 6488,
"preview": "import time\nimport os\nfrom torch.autograd import Variable\nimport torch\nimport numpy as np\nimport numpy\nimport networks\nf"
},
{
"path": "datasets/Vimeo_90K_interp.py",
"chars": 812,
"preview": "import os.path\nimport random\n# import glob\nimport math\nfrom .listdatasets import ListDataset,Vimeo_90K_loader\n\n\ndef make"
},
{
"path": "datasets/__init__.py",
"chars": 144,
"preview": "from .Vimeo_90K_interp import Vimeo_90K_interp\n\n__all__ = (\n 'Vimeo_90K_interp',\n)\n\n# Vimeo_90K = \"/tmp4/wenbo"
},
{
"path": "datasets/listdatasets.py",
"chars": 2229,
"preview": "import torch.utils.data as data\nimport os\nimport os.path\nfrom scipy.ndimage import imread\nimport numpy as np\nimport rand"
},
{
"path": "demo_MiddleBury.py",
"chars": 6909,
"preview": "import time\nimport os\nfrom torch.autograd import Variable\nimport math\nimport torch\n\nimport random\nimport numpy as np\nimp"
},
{
"path": "demo_MiddleBury_slowmotion.py",
"chars": 7664,
"preview": "import time\nimport os\nfrom torch.autograd import Variable\nimport torch\nimport random\nimport numpy as np\nimport numpy\nimp"
},
{
"path": "environment.yaml",
"chars": 3023,
"preview": "name: pytorch1.0.0\nchannels:\n - pytorch\n - serge-sans-paille\n - anaconda\n - conda-forge\n - defaults\ndependencies:\n "
},
{
"path": "loss_function.py",
"chars": 3178,
"preview": "import sys\nimport os\n\nimport sys\nimport threading\nimport torch\nfrom torch.autograd import Variable\nfrom lr_scheduler im"
},
{
"path": "lr_scheduler.py",
"chars": 12877,
"preview": "from bisect import bisect_right\r\nfrom torch.optim.optimizer import Optimizer\r\n\r\n\r\nclass _LRScheduler(object):\r\n def _"
},
{
"path": "my_args.py",
"chars": 6677,
"preview": "import os\nimport datetime\nimport argparse\nimport numpy\nimport networks\nimport torch\nmodelnames = networks.__all__\n# im"
},
{
"path": "my_package/DepthFlowProjection/DepthFlowProjectionLayer.py",
"chars": 4028,
"preview": "# this is for wrapping the customized layer\nimport torch\nfrom torch.autograd import Function\n#import _ext.my_lib as my_l"
},
{
"path": "my_package/DepthFlowProjection/DepthFlowProjectionModule.py",
"chars": 898,
"preview": "# modules/FlowProjectionModule.py\nfrom torch.nn.modules.module import Module\nfrom .DepthFlowProjectionLayer import Depth"
},
{
"path": "my_package/DepthFlowProjection/__init__.py",
"chars": 42,
"preview": "from .DepthFlowProjectionModule import *\n"
},
{
"path": "my_package/DepthFlowProjection/depthflowprojection_cuda.cc",
"chars": 4636,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/DepthFlowProjection/depthflowprojection_cuda_kernel.cu",
"chars": 22355,
"preview": "#include <stdio.h>\r\n\r\n#include \"depthflowprojection_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeF"
},
{
"path": "my_package/DepthFlowProjection/depthflowprojection_cuda_kernel.cuh",
"chars": 1429,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint DepthFlowProjection_"
},
{
"path": "my_package/DepthFlowProjection/setup.py",
"chars": 546,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/FilterInterpolation/FilterInterpolationLayer.py",
"chars": 20311,
"preview": "# this is for wrapping the customized layer\nimport torch\nfrom torch.autograd import Function\nimport filterinterpolation_"
},
{
"path": "my_package/FilterInterpolation/FilterInterpolationModule.py",
"chars": 7400,
"preview": "# modules/AdaptiveInterpolationLayer.py\nfrom torch.nn import Module\nimport torch\nfrom torch.autograd import Variable\nfro"
},
{
"path": "my_package/FilterInterpolation/__init__.py",
"chars": 41,
"preview": "from .FilterInterpolationModule import *\n"
},
{
"path": "my_package/FilterInterpolation/filterinterpolation_cuda.cc",
"chars": 5503,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/FilterInterpolation/filterinterpolation_cuda_kernel.cu",
"chars": 29284,
"preview": "#include <stdio.h>\r\n\r\n#include \"filterinterpolation_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeF"
},
{
"path": "my_package/FilterInterpolation/filterinterpolation_cuda_kernel.cuh",
"chars": 1458,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint FilterInterpolationL"
},
{
"path": "my_package/FilterInterpolation/setup.py",
"chars": 546,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/FlowProjection/FlowProjectionLayer.py",
"chars": 5723,
"preview": "# this is for wrapping the customized layer\nimport torch\nfrom torch.autograd import Function\nimport flowprojection_cuda "
},
{
"path": "my_package/FlowProjection/FlowProjectionModule.py",
"chars": 715,
"preview": "# modules/FlowProjectionModule.py\nfrom torch.nn import Module\nfrom .FlowProjectionLayer import FlowProjectionLayer #, Fl"
},
{
"path": "my_package/FlowProjection/__init__.py",
"chars": 36,
"preview": "from .FlowProjectionModule import *"
},
{
"path": "my_package/FlowProjection/flowprojection_cuda.cc",
"chars": 3717,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/FlowProjection/flowprojection_cuda_kernel.cu",
"chars": 17573,
"preview": "#include <stdio.h>\r\n\r\n#include \"flowprojection_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeFuncti"
},
{
"path": "my_package/FlowProjection/flowprojection_cuda_kernel.cuh",
"chars": 1060,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint FlowProjection_gpu_f"
},
{
"path": "my_package/FlowProjection/setup.py",
"chars": 526,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/Interpolation/InterpolationLayer.py",
"chars": 3070,
"preview": "# this is for wrapping the customized layer\r\nimport torch\r\nfrom torch.autograd import Function\r\nimport interpolation_cud"
},
{
"path": "my_package/Interpolation/InterpolationModule.py",
"chars": 458,
"preview": "# modules/InterpolationLayer.py\r\nfrom torch.nn import Module\r\nfrom .InterpolationLayer import InterpolationLayer\r\n\r\nclas"
},
{
"path": "my_package/Interpolation/__init__.py",
"chars": 35,
"preview": "from .InterpolationModule import *"
},
{
"path": "my_package/Interpolation/interpolation_cuda.cc",
"chars": 3921,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/Interpolation/interpolation_cuda_kernel.cu",
"chars": 10390,
"preview": "#include <stdio.h>\r\n\r\n#include \"interpolation_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeFunctio"
},
{
"path": "my_package/Interpolation/interpolation_cuda_kernel.cuh",
"chars": 1085,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint InterpolationLayer_g"
},
{
"path": "my_package/Interpolation/setup.py",
"chars": 522,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/InterpolationCh/InterpolationChLayer.py",
"chars": 3194,
"preview": "# this is for wrapping the customized layer\r\nimport torch\r\nfrom torch.autograd import Function\r\nimport interpolationch_c"
},
{
"path": "my_package/InterpolationCh/InterpolationChModule.py",
"chars": 497,
"preview": "# modules/InterpolationLayer.py\r\nfrom torch.nn import Module\r\nfrom .InterpolationChLayer import InterpolationChLayer\r\n\r\n"
},
{
"path": "my_package/InterpolationCh/__init__.py",
"chars": 38,
"preview": "from .InterpolationChModule import *\n"
},
{
"path": "my_package/InterpolationCh/interpolationch_cuda.cc",
"chars": 3965,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/InterpolationCh/interpolationch_cuda_kernel.cu",
"chars": 10427,
"preview": "#include <stdio.h>\r\n\r\n#include \"interpolationch_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeFunct"
},
{
"path": "my_package/InterpolationCh/interpolationch_cuda_kernel.cuh",
"chars": 1092,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\n\r\nint InterpolationChLay"
},
{
"path": "my_package/InterpolationCh/setup.py",
"chars": 530,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/MinDepthFlowProjection/__init__.py",
"chars": 45,
"preview": "from .minDepthFlowProjectionModule import *\n"
},
{
"path": "my_package/MinDepthFlowProjection/minDepthFlowProjectionLayer.py",
"chars": 4049,
"preview": "# this is for wrapping the customized layer\nimport torch\nfrom torch.autograd import Function\n#import _ext.my_lib as my_l"
},
{
"path": "my_package/MinDepthFlowProjection/minDepthFlowProjectionModule.py",
"chars": 919,
"preview": "# modules/FlowProjectionModule.py\nfrom torch.nn.modules.module import Module\nfrom .minDepthFlowProjectionLayer import mi"
},
{
"path": "my_package/MinDepthFlowProjection/mindepthflowprojection_cuda.cc",
"chars": 4673,
"preview": "#include <torch/extension.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAC"
},
{
"path": "my_package/MinDepthFlowProjection/mindepthflowprojection_cuda_kernel.cu",
"chars": 20513,
"preview": "#include <stdio.h>\r\n\r\n#include \"mindepthflowprojection_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Nati"
},
{
"path": "my_package/MinDepthFlowProjection/mindepthflowprojection_cuda_kernel.cuh",
"chars": 1435,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint minDepthFlowProjecti"
},
{
"path": "my_package/MinDepthFlowProjection/setup.py",
"chars": 558,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/SeparableConv/SeparableConvLayer.py",
"chars": 3742,
"preview": "# this is for wrapping the customized layer\r\nimport torch\r\nfrom torch.autograd import Function\r\nimport _ext.my_lib as my"
},
{
"path": "my_package/SeparableConv/SeparableConvModule.py",
"chars": 484,
"preview": "# modules/InterpolationLayer.py\r\nfrom torch.nn import Module\r\nfrom functions.SeparableConvLayer import SeparableConvLaye"
},
{
"path": "my_package/SeparableConv/__init__.py",
"chars": 36,
"preview": "from .SeparableConvModule import *\n"
},
{
"path": "my_package/SeparableConv/separableconv_cuda.cc",
"chars": 5810,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/SeparableConv/separableconv_cuda_kernel.cu",
"chars": 10122,
"preview": "#include <stdio.h>\r\n\r\n#include \"separableconv_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeFunctio"
},
{
"path": "my_package/SeparableConv/separableconv_cuda_kernel.cuh",
"chars": 1662,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint SeparableConvLayer_g"
},
{
"path": "my_package/SeparableConv/setup.py",
"chars": 522,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/SeparableConvFlow/SeparableConvFlowLayer.py",
"chars": 4462,
"preview": "# this is for wrapping the customized layer\r\nimport torch\r\nfrom torch.autograd import Function\r\nimport separableconvflow"
},
{
"path": "my_package/SeparableConvFlow/SeparableConvFlowModule.py",
"chars": 583,
"preview": "# modules/InterpolationLayer.py\r\nfrom torch.nn import Module\r\nfrom .SeparableConvFlowLayer import SeparableConvFlowLayer"
},
{
"path": "my_package/SeparableConvFlow/__init__.py",
"chars": 40,
"preview": "from .SeparableConvFlowModule import *\n"
},
{
"path": "my_package/SeparableConvFlow/separableconvflow_cuda.cc",
"chars": 7011,
"preview": "#include <torch/torch.h>\r\n#include <ATen/ATen.h>\r\n#include <stdio.h>\r\n#include <iostream>\r\n#include <ATen/cuda/CUDAConte"
},
{
"path": "my_package/SeparableConvFlow/separableconvflow_cuda_kernel.cu",
"chars": 12559,
"preview": "#include <stdio.h>\r\n\r\n#include \"separableconvflow_cuda_kernel.cuh\"\r\n\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/NativeFun"
},
{
"path": "my_package/SeparableConvFlow/separableconvflow_cuda_kernel.cuh",
"chars": 1953,
"preview": "#pragma once\r\n\r\n#include <ATen/ATen.h>\r\n#include <ATen/Context.h>\r\n#include <cuda_runtime.h>\r\n\r\nint SeparableConvFlowLay"
},
{
"path": "my_package/SeparableConvFlow/setup.py",
"chars": 538,
"preview": "#!/usr/bin/env python3\nimport os\nimport torch\n\nfrom setuptools import setup, find_packages\nfrom torch.utils.cpp_extensio"
},
{
"path": "my_package/build.sh",
"chars": 760,
"preview": "#!/usr/bin/env bash\n\necho \"Need pytorch>=1.0.0\"\nsource activate pytorch1.0.0\n\nexport PYTHONPATH=$PYTHONPATH:$(pwd)\n\ncd M"
},
{
"path": "my_package/clean.sh",
"chars": 730,
"preview": "#!/usr/bin/env bash\n\necho \"Need pytorch>=1.0.0\"\nsource activate pytorch1.0.0\n\ncd MinDepthFlowProjection\nrm -rf build *.e"
},
{
"path": "my_package/compiler_args.py",
"chars": 1923,
"preview": "# References: https://developer.nvidia.com/cuda-gpus\r\nnvcc_args = [\r\n # Tesla: K80, K80\r\n # Quadro: (None)\r\n # "
},
{
"path": "my_package/test_module.py",
"chars": 38594,
"preview": "# main.py\r\nimport torch\r\nimport torch.nn as nn\r\nfrom torch.autograd import Variable\r\nfrom torch.autograd import gradchec"
},
{
"path": "networks/DAIN.py",
"chars": 14953,
"preview": "# -*- coding: utf-8 -*-\nimport torch\nimport torch.nn as nn\nfrom my_package.FilterInterpolation import FilterInterpolati"
},
{
"path": "networks/DAIN_slowmotion.py",
"chars": 15905,
"preview": "# -*- coding: utf-8 -*-\nimport torch\nimport torch.nn as nn\nfrom my_package.FilterInterpolation import FilterInterpolati"
},
{
"path": "networks/__init__.py",
"chars": 131,
"preview": "from .DAIN import DAIN\nfrom .DAIN_slowmotion import DAIN_slowmotion\n__all__ = (\n 'DAIN',\n 'DAIN_slow"
},
{
"path": "train.py",
"chars": 12288,
"preview": "import sys\nimport os\n\nimport threading\nimport torch\nfrom torch.autograd import Variable\nimport torch.utils.data\nfrom lr_"
}
]
About this extraction
This page contains the full source code of the baowenbo/DAIN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 123 files (560.1 KB), approximately 164.3k tokens, and a symbol index with 362 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.