Repository: tfzhou/MATNet
Branch: master
Commit: c8b95e527c48
Files: 52
Total size: 167.4 KB
Directory structure:
gitextract_05390pb8/
├── .gitignore
├── .idea/
│ ├── aaai20-zero-ShotVOS-github.iml
│ ├── inspectionProfiles/
│ │ └── Project_Default.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── 3rdparty/
│ ├── run_davis.py
│ ├── run_davis2017.m
│ ├── run_davis_flow.py
│ ├── run_youtube.m
│ ├── run_youtube.py
│ ├── run_youtube_flow.py
│ └── seg2edge.m
├── README.md
├── apply_densecrf_davis.py
├── args.py
├── data/
│ ├── db_info.yaml
│ ├── palette.txt
│ ├── run_davis2017.m
│ ├── run_youtube.m
│ └── seg2edge.m
├── dataloader/
│ ├── base.py
│ ├── base_youtube.py
│ ├── custom_transforms.py
│ ├── dataset_utils.py
│ ├── davis2017.py
│ ├── davis2017_youtubevos_ehem.py
│ ├── hed_bsds.py
│ └── youtubeVOS.py
├── dataset_lmdb_generator.py
├── measures/
│ ├── __init__.py
│ ├── f_boundary.py
│ ├── jaccard.py
│ ├── statistics.py
│ └── t_stability.py
├── misc/
│ ├── __init__.py
│ ├── config.py
│ ├── config_youtubeVOS.py
│ ├── io_aux.py
│ ├── logger.py
│ ├── parallel.py
│ ├── timer.py
│ └── visual.py
├── modules/
│ ├── MATNet.py
│ └── cbam.py
├── requirements.txt
├── test_MATNet.py
├── train_MATNet.py
└── utils/
├── __init__.py
├── objectives.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
ckpt/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: .idea/aaai20-zero-ShotVOS-github.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (pytorch-1.0.0)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
================================================
FILE: .idea/inspectionProfiles/Project_Default.xml
================================================
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="PyYAML" />
<item index="1" class="java.lang.String" itemvalue="skimage" />
<item index="2" class="java.lang.String" itemvalue="Pillow" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
================================================
FILE: .idea/misc.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (pytorch-1.0.0)" project-jdk-type="Python SDK" />
</project>
================================================
FILE: .idea/modules.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" filepath="$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" />
</modules>
</component>
</project>
================================================
FILE: .idea/vcs.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/3rdparty/pytorch-hed" vcs="Git" />
<mapping directory="$PROJECT_DIR$/3rdparty/pytorch-pwc" vcs="Git" />
</component>
</project>
================================================
FILE: .idea/workspace.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="9b6416b8-b073-4819-80b8-0da9065c5674" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/inspectionProfiles/Project_Default.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/modules.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/base.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/base_youtube.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/custom_transforms.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/dataset_utils.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/davis2017.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/hed_bsds.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/youtubeVOS.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/3rdparty/pytorch-hed/run.py" beforeDir="false" afterPath="$PROJECT_DIR$/3rdparty/pytorch-hed/run.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py" beforeDir="false" afterPath="$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="126">
<caret line="7" column="44" selection-start-line="7" selection-start-column="44" selection-end-line="7" selection-end-column="44" />
</first_editor>
<second_editor />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="486">
<caret line="99" column="32" selection-start-line="99" selection-start-column="32" selection-end-line="99" selection-end-column="32" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/dataset_lmdb_generator.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
</file>
</leaf>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/train_MANet_Residual_Cascade_SA_Interleave.py" />
<option value="$PROJECT_DIR$/apply_densecrf_davis.py" />
<option value="$PROJECT_DIR$/utils/objectives.py" />
<option value="$PROJECT_DIR$/utils/utils.py" />
<option value="$PROJECT_DIR$/misc/visual.py" />
<option value="$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py" />
<option value="$PROJECT_DIR$/dataloader/dataset_utils.py" />
<option value="$PROJECT_DIR$/args.py" />
<option value="$PROJECT_DIR$/dataloader/base.py" />
<option value="$PROJECT_DIR$/misc/config_youtubeVOS.py" />
<option value="$PROJECT_DIR$/misc/config.py" />
<option value="$PROJECT_DIR$/modules/MATNet.py" />
<option value="$PROJECT_DIR$/measures/jaccard.py" />
<option value="$PROJECT_DIR$/train_MATNet.py" />
<option value="$PROJECT_DIR$/test_MATNet.py" />
<option value="$PROJECT_DIR$/README.md" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="65" />
<option name="y" value="325" />
<option name="width" value="1853" />
<option name="height" value="1055" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="aaai20-zero-ShotVOS-github" type="b2602c69:ProjectViewProjectNode" />
<item name="aaai20-zero-ShotVOS-github" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="aaai20-zero-ShotVOS-github" type="b2602c69:ProjectViewProjectNode" />
<item name="aaai20-zero-ShotVOS-github" type="462c0819:PsiDirectoryNode" />
<item name="utils" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="WebServerToolWindowFactoryState" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.apply_densecrf_davis">
<configuration name="apply_densecrf_davis" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="aaai20-zero-ShotVOS-github" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/apply_densecrf_davis.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="test_MATNet" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="aaai20-zero-ShotVOS-github" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test_MATNet.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.apply_densecrf_davis" />
<item itemvalue="Python.test_MATNet" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="9b6416b8-b073-4819-80b8-0da9065c5674" name="Default Changelist" comment="" />
<created>1574574216202</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1574574216202</updated>
<workItem from="1574574222318" duration="8114000" />
<workItem from="1574586098553" duration="647000" />
<workItem from="1574600105252" duration="1530000" />
<workItem from="1574603699909" duration="23031000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="33322000" />
</component>
<component name="ToolWindowManager">
<frame x="65" y="-4" width="1855" height="1084" extended-state="6" />
<editor active="true" />
<layout>
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.18815717" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32959184" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Version Control" order="8" />
<window_info anchor="bottom" id="Database Changes" order="9" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" visible="true" weight="0.22040816" />
<window_info anchor="bottom" id="Terminal" order="11" />
<window_info anchor="bottom" id="Python Console" order="12" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info anchor="right" id="Remote Host" order="3" />
<window_info anchor="right" id="SciView" order="4" />
<window_info anchor="right" id="Database" order="5" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/aaai20_zero_ShotVOS_github$test_MATNet.coverage" NAME="test_MATNet Coverage Results" MODIFIED="1574582203491" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/aaai20_zero_ShotVOS_github$apply_densecrf_davis.coverage" NAME="apply_densecrf_davis Coverage Results" MODIFIED="1574582766918" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA.py" />
<entry file="file://$PROJECT_DIR$/modules/MANet.py" />
<entry file="file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA_Interleave_1layers.py" />
<entry file="file://$PROJECT_DIR$/modules/seed.py" />
<entry file="file://$PROJECT_DIR$/utils/hungarian.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="288">
<caret line="16" column="23" lean-forward="true" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/parallel.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/statistics.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#18#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017_ehem.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/dataset_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="9" column="4" selection-start-line="9" selection-start-column="4" selection-end-line="9" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-342">
<caret line="20" column="21" selection-start-line="20" selection-start-column="21" selection-end-line="20" selection-end-column="21" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/run_davis2017.m">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/run_youtube.m">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/hed_bsds.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/youtubeVOS.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/base_youtube.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#16#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/base.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="414">
<caret line="23" column="30" selection-start-line="23" selection-start-column="30" selection-end-line="23" selection-end-column="30" />
<folding>
<element signature="e#451#467#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/io_aux.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="11" column="41" selection-start-line="11" selection-start-column="41" selection-end-line="11" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/config.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="280">
<caret line="53" column="30" selection-start-line="53" selection-start-column="30" selection-end-line="53" selection-end-column="30" />
<folding>
<element signature="e#22#33#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/config_youtubeVOS.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="820">
<caret line="65" column="60" selection-start-line="65" selection-start-column="60" selection-end-line="65" selection-end-column="60" />
<folding>
<element signature="e#22#33#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/logger.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/modules/MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="756">
<caret line="261" column="47" lean-forward="true" selection-start-line="261" selection-start-column="47" selection-end-line="261" selection-end-column="47" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/utils/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/modules/cbam.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="11" column="21" selection-start-line="11" selection-start-column="21" selection-end-line="11" selection-end-column="21" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/utils/objectives.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="9" column="25" selection-start-line="9" selection-start-column="25" selection-end-line="9" selection-end-column="25" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/visual.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="12" column="41" selection-start-line="12" selection-start-column="41" selection-end-line="12" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/timer.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/t_stability.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/f_boundary.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/utils/utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda2/envs/pytorch-1.0.0/lib/python3.6/site-packages/torch/utils/data/dataloader.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2044">
<caret line="767" column="8" selection-start-line="767" selection-start-column="8" selection-end-line="767" selection-end-column="8" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/train_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="591">
<caret line="145" column="20" selection-start-line="145" selection-start-column="20" selection-end-line="145" selection-end-column="20" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-116">
<caret line="193" column="56" selection-start-line="193" selection-start-column="56" selection-end-line="193" selection-end-column="56" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/measures/jaccard.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="564">
<caret line="42" selection-start-line="42" selection-end-line="42" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/apply_densecrf_davis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="8" column="29" lean-forward="true" selection-start-line="8" selection-start-column="29" selection-end-line="8" selection-end-column="29" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/args.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="8" column="54" selection-start-line="8" selection-start-column="54" selection-end-line="8" selection-end-column="54" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/test_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="486">
<caret line="99" column="32" selection-start-line="99" selection-start-column="32" selection-end-line="99" selection-end-column="32" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataset_lmdb_generator.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="126">
<caret line="7" column="44" selection-start-line="7" selection-start-column="44" selection-end-line="7" selection-end-column="44" />
</first_editor>
<second_editor />
</state>
</provider>
</entry>
</component>
</project>
================================================
FILE: 3rdparty/run_davis.py
================================================
import os
import glob
import torch
import numpy
import PIL
from run import estimate
def main():
    """Run HED edge estimation over every DAVIS2017 frame and save edge maps.

    For each video directory under ``davis_folder``, each JPEG frame is
    converted to a BGR, CHW float tensor in [0, 1], passed through
    ``estimate``, and the first output channel is written as an 8-bit image
    with the original frame's basename under ``save_dir/<video>/``.
    """
    davis_folder = '/media/iiai/data/VOS/DAVIS2017/JPEGImages/480p'
    save_dir = '/media/iiai/data/VOS/DAVIS2017/davis2017-hed'

    videos = os.listdir(davis_folder)
    print(videos)

    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        # exist_ok removes the check-then-create race of the old
        # os.path.exists()/os.makedirs() pair.
        os.makedirs(save_dir_video, exist_ok=True)

        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for imagefile in imagefiles:
            # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1] (network input format).
            tensorInput = torch.FloatTensor(
                numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
                .transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))

            tensorOutput = estimate(tensorInput)

            save_file = os.path.join(save_dir_video, os.path.basename(imagefile))
            PIL.Image.fromarray(
                (tensorOutput.clamp(0.0, 1.0).numpy()
                 .transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)
            ).save(save_file)


# Guard the entry point so importing this module does not start a full dataset
# run; the sibling run_*_flow.py scripts already follow this convention.
if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_davis2017.m
================================================
% Convert DAVIS2017 annotation masks into binary edge maps.
% Every PNG under src_dir/<seq>/ is passed through seg2edge (radius 2,
% no ignore labels, 'regular' mode) and written to save_dir/<seq>/.
clc

save_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations_edge/480p';
src_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations/480p';

listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);  % drop the '.' and '..' entries

for si = 1 : numel(seq_names)
    seq_dir = fullfile(src_dir, seq_names{si});
    out_dir = fullfile(save_dir, seq_names{si});

    frames = dir(fullfile(seq_dir, '*.png'));
    frame_names = {frames.name};

    for fi = 1 : numel(frame_names)
        mask = imread(fullfile(seq_dir, frame_names{fi}));
        edge_map = seg2edge(mask, 2, [], 'regular');
        if ~exist(out_dir, 'dir')
            mkdir(out_dir)
        end
        imwrite(edge_map, fullfile(out_dir, frame_names{fi}))
    end
end
================================================
FILE: 3rdparty/run_davis_flow.py
================================================
import torch
import glob
import getopt
import math
import numpy
import os
import PIL
import PIL.Image
import sys
from run import estimate
import flow_vis, cv2
def main():
    """Estimate optical flow for every consecutive DAVIS frame pair.

    Frames within each video directory are paired as (t, t+1) and handed to
    `run`, which writes a flow visualisation named '<t>_<t+1>.png' into
    the per-video output directory.
    """
    davis_folder = '/home/tianfei/dataset/DAVIS2017/JPEGImages/480p'
    save_dir = './davis2017'

    videos = os.listdir(davis_folder)
    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        out_dir = os.path.join(save_dir, video)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        frames = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        # Pair each frame with its successor instead of indexing by position.
        for first, second in zip(frames, frames[1:]):
            stem_a = os.path.basename(first)[:-4]
            stem_b = os.path.basename(second)[:-4]
            target = os.path.join(out_dir, stem_a + '_' + stem_b + '.png')
            run(first, second, target)
def run(imagefile1, imagefile2, save_file):
    """Estimate optical flow between two frames and save a colour rendering.

    Both frames are loaded as BGR, CHW float tensors in [0, 1], the predicted
    flow is mapped onto the standard flow colour wheel, and the result is
    written with OpenCV.
    """
    def to_tensor(path):
        # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1]
        bgr = numpy.array(PIL.Image.open(path))[:, :, ::-1]
        chw = bgr.transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0)
        return torch.FloatTensor(chw)

    flow = estimate(to_tensor(imagefile1), to_tensor(imagefile2))
    flow_color = flow_vis.flow_to_color(flow.numpy().transpose(1, 2, 0),
                                        convert_to_bgr=True)
    cv2.imwrite(save_file, flow_color)


if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_youtube.m
================================================
% Convert YouTube-VOS training annotation masks into binary edge maps.
% Every PNG under src_dir/<seq>/ is passed through seg2edge (radius 2,
% no ignore labels, 'regular' mode) and written to save_dir/<seq>/.
clc

save_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations_edge';
src_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations';

listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);  % drop the '.' and '..' entries

for si = 1 : numel(seq_names)
    disp(seq_names{si})

    seq_dir = fullfile(src_dir, seq_names{si});
    out_dir = fullfile(save_dir, seq_names{si});

    frames = dir(fullfile(seq_dir, '*.png'));
    frame_names = {frames.name};

    for fi = 1 : numel(frame_names)
        mask = imread(fullfile(seq_dir, frame_names{fi}));
        edge_map = seg2edge(mask, 2, [], 'regular');
        if ~exist(out_dir, 'dir')
            mkdir(out_dir)
        end
        imwrite(edge_map, fullfile(out_dir, frame_names{fi}))
    end
end
================================================
FILE: 3rdparty/run_youtube.py
================================================
import os
import glob
import torch
import numpy
import PIL
from run import estimate
def main():
    """Run HED edge estimation over every YouTube-VOS training frame.

    For each video directory under ``davis_folder``, each JPEG frame is
    converted to a BGR, CHW float tensor in [0, 1], passed through
    ``estimate``, and the first output channel is written as an 8-bit image
    with the original frame's basename under ``save_dir/<video>/``.
    """
    davis_folder = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/JPEGImages'
    save_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018-hed'

    videos = os.listdir(davis_folder)
    print(videos)

    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        # exist_ok removes the check-then-create race of the old
        # os.path.exists()/os.makedirs() pair.
        os.makedirs(save_dir_video, exist_ok=True)

        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for imagefile in imagefiles:
            # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1] (network input format).
            tensorInput = torch.FloatTensor(
                numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
                .transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))

            tensorOutput = estimate(tensorInput)

            save_file = os.path.join(save_dir_video, os.path.basename(imagefile))
            PIL.Image.fromarray(
                (tensorOutput.clamp(0.0, 1.0).numpy()
                 .transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)
            ).save(save_file)


# Guard the entry point so importing this module does not start a full dataset
# run; the sibling run_*_flow.py scripts already follow this convention.
if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_youtube_flow.py
================================================
import torch
import glob
import getopt
import math
import numpy
import os
import PIL
import PIL.Image
import sys
import flow_vis
from run import estimate
import cv2


def _load_frame(imagefile):
    """Read an image file as a CHW float32 tensor, BGR channel order, in [0, 1]."""
    bgr = numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
    chw = bgr.transpose(2, 0, 1).astype(numpy.float32)
    return torch.FloatTensor(chw * (1.0 / 255.0))


def run(imagefile1, imagefile2, save_file):
    """Estimate optical flow between two frames and save a color rendering.

    The flow field from ``estimate`` is rendered with
    ``flow_vis.flow_to_color`` and written to ``save_file`` via OpenCV.
    (A raw .flo writer previously lived here; only the visualization is kept.)
    """
    tensorOutput = estimate(_load_frame(imagefile1), _load_frame(imagefile2))
    flow_color = flow_vis.flow_to_color(tensorOutput.numpy().transpose(1, 2, 0),
                                        convert_to_bgr=True)
    cv2.imwrite(save_file, flow_color)


def main():
    """Compute flow for each consecutive frame pair of every YouTube-VOS video."""
    davis_folder = '/home/tianfei/dataset/YouTubeVOS/train/JPEGImages/'
    save_dir = './youtubevos'

    videos = os.listdir(davis_folder)
    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        if not os.path.exists(save_dir_video):
            os.makedirs(save_dir_video)

        frames = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for first, second in zip(frames[:-1], frames[1:]):
            save_name = '{}_{}.png'.format(os.path.basename(first)[:-4],
                                           os.path.basename(second)[:-4])
            run(first, second, os.path.join(save_dir_video, save_name))


if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/seg2edge.m
================================================
% This function takes an input segment and produces binary boundaries.
% Multi-channel input segments are supported by the function.
%
% Inputs:
%   seg         - height x width x chn label image; a pixel is an edge when
%                 any channel of its label differs from a nearby pixel's.
%   radius      - radius (pixels) of the circular neighborhood probed for
%                 label transitions; larger values thicken the boundaries.
%   labelIgnore - K x chn matrix of label rows whose transitions are not
%                 marked as edges; pass [] to ignore nothing.
%   edge_type   - 'regular': any label change counts;
%                 'inner'  : non-zero pixels adjacent to all-zero pixels;
%                 'outer'  : all-zero pixels adjacent to non-zero pixels.
%
% Output:
%   idxEdge     - height x width logical map, true at boundary pixels.
function [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)
% Get dimensions
[height, width, chn] = size(seg);
% The ignore list must provide one column per label channel.
if(~isempty(labelIgnore))
    if(chn~=size(labelIgnore, 2))
        error('Channel dimension not matching ignored label dimension!')
    end
end
% Set the considered neighborhood.
radius_search = max(ceil(radius), 1);
% (X, Y): coordinates of every pixel; (x, y): candidate neighbor offsets.
[X, Y] = meshgrid(1:width, 1:height);
[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);
% Columnize everything
X = X(:); Y = Y(:);
x = x(:); y = y(:);
% Flatten labels to one row per pixel so multi-channel labels compare row-wise.
if(chn == 1)
    seg = seg(:);
else
    seg = reshape(seg, [height*width chn]);
end
% Build circular neighborhood
idxNeigh = sqrt(x.^2 + y.^2) <= radius;
x = x(idxNeigh); y = y(idxNeigh);
numPxlImg = length(X);
numPxlNeigh = length(x);
% Compute Gaussian weight
% NOTE(review): despite the comment above, no Gaussian weight is computed;
% the loop below simply marks label transitions for each neighbor offset.
idxEdge = false(numPxlImg, 1);
for i = 1:numPxlNeigh
    % Shift every pixel by the i-th neighbor offset.
    XNeigh = X+x(i);
    YNeigh = Y+y(i);
    % Keep only shifted positions that stay inside the image.
    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );
    XCenter = X(idxValid);
    YCenter = Y(idxValid);
    XNeigh = XNeigh(idxValid);
    YNeigh = YNeigh(idxValid);
    % Labels at each center pixel and at its shifted neighbor.
    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);
    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);
    if(strcmp(edge_type, 'regular'))
        % Any change in any channel is an edge.
        idxDiff = find(any(LCenter~=LNeigh, 2));
    elseif(strcmp(edge_type, 'inner'))
        % Object-side edge: non-zero center next to an all-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );
    elseif(strcmp(edge_type, 'outer'))
        % Background-side edge: all-zero center next to a non-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );
    else
        error('Wrong edge type input!');
    end
    % Drop transitions that involve an ignored label on either side.
    LCenterEdge = LCenter(idxDiff, :);
    LNeighEdge = LNeigh(idxDiff, :);
    idxIgnore2 = false(length(idxDiff), 1);
    for j = 1:size(labelIgnore, 1)
        idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );
    end
    idxDiffGT = idxDiff(~idxIgnore2);
    idxEdge(idxValid(idxDiffGT)) = true;
end
% Back to image shape.
idxEdge = reshape(idxEdge, [height, width]);
================================================
FILE: README.md
================================================
## Motion-Attentive Transition for Zero-Shot Video Object Segmentation
[](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on?p=motion-attentive-transition-for-zero-shot)
[](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on-4?p=matnet-motion-attentive-transition-network)
> UPDATES:<br>
> - [2021/04/17] Our MATNet achieves state-of-the-art results (__64.2__ in terms of _Mean J_) on the [MoCA](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing) dataset in "Self-supervised Video Object Segmentation by Motion Grouping" by Charig Yang, Hala Lamdouar, Erika Lu, Andrew Zisserman, Weidi Xie. Thanks [Charig Yang](https://charigyang.github.io/) for providing the segmentation results [Google Drive](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing).
> - [2020/06/15] Update results for DAVIS-17 test-dev set!
> - [2020/03/04] Update results for DAVIS-17 validation set!
> - [2019/11/17] Codes released!
This is a PyTorch implementation of our MATNet for unsupervised video object segmentation.
**Motion-Attentive Transition for Zero-Shot Video Object Segmentation.** [[Arxiv](https://arxiv.org/abs/2003.04253)] [[TIP](https://ieeexplore.ieee.org/document/9165947)]
## Prerequisites
The training and testing experiments are conducted using PyTorch 1.0.1 with a single GeForce RTX 2080Ti GPU with 11GB Memory.
- [PyTorch 1.0.1](https://github.com/pytorch/pytorch)
Other minor Python modules can be installed by running
```bash
pip install -r requirements.txt
```
## Train
### Clone
```git clone --recursive https://github.com/tfzhou/MATNet.git```
### Download Datasets
In the paper, we use the following two publicly available datasets for training. Here are some steps to prepare the data:
- [DAVIS-17](https://davischallenge.org/davis2017/code.html): we use all the data in the train subset of DAVIS-16.
However, please download DAVIS-17 to fit the code. It will automatically choose the subset of DAVIS-16 for training.
- [YoutubeVOS-2018](https://youtube-vos.org/dataset/): we sample the training data every 10 frames in YoutubeVOS-2018. We use the dataset version with 6fps rather than 30fps.
- Create soft links:
```cd data; ln -s your/davis17/path DAVIS2017; ln -s your/youtubevos/path YouTubeVOS_2018;```
### Prepare Edge Annotations
I have provided some Matlab scripts to generate edge annotations from masks. Please run ```data/run_davis2017.m```
and ```data/run_youtube.m```.
### Prepare HED Results
I have provided the pytorch codes to generate HED results for the two datasets (see ```3rdparty/pytorch-hed```).
Please run ```run_davis.py``` and ```run_youtube.py```.
The codes are borrowed from https://github.com/sniklaus/pytorch-hed.
### Prepare Optical Flow
I have provided the pytorch codes to generate optical flow results for the two datasets (see ```3rdparty/pytorch-pwc```).
Please run ```run_davis_flow.py``` and ```run_youtube_flow.py```.
The codes are borrowed from https://github.com/sniklaus/pytorch-pwc.
Please follow the [setup](https://github.com/sniklaus/pytorch-pwc#setup) section to install ```cupy```.
`warning: Total size of optical flow results of Youtube-VOS is more than 30GB.`
### Train
Once all data is prepared, please run ```python train_MATNet.py``` for training.
## Test
1. Run ```python test_MATNet.py``` to obtain the saliency results on DAVIS-16 val set.
2. Run ```python apply_densecrf_davis.py``` for binary segmentation results.
## Segmentation Results
1. The segmentation results on DAVIS-16 and Youtube-objects can be downloaded from [Google Drive](https://drive.google.com/file/d/1d23TGBtrr11g8KFAStwewTyxLq2nX4PT/view?usp=sharing).
2. The segmentation results on DAVIS-17 __val__ can be downloaded from [Google Drive](https://drive.google.com/open?id=1GTqjWc7tktw92tBNKln2eFmb9WzdcVrz). We achieved __58.6__ in terms of _Mean J&F_.
3. The segmentation results on DAVIS-17 __test-dev__ can be downloaded from [Google Drive](https://drive.google.com/file/d/1Ood-rr0d4YRFSrGGh6yVpYvOvE_h0tVK/view?usp=sharing). We achieved __59.8__ in terms of _Mean J&F_. The method also achieved the second place in DAVIS-20 unsupervised object segmentation challenge. Please refer to [paper](https://davischallenge.org/challenge2020/papers/DAVIS-Unsupervised-Challenge-2nd-Team.pdf) for more details of our challenge solution.
## Pretrained Models
The pre-trained model can be downloaded from [Google Drive](https://drive.google.com/file/d/1XlenYXgQjoThgRUbffCUEADS6kE4lvV_/view?usp=sharing).
## Citation
If you find MATNet useful for your research, please consider citing the following papers:
```
@inproceedings{zhou2020motion,
title={Motion-Attentive Transition for Zero-Shot Video Object Segmentation},
author={Zhou, Tianfei and Wang, Shunzhou and Zhou, Yi and Yao, Yazhou and Li, Jianwu and Shao, Ling},
booktitle={Proceedings of the 34th AAAI Conference on Artificial Intelligence (AAAI)},
year={2020},
pages={13066--13073}
}
@article{zhou2020matnet,
title={MATNet: Motion-Attentive Transition Network for Zero-Shot Video Object Segmentation},
author={Zhou, Tianfei and Li, Jianwu and Wang, Shunzhou and Tao, Ran and Shen, Jianbing},
journal={IEEE Transactions on Image Processing},
volume={29},
pages={8326-8338},
year={2020}
}
@inproceedings{zhou2021unsupervised,
author = {Zhou, Tianfei and Li, Jianwu and Li, Xueyi and Shao, Ling},
title = {Target-Aware Object Discovery and Association for Unsupervised Video Multi-Object Segmentation},
booktitle = {CVPR},
year = {2021}
}
```
================================================
FILE: apply_densecrf_davis.py
================================================
import pydensecrf.densecrf as dcrf
import numpy as np
import sys
import time
import os
from tqdm import tqdm
from skimage.io import imread, imsave
from pydensecrf.utils import unary_from_labels, create_pairwise_bilateral,\
create_pairwise_gaussian, unary_from_softmax
from os import listdir, makedirs
from os.path import isfile, join
def sigmoid(x):
return 1 / (1 + np.exp(-x))
# --- Paths and model selection ------------------------------------------
image_dir = 'data/DAVIS2017/JPEGImages/480p'
davis_result_dir = 'output/davis16'
model_name = 'MATNet_epoch0' # specify the folder name of saliency results

mask_dir = os.path.join(davis_result_dir, model_name)
save_dir = join(davis_result_dir, model_name + '_crf')

# Refine every saliency mask with a dense CRF, one sequence at a time.
for seq in tqdm(listdir(mask_dir)):
    seq_dir = join(image_dir, seq)
    seq_mask_dir = join(mask_dir, seq)
    res_dir = join(save_dir, seq)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    for f in listdir(seq_mask_dir):
        frameName = f[:-4]
        # RGB frame and its saliency mask (mask assumed single-channel;
        # `mask.shape` is unpacked into two values below).
        image = imread(join(seq_dir, f[:-4] + '.jpg'))
        mask = imread(join(seq_mask_dir, f))
        H, W = mask.shape
        # Min-max normalize the mask to [0, 1].
        min_val = np.min(mask.ravel())
        max_val = np.max(mask.ravel())
        out = (mask.astype('float') - min_val) / (max_val - min_val)
        # NOTE(review): the two assignments below are dead stores -- both
        # channels are overwritten with n_energy/p_energy a few lines down.
        labels = np.zeros((2, image.shape[0], image.shape[1]))
        labels[1, :, :] = out
        labels[0, :, :] = 1 - out
        tau = 1.05
        EPSILON = 1e-8
        # Convert the 0-255 mask into per-pixel energies.
        anno_norm = mask / 255
        n_energy = -np.log((1.0 - anno_norm + EPSILON)) / (tau * sigmoid(1 - anno_norm))
        p_energy = -np.log(anno_norm + EPSILON) / (tau * sigmoid(anno_norm))
        # NOTE(review): unary_from_softmax documents probabilities as input,
        # but energies are passed here; confirm the channel ordering
        # (n_energy in channel 1, p_energy in channel 0) is intentional.
        labels[1, :, :] = n_energy
        labels[0, :, :] = p_energy
        # Map class index -> output gray value (index 0 -> 0, index 1 -> 255).
        colors = [0, 255]
        colorize = np.empty((len(colors), 1), np.uint8)
        colorize[:, 0] = colors
        n_labels = 2
        crf = dcrf.DenseCRF(image.shape[1] * image.shape[0], n_labels)
        U = unary_from_softmax(labels)
        crf.setUnaryEnergy(U)
        # Smoothness kernel: penalizes label changes between nearby pixels.
        feats = create_pairwise_gaussian(sdims=(3, 3), shape=image.shape[:2])
        crf.addPairwiseEnergy(feats, compat=3,
                              kernel=dcrf.DIAG_KERNEL,
                              normalization=dcrf.NORMALIZE_SYMMETRIC)
        # Appearance kernel: encourages label changes to follow color edges.
        feats = create_pairwise_bilateral(sdims=(30, 30), schan=(5, 5, 5),
                                          img=image, chdim=2)
        crf.addPairwiseEnergy(feats, compat=5,
                              kernel=dcrf.DIAG_KERNEL,
                              normalization=dcrf.NORMALIZE_SYMMETRIC)
        # Up to 5 mean-field steps, stopping early once the KL-divergence
        # change falls below 500.
        Q, tmp1, tmp2 = crf.startInference()
        for i in range(5):
            temp = crf.klDivergence(Q)
            crf.stepInference(Q, tmp1, tmp2)
            if abs(crf.klDivergence(Q)-temp) < 500:
                break
        # Hard labeling, mapped to gray values and saved at the mask's shape.
        MAP = np.argmax(Q, axis=0)
        MAP = colorize[MAP]
        imsave(res_dir + '/' + frameName + '.png', MAP.reshape(mask.shape))
        #print("Saving: " + res_dir + '/' + frameName + '.png')
================================================
FILE: args.py
================================================
import argparse
def get_parser():
    """Build the argparse parser holding every MATNet train/test option."""
    p = argparse.ArgumentParser(description='MATNet')

    # Dataset geometry and data-loader settings.
    p.add_argument('-year', dest='year', default='2017')
    p.add_argument('-imsize', dest='imsize', default=480, type=int)
    p.add_argument('-batch_size', dest='batch_size', default=4, type=int)
    p.add_argument('-num_workers', dest='num_workers', default=4, type=int)

    ## TRAINING parameters ##
    p.add_argument('--resume', dest='resume', action='store_true',
                   help=('whether to resume training an existing model'
                         '(the one with name model_name will be used)'))
    p.set_defaults(resume=False)
    p.add_argument('-seed', dest='seed', default=123, type=int)
    p.add_argument('-gpu_id', dest='gpu_id', default=0, type=int)
    p.add_argument('-lr', dest='lr', default=1e-3, type=float)
    p.add_argument('-lr_cnn', dest='lr_cnn', default=1e-4, type=float)
    p.add_argument('-optim_cnn', dest='optim_cnn', default='sgd',
                   choices=['adam', 'sgd', 'rmsprop'])
    p.add_argument('-momentum', dest='momentum', default=0.9, type=float)
    p.add_argument('-weight_decay', dest='weight_decay', default=5e-4,
                   type=float)
    p.add_argument('-weight_decay_cnn', dest='weight_decay_cnn',
                   default=5e-4, type=float)
    p.add_argument('-optim', dest='optim', default='sgd',
                   choices=['adam', 'sgd', 'rmsprop'])
    p.add_argument('--crop', dest='crop', action='store_true')
    p.set_defaults(crop=False)
    p.add_argument('--update_encoder', dest='update_encoder',
                   action='store_true',
                   help='used in sync with finetune_after.'
                        ' no need to activate.')
    p.set_defaults(update_encoder=True)
    p.add_argument('-max_epoch', dest='max_epoch', default=20, type=int)

    # Visualization and logging.
    p.add_argument('-print_every', dest='print_every', default=10, type=int)

    # Loss weights.
    p.add_argument('-iou_weight', dest='iou_weight', default=1.0, type=float)

    # Data augmentation.
    p.add_argument('--augment', dest='augment', action='store_true')
    p.set_defaults(augment=True)
    p.add_argument('-rotation', dest='rotation', default=10, type=int)
    p.add_argument('-translation', dest='translation', default=0.1, type=float)
    p.add_argument('-shear', dest='shear', default=0.1, type=float)
    p.add_argument('-zoom', dest='zoom', default=0.7, type=float)

    # GPU selection (--cpu clears use_gpu).
    p.add_argument('--cpu', dest='use_gpu', action='store_false')
    p.set_defaults(use_gpu=True)
    p.add_argument('-ngpus', dest='ngpus', default=1, type=int)

    p.add_argument('-model_name', dest='model_name', default='model')
    p.add_argument('-log_file', dest='log_file', default='train.log')

    # Dataset parameters.
    p.add_argument('--resize', dest='resize', action='store_true')
    p.set_defaults(resize=False)
    p.add_argument('-dataset', dest='dataset', default='davis2017',
                   choices=['davis2017', 'youtube'])

    # Testing options.
    p.add_argument('-eval_split', dest='eval_split', default='test')
    p.add_argument('-mask_th', dest='mask_th', default=0.5, type=float)
    p.add_argument('-max_dets', dest='max_dets', default=100, type=int)
    p.add_argument('-min_size', dest='min_size', default=0.001, type=float)
    p.add_argument('--display', dest='display', action='store_true')
    p.add_argument('--no_display_text', dest='no_display_text',
                   action='store_true')
    # Defaults for flags without a registered argument plus the display toggles.
    p.set_defaults(display=False, display_route=False,
                   no_display_text=False, use_gt_masks=False)

    return p


if __name__ =="__main__":
    parser = get_parser()
    args_dict = parser.parse_args()
================================================
FILE: data/db_info.yaml
================================================
attributes: [AC, BC, CS, DB, DEF, EA, FM, HO, IO, LR, MB, OCC, OV, ROT, SC, SV]
sets: [train, val, test-dev]
years: [2016, 2017]
sequences:
- name: aerobatics
attributes: []
num_frames: 71
set: test-dev
eval_t: False
year: 2017
- name: bear
attributes: [DEF]
num_frames: 82
set: train
eval_t: True
year: 2016
- name: bike-packing
attributes: []
num_frames: 69
set: val
eval_t: False
year: 2017
- name: blackswan
attributes: []
num_frames: 50
set: val
eval_t: True
year: 2016
- name: bmx-bumps
attributes: [LR, SV, SC, FM, CS, IO, MB, OCC, HO, EA, OV]
num_frames: 90
set: train
eval_t: False
year: 2016
- name: bmx-trees
attributes: [LR, SV, SC, FM, CS, IO, MB, DEF, OCC, HO, EA, BC]
num_frames: 80
set: val
eval_t: False
year: 2016
- name: boat
attributes: [SC, DB, EA, BC]
num_frames: 75
set: train
eval_t: True
year: 2016
- name: boxing-fisheye
attributes: []
num_frames: 87
set: train
eval_t: False
year: 2017
- name: breakdance
attributes: [FM, DB, MB, DEF, HO, ROT, OV, AC]
num_frames: 84
set: val
eval_t: False
year: 2016
- name: breakdance-flare
attributes: [FM, CS, MB, DEF, HO, ROT]
num_frames: 71
set: train
eval_t: False
year: 2016
- name: bus
attributes: [SC, OCC, HO, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: camel
attributes: [CS, IO, DEF, ROT]
num_frames: 90
set: val
eval_t: True
year: 2016
- name: car-race
attributes: []
num_frames: 31
set: test-dev
eval_t: False
year: 2017
- name: car-roundabout
attributes: [ROT, BC]
num_frames: 75
set: val
eval_t: True
year: 2016
- name: car-shadow
attributes: [LR, EA, AC, BC]
num_frames: 40
set: val
eval_t: True
year: 2016
- name: car-turn
attributes: [SV, ROT, BC]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: carousel
attributes: []
num_frames: 69
set: test-dev
eval_t: False
year: 2017
- name: cat-girl
attributes: []
num_frames: 89
set: train
eval_t: False
year: 2017
- name: cats-car
attributes: []
num_frames: 67
set: test-dev
eval_t: False
year: 2017
- name: chamaleon
attributes: []
num_frames: 85
set: test-dev
eval_t: False
year: 2017
- name: classic-car
attributes: []
num_frames: 63
set: train
eval_t: False
year: 2017
- name: color-run
attributes: []
num_frames: 84
set: train
eval_t: False
year: 2017
- name: cows
attributes: [CS, IO, DEF, OCC, HO]
num_frames: 104
set: val
eval_t: True
year: 2016
- name: crossing
attributes: []
num_frames: 52
set: train
eval_t: False
year: 2017
- name: dance-jump
attributes: [SC, DB, MB, DEF, OCC, HO, ROT, EA]
num_frames: 60
set: train
eval_t: True
year: 2016
- name: dance-twirl
attributes: [SC, CS, IO, MB, DEF, HO, ROT, OV]
num_frames: 90
set: val
eval_t: False
year: 2016
- name: dancing
attributes: []
num_frames: 62
set: train
eval_t: False
year: 2017
- name: deer
attributes: []
num_frames: 79
set: test-dev
eval_t: False
year: 2017
- name: disc-jockey
attributes: []
num_frames: 76
set: train
eval_t: False
year: 2017
- name: dog
attributes: [FM, CS, MB, DEF, ROT, EA]
num_frames: 60
set: val
eval_t: False
year: 2016
- name: dog-agility
attributes: [FM, MB, DEF, OCC, HO, EA, OV, AC]
num_frames: 25
set: train
eval_t: False
year: 2016
- name: dog-gooses
attributes: []
num_frames: 86
set: train
eval_t: False
year: 2017
- name: dogs-jump
attributes: []
num_frames: 66
set: val
eval_t: False
year: 2017
- name: dogs-scale
attributes: []
num_frames: 83
set: train
eval_t: False
year: 2017
- name: drift-chicane
attributes: [LR, SV, FM, DB, HO, ROT, EA, AC]
num_frames: 52
set: val
eval_t: False
year: 2016
- name: drift-straight
attributes: [LR, SV, FM, CS, MB, HO, ROT, EA, OV, AC]
num_frames: 50
set: val
eval_t: True
year: 2016
- name: drift-turn
attributes: [SV, FM, IO, DB, HO, ROT, OV, AC]
num_frames: 64
set: train
eval_t: True
year: 2016
- name: drone
attributes: []
num_frames: 91
set: train
eval_t: False
year: 2017
- name: elephant
attributes: [CS, DB, DEF, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: flamingo
attributes: [SC, IO, DB, DEF, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: giant-slalom
attributes: []
num_frames: 127
set: test-dev
eval_t: False
year: 2017
- name: girl-dog
attributes: []
num_frames: 86
set: test-dev
eval_t: False
year: 2017
- name: goat
attributes: [CS, DEF, EA, BC]
num_frames: 90
set: val
eval_t: False
year: 2016
- name: gold-fish
attributes: []
num_frames: 78
set: val
eval_t: False
year: 2017
- name: golf
attributes: []
num_frames: 79
set: test-dev
eval_t: False
year: 2017
- name: guitar-violin
attributes: []
num_frames: 55
set: test-dev
eval_t: False
year: 2017
- name: gym
attributes: []
num_frames: 60
set: test-dev
eval_t: False
year: 2017
- name: helicopter
attributes: []
num_frames: 49
set: test-dev
eval_t: False
year: 2017
- name: hike
attributes: [LR, DEF, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: hockey
attributes: [SC, IO, DEF, HO, ROT]
num_frames: 75
set: train
eval_t: True
year: 2016
- name: horsejump-high
attributes: [SC, IO, DEF, OCC, HO]
num_frames: 50
set: val
eval_t: False
year: 2016
- name: horsejump-low
attributes: [SC, IO, DEF, OCC, HO, EA]
num_frames: 60
set: train
eval_t: False
year: 2016
- name: horsejump-stick
attributes: []
num_frames: 58
set: test-dev
eval_t: False
year: 2017
- name: hoverboard
attributes: []
num_frames: 81
set: test-dev
eval_t: False
year: 2017
- name: india
attributes: []
num_frames: 81
set: val
eval_t: False
year: 2017
- name: judo
attributes: []
num_frames: 34
set: val
eval_t: False
year: 2017
- name: kid-football
attributes: []
num_frames: 68
set: train
eval_t: False
year: 2017
- name: kite-surf
attributes: [SV, SC, IO, DB, MB, OCC, HO, EA]
num_frames: 50
set: val
eval_t: True
year: 2016
- name: kite-walk
attributes: [SC, IO, DB, DEF, OCC, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: koala
attributes: []
num_frames: 100
set: train
eval_t: False
year: 2017
- name: lab-coat
attributes: []
num_frames: 47
set: val
eval_t: False
year: 2017
- name: lady-running
attributes: []
num_frames: 65
set: train
eval_t: False
year: 2017
- name: libby
attributes: [SC, MB, DEF, OCC, HO, EA]
num_frames: 49
set: val
eval_t: False
year: 2016
- name: lindy-hop
attributes: []
num_frames: 73
set: train
eval_t: False
year: 2017
- name: loading
attributes: []
num_frames: 50
set: val
eval_t: False
year: 2017
- name: lock
attributes: []
num_frames: 43
set: test-dev
eval_t: False
year: 2017
- name: longboard
attributes: []
num_frames: 52
set: train
eval_t: False
year: 2017
- name: lucia
attributes: [DEF, OCC, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: mallard-fly
attributes: [LR, SV, FM, DB, MB, DEF, ROT, EA, OV, AC]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: mallard-water
attributes: [LR, IO, DB, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: man-bike
attributes: []
num_frames: 75
set: test-dev
eval_t: False
year: 2017
- name: mbike-trick
attributes: []
num_frames: 79
set: val
eval_t: False
year: 2017
- name: miami-surf
attributes: []
num_frames: 70
set: train
eval_t: False
year: 2017
- name: monkeys-trees
attributes: []
num_frames: 83
set: test-dev
eval_t: False
year: 2017
- name: motocross-bumps
attributes: [SV, FM, IO, HO, ROT, OV, AC, BC]
num_frames: 60
set: train
eval_t: True
year: 2016
- name: motocross-jump
attributes: [SV, SC, FM, IO, MB, DEF, HO, ROT, EA, OV, AC]
num_frames: 40
set: val
eval_t: False
year: 2016
- name: motorbike
attributes: [LR, SV, SC, FM, IO, OCC, HO, ROT, EA]
num_frames: 43
set: train
eval_t: False
year: 2016
- name: mtb-race
attributes: []
num_frames: 69
set: test-dev
eval_t: False
year: 2017
- name: night-race
attributes: []
num_frames: 46
set: train
eval_t: False
year: 2017
- name: orchid
attributes: []
num_frames: 57
set: test-dev
eval_t: False
year: 2017
- name: paragliding
attributes: [LR, SC, IO, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: paragliding-launch
attributes: [SC, IO, DEF, HO, EA]
num_frames: 80
set: val
eval_t: True
year: 2016
- name: parkour
attributes: [LR, SV, FM, DEF, OCC, HO, ROT, AC]
num_frames: 100
set: val
eval_t: False
year: 2016
- name: people-sunset
attributes: []
num_frames: 67
set: test-dev
eval_t: False
year: 2017
- name: pigs
attributes: []
num_frames: 79
set: val
eval_t: False
year: 2017
- name: planes-crossing
attributes: []
num_frames: 31
set: test-dev
eval_t: False
year: 2017
- name: planes-water
attributes: []
num_frames: 38
set: train
eval_t: False
year: 2017
- name: rallye
attributes: []
num_frames: 50
set: train
eval_t: False
year: 2017
- name: rhino
attributes: [DEF, OCC, BC]
num_frames: 90
set: train
eval_t: True
year: 2016
- name: rollerblade
attributes: [LR, FM, CS, MB, DEF, HO]
num_frames: 35
set: train
eval_t: False
year: 2016
- name: rollercoaster
attributes: []
num_frames: 70
set: test-dev
eval_t: False
year: 2017
- name: salsa
attributes: []
num_frames: 86
set: test-dev
eval_t: False
year: 2017
- name: schoolgirls
attributes: []
num_frames: 80
set: train
eval_t: False
year: 2017
- name: scooter-black
attributes: [SV, IO, HO, EA]
num_frames: 43
set: val
eval_t: True
year: 2016
- name: scooter-board
attributes: []
num_frames: 91
set: train
eval_t: False
year: 2017
- name: scooter-gray
attributes: [SC, FM, IO, OCC, HO, ROT, EA, BC]
num_frames: 75
set: train
eval_t: False
year: 2016
- name: seasnake
attributes: []
num_frames: 80
set: test-dev
eval_t: False
year: 2017
- name: sheep
attributes: []
num_frames: 68
set: train
eval_t: False
year: 2017
- name: shooting
attributes: []
num_frames: 40
set: val
eval_t: False
year: 2017
- name: skate-jump
attributes: []
num_frames: 68
set: test-dev
eval_t: False
year: 2017
- name: skate-park
attributes: []
num_frames: 80
set: train
eval_t: False
year: 2017
- name: slackline
attributes: []
num_frames: 60
set: test-dev
eval_t: False
year: 2017
- name: snowboard
attributes: []
num_frames: 66
set: train
eval_t: False
year: 2017
- name: soapbox
attributes: [SV, IO, MB, DEF, HO, ROT, AC]
num_frames: 99
set: val
eval_t: True
year: 2016
- name: soccerball
attributes: [LR, FM, MB, OCC, HO]
num_frames: 48
set: train
eval_t: False
year: 2016
- name: stroller
attributes: [SC, FM, CS, IO, DEF, HO]
num_frames: 91
set: train
eval_t: True
year: 2016
- name: stunt
attributes: []
num_frames: 71
set: train
eval_t: False
year: 2017
- name: subway
attributes: []
num_frames: 88
set: test-dev
eval_t: False
year: 2017
- name: surf
attributes: [SV, FM, CS, IO, DB, HO, OV]
num_frames: 55
set: train
eval_t: True
year: 2016
- name: swing
attributes: [SC, FM, IO, DEF, OCC, HO]
num_frames: 60
set: train
eval_t: False
year: 2016
- name: tandem
attributes: []
num_frames: 72
set: test-dev
eval_t: False
year: 2017
- name: tennis
attributes: [SV, FM, IO, MB, DEF, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: tennis-vest
attributes: []
num_frames: 75
set: test-dev
eval_t: False
year: 2017
- name: tractor
attributes: []
num_frames: 65
set: test-dev
eval_t: False
year: 2017
- name: tractor-sand
attributes: []
num_frames: 76
set: train
eval_t: False
year: 2017
- name: train
attributes: [SC, HO, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: tuk-tuk
attributes: []
num_frames: 59
set: train
eval_t: False
year: 2017
- name: upside-down
attributes: []
num_frames: 65
set: train
eval_t: False
year: 2017
- name: varanus-cage
attributes: []
num_frames: 67
set: train
eval_t: False
year: 2017
- name: walking
attributes: []
num_frames: 72
set: train
eval_t: False
year: 2017
================================================
FILE: data/palette.txt
================================================
0 0 0
128 0 0
0 128 0
128 128 0
0 0 128
128 0 128
0 128 128
128 128 128
64 0 0
191 0 0
64 128 0
191 128 0
64 0 128
191 0 128
64 128 128
191 128 128
0 64 0
128 64 0
0 191 0
128 191 0
0 64 128
128 64 128
22 22 22
23 23 23
24 24 24
25 25 25
26 26 26
27 27 27
28 28 28
29 29 29
30 30 30
31 31 31
32 32 32
33 33 33
34 34 34
35 35 35
36 36 36
37 37 37
38 38 38
39 39 39
40 40 40
41 41 41
42 42 42
43 43 43
44 44 44
45 45 45
46 46 46
47 47 47
48 48 48
49 49 49
50 50 50
51 51 51
52 52 52
53 53 53
54 54 54
55 55 55
56 56 56
57 57 57
58 58 58
59 59 59
60 60 60
61 61 61
62 62 62
63 63 63
64 64 64
65 65 65
66 66 66
67 67 67
68 68 68
69 69 69
70 70 70
71 71 71
72 72 72
73 73 73
74 74 74
75 75 75
76 76 76
77 77 77
78 78 78
79 79 79
80 80 80
81 81 81
82 82 82
83 83 83
84 84 84
85 85 85
86 86 86
87 87 87
88 88 88
89 89 89
90 90 90
91 91 91
92 92 92
93 93 93
94 94 94
95 95 95
96 96 96
97 97 97
98 98 98
99 99 99
100 100 100
101 101 101
102 102 102
103 103 103
104 104 104
105 105 105
106 106 106
107 107 107
108 108 108
109 109 109
110 110 110
111 111 111
112 112 112
113 113 113
114 114 114
115 115 115
116 116 116
117 117 117
118 118 118
119 119 119
120 120 120
121 121 121
122 122 122
123 123 123
124 124 124
125 125 125
126 126 126
127 127 127
128 128 128
129 129 129
130 130 130
131 131 131
132 132 132
133 133 133
134 134 134
135 135 135
136 136 136
137 137 137
138 138 138
139 139 139
140 140 140
141 141 141
142 142 142
143 143 143
144 144 144
145 145 145
146 146 146
147 147 147
148 148 148
149 149 149
150 150 150
151 151 151
152 152 152
153 153 153
154 154 154
155 155 155
156 156 156
157 157 157
158 158 158
159 159 159
160 160 160
161 161 161
162 162 162
163 163 163
164 164 164
165 165 165
166 166 166
167 167 167
168 168 168
169 169 169
170 170 170
171 171 171
172 172 172
173 173 173
174 174 174
175 175 175
176 176 176
177 177 177
178 178 178
179 179 179
180 180 180
181 181 181
182 182 182
183 183 183
184 184 184
185 185 185
186 186 186
187 187 187
188 188 188
189 189 189
190 190 190
191 191 191
192 192 192
193 193 193
194 194 194
195 195 195
196 196 196
197 197 197
198 198 198
199 199 199
200 200 200
201 201 201
202 202 202
203 203 203
204 204 204
205 205 205
206 206 206
207 207 207
208 208 208
209 209 209
210 210 210
211 211 211
212 212 212
213 213 213
214 214 214
215 215 215
216 216 216
217 217 217
218 218 218
219 219 219
220 220 220
221 221 221
222 222 222
223 223 223
224 224 224
225 225 225
226 226 226
227 227 227
228 228 228
229 229 229
230 230 230
231 231 231
232 232 232
233 233 233
234 234 234
235 235 235
236 236 236
237 237 237
238 238 238
239 239 239
240 240 240
241 241 241
242 242 242
243 243 243
244 244 244
245 245 245
246 246 246
247 247 247
248 248 248
249 249 249
250 250 250
251 251 251
252 252 252
253 253 253
254 254 254
255 255 255
================================================
FILE: data/run_davis2017.m
================================================
% Convert every DAVIS-2017 annotation mask into a binary edge map with
% seg2edge, writing results under Annotations_edge with the same layout.
clc

src_dir = 'data/DAVIS2017/Annotations/480p';
save_dir = 'data/DAVIS2017/Annotations_edge/480p';

% List sequence folders, dropping the '.' and '..' entries.
listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);

for si = 1 : length(seq_names)
    seq = seq_names{si};
    seq_path = [src_dir, '/', seq];

    frame_listing = dir([seq_path, '/*.png']);
    frame_names = {frame_listing.name};

    for fi = 1 : length(frame_names)
        frame = frame_names{fi};
        % Binary boundary map with a 2-pixel radius, no ignored labels.
        edge_map = seg2edge(imread(fullfile(seq_path, frame)), 2, [], 'regular');

        out_path = fullfile(save_dir, seq);
        if ~exist(out_path, 'dir')
            mkdir(out_path)
        end
        imwrite(edge_map, fullfile(out_path, frame))
    end
end
================================================
FILE: data/run_youtube.m
================================================
% Generate binary edge annotations for every YouTube-VOS training mask by
% running seg2edge on each annotation frame, mirroring the folder layout.
clc

src_dir = 'data/YouTubeVOS_2018/train/Annotations';
save_dir = 'data/YouTubeVOS_2018/train/Annotations_edge';

% List sequence folders, dropping the '.' and '..' entries.
listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);

for si = 1 : length(seq_names)
    seq = seq_names{si};
    disp(seq)

    seq_path = [src_dir, '/', seq];
    frame_listing = dir([seq_path, '/*.png']);
    frame_names = {frame_listing.name};

    for fi = 1 : length(frame_names)
        frame = frame_names{fi};
        % Binary boundary map with a 2-pixel radius, no ignored labels.
        edge_map = seg2edge(imread(fullfile(seq_path, frame)), 2, [], 'regular');

        out_path = fullfile(save_dir, seq);
        if ~exist(out_path, 'dir')
            mkdir(out_path)
        end
        imwrite(edge_map, fullfile(out_path, frame))
    end
end
================================================
FILE: data/seg2edge.m
================================================
% This function takes an input segment and produces binary boundaries.
% Multi-channel input segments are supported by the function.
%
% Inputs:
%   seg         - height x width x chn label image; a pixel is an edge when
%                 any channel of its label differs from a nearby pixel's.
%   radius      - radius (pixels) of the circular neighborhood probed for
%                 label transitions; larger values thicken the boundaries.
%   labelIgnore - K x chn matrix of label rows whose transitions are not
%                 marked as edges; pass [] to ignore nothing.
%   edge_type   - 'regular': any label change counts;
%                 'inner'  : non-zero pixels adjacent to all-zero pixels;
%                 'outer'  : all-zero pixels adjacent to non-zero pixels.
%
% Output:
%   idxEdge     - height x width logical map, true at boundary pixels.
function [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)
% Get dimensions
[height, width, chn] = size(seg);
% The ignore list must provide one column per label channel.
if(~isempty(labelIgnore))
    if(chn~=size(labelIgnore, 2))
        error('Channel dimension not matching ignored label dimension!')
    end
end
% Set the considered neighborhood.
radius_search = max(ceil(radius), 1);
% (X, Y): coordinates of every pixel; (x, y): candidate neighbor offsets.
[X, Y] = meshgrid(1:width, 1:height);
[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);
% Columnize everything
X = X(:); Y = Y(:);
x = x(:); y = y(:);
% Flatten labels to one row per pixel so multi-channel labels compare row-wise.
if(chn == 1)
    seg = seg(:);
else
    seg = reshape(seg, [height*width chn]);
end
% Build circular neighborhood
idxNeigh = sqrt(x.^2 + y.^2) <= radius;
x = x(idxNeigh); y = y(idxNeigh);
numPxlImg = length(X);
numPxlNeigh = length(x);
% Compute Gaussian weight
% NOTE(review): despite the comment above, no Gaussian weight is computed;
% the loop below simply marks label transitions for each neighbor offset.
idxEdge = false(numPxlImg, 1);
for i = 1:numPxlNeigh
    % Shift every pixel by the i-th neighbor offset.
    XNeigh = X+x(i);
    YNeigh = Y+y(i);
    % Keep only shifted positions that stay inside the image.
    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );
    XCenter = X(idxValid);
    YCenter = Y(idxValid);
    XNeigh = XNeigh(idxValid);
    YNeigh = YNeigh(idxValid);
    % Labels at each center pixel and at its shifted neighbor.
    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);
    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);
    if(strcmp(edge_type, 'regular'))
        % Any change in any channel is an edge.
        idxDiff = find(any(LCenter~=LNeigh, 2));
    elseif(strcmp(edge_type, 'inner'))
        % Object-side edge: non-zero center next to an all-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );
    elseif(strcmp(edge_type, 'outer'))
        % Background-side edge: all-zero center next to a non-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );
    else
        error('Wrong edge type input!');
    end
    % Drop transitions that involve an ignored label on either side.
    LCenterEdge = LCenter(idxDiff, :);
    LNeighEdge = LNeigh(idxDiff, :);
    idxIgnore2 = false(length(idxDiff), 1);
    for j = 1:size(labelIgnore, 1)
        idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );
    end
    idxDiffGT = idxDiff(~idxIgnore2);
    idxEdge(idxValid(idxDiffGT)) = true;
end
% Back to image shape.
idxEdge = reshape(idxEdge, [height, width]);
================================================
FILE: dataloader/base.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# ----------------------------------------------------------------------------
import functools
import os.path as osp
import numpy as np
from skimage.io import ImageCollection
from misc.config import cfg
from misc.io_aux import imread_indexed
#################################
# HELPER FUNCTIONS
#################################
def _load_annotation(filename, single_object):
    """Read an indexed-color annotation; binarize all ids to {0,1} when
    single_object is true."""
    annotation, _ = imread_indexed(filename)
    return (annotation != 0).astype(np.uint8) if single_object else annotation
def _get_num_objects(annotation):
""" Count number of objects from segmentation mask"""
ids = sorted(np.unique(annotation))
# Remove unknown-label
ids = ids[:-1] if ids[-1] == 255 else ids
# Handle no-background case
ids = ids if ids[0] else ids[1:]
return len(ids)
#################################
# LOADER CLASSES
#################################
class BaseLoader(ImageCollection):
    """
    Base class to load image sets (inherits from skimage.ImageCollection).

    Arguments:
        split (string): dataset split identifier; stored on the instance.
        path (string): path to sequence folder.
        regex (string): glob pattern used to locate images when no LMDB
            environment is supplied.
        load_func (func): function to load an image from disk
            (see skimage.ImageCollection).
        lmdb_env: optional open LMDB environment whose value at key
            basename(path) is a '|'-separated list of file names.
    """
    def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
        # Identity check is the idiomatic form (was `not lmdb_env == None`).
        if lmdb_env is not None:
            key_db = osp.basename(path)
            with lmdb_env.begin() as txn:
                _files_vec = txn.get(key_db.encode()).decode().split('|')
                _files = [bytes(osp.join(path, f).encode())
                          for f in _files_vec]
            super(BaseLoader, self).__init__(_files, load_func=load_func)
        else:
            super(BaseLoader, self).__init__(
                osp.join(path + '/' + regex), load_func=load_func)
        # Sequence name is the folder's basename.
        self.name = osp.basename(path)
        self.split = split

    def __str__(self):
        return "< class: '{}' name: '{}', frames: {} >".format(
            type(self).__name__, self.name, len(self))
class Sequence(BaseLoader):
    """Image-sequence loader rooted at cfg.PATH.SEQUENCES/<name>.

    Arguments:
        name (string): sequence name.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
        seq_path = osp.join(cfg.PATH.SEQUENCES, name)
        super(Sequence, self).__init__(split, seq_path, regex,
                                       lmdb_env=lmdb_env)
class SequenceClip_simple:
    """Lightweight clip view over an existing Sequence: shares all of its
    attributes and adds a starting frame index."""
    def __init__(self, seq, starting_frame):
        # Mirror every attribute of the wrapped sequence onto this object.
        for key, value in seq.__dict__.items():
            setattr(self, key, value)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class SequenceClip(BaseLoader):
    """Image-sequence loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, starting_frame, regex="*.jpg",
                 lmdb_env=None):
        seq_path = osp.join(cfg.PATH.SEQUENCES, name)
        super(SequenceClip, self).__init__(split, seq_path, regex,
                                           lmdb_env=lmdb_env)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class Segmentation(BaseLoader):
    """Loads a folder of segmentation masks and exposes per-object iteration.

    Arguments:
        path (string): path to annotation folder.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, path, single_object, regex="*.png", lmdb_env=None):
        loader = functools.partial(_load_annotation,
                                   single_object=single_object)
        super(Segmentation, self).__init__(split, path, regex, loader,
                                           lmdb_env=lmdb_env)
        # Object count is derived from the sequence's first mask.
        self.n_objects = _get_num_objects(self[0])

    def iter_objects_id(self):
        """Yield each object id in [1, n_objects]."""
        for obj_id in range(1, self.n_objects + 1):
            yield obj_id

    def iter_objects(self):
        """Yield, per object, the list of its binary masks over all frames."""
        for obj_id in self.iter_objects_id():
            yield [(frame == obj_id).astype(np.uint8) for frame in self]
class Annotation(Segmentation):
    """Ground-truth annotation loader rooted at cfg.PATH.ANNOTATIONS/<name>.

    Arguments:
        name (string): sequence name.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, single_object, regex="*.png", lmdb_env=None):
        annot_path = osp.join(cfg.PATH.ANNOTATIONS, name)
        super(Annotation, self).__init__(split, annot_path, single_object,
                                         regex, lmdb_env=lmdb_env)
class AnnotationClip_simple:
    """Lightweight clip view over an existing Annotation loader: shares all
    of its attributes and adds a starting frame index."""
    def __init__(self, annot, starting_frame):
        # Mirror every attribute of the wrapped annotation onto this object.
        for key, value in annot.__dict__.items():
            setattr(self, key, value)
        self.starting_frame = starting_frame
class AnnotationClip(Segmentation):
    """Annotation loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, starting_frame, single_object,
                 regex="*.png", lmdb_env=None):
        annot_path = osp.join(cfg.PATH.ANNOTATIONS, name)
        super(AnnotationClip, self).__init__(split, annot_path, single_object,
                                             regex, lmdb_env=lmdb_env)
        self.starting_frame = starting_frame
================================================
FILE: dataloader/base_youtube.py
================================================
import functools
import os.path as osp
import numpy as np
from PIL import Image
from skimage.io import ImageCollection
from misc.config_youtubeVOS import cfg, phase
from misc.io_aux import imread_indexed,imwrite_indexed
#################################
# HELPER FUNCTIONS
#################################
def _load_annotation(filename, single_object):
    """Load an indexed annotation map; collapse all ids to 1 when
    single_object is true."""
    annotation, _ = imread_indexed(filename)
    if not single_object:
        return annotation
    return (annotation != 0).astype(np.uint8)
def _get_num_objects(annotation):
""" Count number of objects from segmentation mask"""
ids = sorted(np.unique(annotation))
# Remove unknown-label
ids = ids[:-1] if ids[-1] == 255 else ids
# Handle no-background case
ids = ids if ids[0] else ids[1:]
return len(ids)
#################################
# LOADER CLASSES
#################################
class BaseLoader(ImageCollection):
    """
    Base class to load image sets (inherits from skimage.ImageCollection).
    Also validates that the sequence name is registered for the given split.

    Arguments:
        split (string): one of the phase.*.value split identifiers.
        path (string): path to sequence folder.
        regex (string): glob pattern used when no LMDB environment is given.
        load_func (func): function to load an image from disk
            (see skimage.ImageCollection).
        lmdb_env: optional open LMDB environment whose value at key
            basename(path) is a '|'-separated list of file names.

    Raises:
        Exception: if basename(path) is not listed in the split's
            cfg.SEQUENCES_* registry.
    """
    def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
        # Identity check is the idiomatic form (was `not lmdb_env == None`).
        if lmdb_env is not None:
            key_db = osp.basename(path)
            with lmdb_env.begin() as txn:
                _files_vec = txn.get(key_db.encode()).decode().split('|')
                _files = [bytes(osp.join(path, f).encode()) for f in _files_vec]
            super(BaseLoader, self).__init__(_files, load_func=load_func)
        else:
            super(BaseLoader, self).__init__(
                osp.join(path + '/' + regex), load_func=load_func)
        # Sequence name
        self.name = osp.basename(path)
        self.split = split
        # Map the split to its registry of known sequence names; the original
        # repeated an identical raise in four separate branches.
        if split == phase.TRAIN.value:
            known = cfg.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            known = cfg.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            known = cfg.SEQUENCES_TRAINVAL
        else:  # test split
            known = cfg.SEQUENCES_TEST
        if self.name not in known:
            raise Exception("Sequence name '{}' not found.".format(self.name))

    def __str__(self):
        return "< class: '{}' name: '{}', frames: {} >".format(
            type(self).__name__, self.name, len(self))
class Sequence(BaseLoader):
    """Image-sequence loader; the root folder is chosen by the split.

    Arguments:
        name (string): sequence name.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
        # Resolve the split-specific root once, then delegate to BaseLoader.
        if split == phase.TRAIN.value:
            root = cfg.PATH.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.SEQUENCES_TRAINVAL
        else:  # test split
            root = cfg.PATH.SEQUENCES_TEST
        super(Sequence, self).__init__(split, osp.join(root, name), regex,
                                       lmdb_env=lmdb_env)
class SequenceClip_simple:
    """Lightweight clip view over an existing Sequence: shares all of its
    attributes and adds a starting frame index.

    Arguments:
        seq: an already-constructed Sequence-like object.
        starting_frame (int): index of the clip's first frame.
    """
    def __init__(self, seq, starting_frame):
        # The original branched on `seq.split == phase.TRAIN.value`, but both
        # branches ran the exact same statement; copy unconditionally.
        self.__dict__.update(seq.__dict__)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class SequenceClip(BaseLoader):
    """Image-sequence loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, starting_frame, regex="*.jpg", lmdb_env=None):
        # Resolve the split-specific root once, then delegate to BaseLoader.
        if split == phase.TRAIN.value:
            root = cfg.PATH.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.SEQUENCES_TRAINVAL
        else:  # test split
            root = cfg.PATH.SEQUENCES_TEST
        super(SequenceClip, self).__init__(split, osp.join(root, name), regex,
                                           lmdb_env=lmdb_env)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class Segmentation(BaseLoader):
    """Loads a folder of segmentation masks and exposes per-object iteration.

    Arguments:
        path (string): path to annotation folder.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, path, single_object, regex="*.png", lmdb_env=None):
        loader = functools.partial(_load_annotation,
                                   single_object=single_object)
        super(Segmentation, self).__init__(split, path, regex, loader,
                                           lmdb_env=lmdb_env)
        # Object count is derived from the sequence's first mask.
        self.n_objects = _get_num_objects(self[0])

    def iter_objects_id(self):
        """Yield each object id in [1, n_objects]."""
        for obj_id in range(1, self.n_objects + 1):
            yield obj_id

    def iter_objects(self):
        """Yield, per object, the list of its binary masks over all frames."""
        for obj_id in self.iter_objects_id():
            yield [(frame == obj_id).astype(np.uint8) for frame in self]
class Annotation(Segmentation):
    """Ground-truth annotation loader; the root folder is chosen by the split.

    Arguments:
        name (string): sequence name.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, single_object, regex="*.png", lmdb_env=None):
        # Resolve the split-specific root once, then delegate upward.
        if split == phase.TRAIN.value:
            root = cfg.PATH.ANNOTATIONS_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.ANNOTATIONS_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.ANNOTATIONS_TRAINVAL
        else:  # test split
            root = cfg.PATH.ANNOTATIONS_TEST
        super(Annotation, self).__init__(split, osp.join(root, name),
                                         single_object, regex,
                                         lmdb_env=lmdb_env)
class AnnotationClip_simple:
    """Lightweight clip view over an existing Annotation loader: shares all
    of its attributes and adds a starting frame index.

    Arguments:
        annot: an already-constructed Annotation-like object.
        starting_frame (int): index of the clip's first frame.
    """
    def __init__(self, annot, starting_frame):
        # The original branched on `annot.split == phase.TRAIN.value`, but
        # both branches ran the exact same statement; copy unconditionally.
        self.__dict__.update(annot.__dict__)
        self.starting_frame = starting_frame
class AnnotationClip(Segmentation):
    """Annotation loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, starting_frame, single_object,
                 regex="*.png", lmdb_env=None):
        # Resolve the split-specific root once, then delegate upward.
        if split == phase.TRAIN.value:
            root = cfg.PATH.ANNOTATIONS_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.ANNOTATIONS_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.ANNOTATIONS_TRAINVAL
        else:  # test split
            root = cfg.PATH.ANNOTATIONS_TEST
        super(AnnotationClip, self).__init__(split, osp.join(root, name),
                                             single_object, regex,
                                             lmdb_env=lmdb_env)
        self.starting_frame = starting_frame
================================================
FILE: dataloader/custom_transforms.py
================================================
import random
import cv2
import numpy as np
import torch
import math
class ScaleNRotate(object):
    """Scale (zoom-in, zoom-out) and rotate the image and the ground truth.

    Args:
        two possibilities:
        1. rots (tuple): (minimum, maximum) rotation angle
           scales (tuple): (minimum, maximum) scale
        2. rots [list]: list of fixed possible rotation angles
           scales [list]: list of fixed possible scales
    """
    def __init__(self, rots=(-30, 30), scales=(.75, 1.25)):
        # rots and scales must both be tuples or both be lists.
        assert (isinstance(rots, type(scales)))
        self.rots = rots
        self.scales = scales

    def __call__(self, sample):
        if type(self.rots) == tuple:
            # Continuous range of scales and rotations, centered on 0 / 1.
            rot = (self.rots[1] - self.rots[0]) * random.random() - \
                  (self.rots[1] - self.rots[0]) / 2
            sc = (self.scales[1] - self.scales[0]) * random.random() - \
                 (self.scales[1] - self.scales[0]) / 2 + 1
        elif type(self.rots) == list:
            # Fixed set of rotations/scales. random.randint is inclusive on
            # BOTH endpoints, so the upper bound must be len - 1; the
            # original used len(...) and could raise IndexError.
            rot = self.rots[random.randint(0, len(self.rots) - 1)]
            sc = self.scales[random.randint(0, len(self.scales) - 1)]
        for elem in sample.keys():
            if 'fname' in elem:
                continue
            tmp = sample[elem]
            h, w = tmp.shape[:2]
            center = (w / 2, h / 2)
            assert (center != 0)  # Strange behaviour warpAffine
            M = cv2.getRotationMatrix2D(center, rot, sc)
            # Nearest-neighbour for binary masks, cubic for everything else.
            if ((tmp == 0) | (tmp == 1)).all():
                flagval = cv2.INTER_NEAREST
            else:
                flagval = cv2.INTER_CUBIC
            tmp = cv2.warpAffine(tmp, M, (w, h), flags=flagval)
            sample[elem] = tmp
        return sample
class Resize(object):
    """Randomly pick one of the given scales and resize every array in the
    sample by that factor.

    Args:
        scales (list): the list of candidate scale factors.
    """
    def __init__(self, scales=[0.5, 0.8, 1]):
        self.scales = scales

    def __call__(self, sample):
        # Draw one fixed scale for the whole sample.
        sc = self.scales[random.randint(0, len(self.scales) - 1)]
        for key in sample.keys():
            if 'fname' in key:
                continue
            arr = sample[key]
            # Nearest-neighbour for 2-D masks, cubic otherwise.
            interp = cv2.INTER_NEAREST if arr.ndim == 2 else cv2.INTER_CUBIC
            sample[key] = cv2.resize(arr, None, fx=sc, fy=sc,
                                     interpolation=interp)
        return sample
class RandomHorizontalFlip(object):
    """Horizontally flip every array in the sample with probability 0.5
    (one coin toss per sample, so all entries stay aligned)."""
    def __call__(self, sample):
        if random.random() >= 0.5:
            return sample
        for key in sample.keys():
            if 'fname' in key:
                continue
            sample[key] = cv2.flip(sample[key], flipCode=1)
        return sample
class ToTensor(object):
    """Convert ndarrays in sample to Tensors (H x W x C -> C x H x W).
    2-D arrays are promoted to a single channel first."""
    def __call__(self, sample):
        for elem in sample.keys():
            if 'fname' in elem:
                continue
            tmp = sample[elem]
            if tmp.ndim == 2:
                # Promote 2-D masks to a single-channel image.
                # (The original also had a leftover debug print here; removed.)
                tmp = tmp[:, :, np.newaxis]
            # swap color axis because
            # numpy image: H x W x C
            # torch image: C X H X W
            tmp = tmp.transpose((2, 0, 1))
            sample[elem] = torch.from_numpy(tmp)
        return sample
================================================
FILE: dataloader/dataset_utils.py
================================================
def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,
                                   target_transforms=None, augment=False,
                                   inputRes=None):
    """Build the combined DAVIS + YouTube-VOS dataset (with edge-based hard
    example mining) for the given split."""
    from .davis2017_youtubevos_ehem import DAVISLoader
    return DAVISLoader(args, split=split, transform=image_transforms,
                       target_transform=target_transforms,
                       augment=augment, inputRes=inputRes)
================================================
FILE: dataloader/davis2017.py
================================================
from __future__ import division
import torch
from torch.utils import data
import os
import cv2
import glob
import lmdb
import numpy as np
from PIL import Image
import os.path as osp
from scipy.misc import imresize
from matplotlib import pyplot as plt
from torchvision import transforms
from dataloader import custom_transforms as tr
from .base import Sequence, Annotation
from misc.config import cfg, phase, db_read_sequences
class DAVISLoader(data.Dataset):
    '''
    Dataset for DAVIS: yields one (image, flow, mask, bdry) tuple per frame.
    The last frame of every video is dropped from images/masks/edges so each
    retained frame lines up with a flow frame (flow lists are used as-is —
    presumably the flow folders already lack a frame; verify on disk).
    '''
    def __init__(self, args, split, inputRes, augment=False,
                 transform=None, target_transform=None):
        # args must provide .year ("2016"/"2017") and, when augment=True,
        # .rotation (max absolute rotation angle for augmentation).
        self._year = args.year
        self._phase = split
        self.transform = transform                # applied to image and flow
        self.target_transform = target_transform  # applied to mask and bdry
        self.inputRes = inputRes                  # target size or None
        self.augment = augment
        self.augment_transform = None
        self._single_object = False
        assert args.year == "2017" or args.year == "2016"
        if augment:
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),
                                scales=(.75, 1.25))])
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time. It is recommended to use LMDB.')
        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq) for s in self._db_sequences]
        # NOTE(review): db_read_sequences appears to return a one-shot
        # iterable — it is re-read before every additional pass below.
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Load annotations
        self.annotations = [Annotation(self._phase,s.name, self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Load Videos: keep only sequences whose 'set' matches this split.
        self.videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            if s['set'] == self._phase:
                self.videos.append(s['name'])
        # Flat per-frame file lists across all selected videos.
        self.imagefiles = []
        self.maskfiles = []
        self.flowfiles = []
        self.edgefiles = []
        for _video in self.videos:
            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES, _video, '*.jpg')))
            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS, _video, '*.png')))
            flowfiles = sorted(glob.glob(os.path.join(cfg.PATH.FLOW, _video, '*.png')))
            edgefiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_EDGE, _video, '*.png')))
            # Drop each video's last frame so every entry has a flow frame.
            self.imagefiles.extend(imagefiles[:-1])
            self.maskfiles.extend(maskfiles[:-1])
            self.flowfiles.extend(flowfiles)
            self.edgefiles.extend(edgefiles[:-1])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        # All four modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) == len(self.flowfiles) == len(self.edgefiles))

    def __len__(self):
        # One dataset item per retained frame.
        return len(self.imagefiles)

    def __getitem__(self, index):
        """Return (image, flow, mask, bdry) for frame `index`."""
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        flowfile = self.flowfiles[index]
        edgefile = self.edgefiles[index]
        image = Image.open(imagefile).convert('RGB')
        # NOTE(review): flow frames are read as 3-channel images — presumably
        # color-coded optical flow renderings; confirm against data prep.
        flow = Image.open(flowfile).convert('RGB')
        mask = cv2.imread(maskfile, 0)
        # Binarize: any object id becomes foreground (255).
        mask[mask > 0] = 255
        bdry = cv2.imread(edgefile, 0)
        #plt.imshow(bdry)
        #plt.show()
        mask = Image.fromarray(mask)
        bdry = Image.fromarray(bdry)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize was removed in SciPy >= 1.3;
            # this path requires an old scipy (see requirements) or a port.
            image = imresize(image, self.inputRes)
            flow = imresize(flow, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
            bdry = imresize(bdry, self.inputRes, interp='nearest')
        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry}
        if self.augment_transform is not None:
            # Augmentations operate on the whole sample so all entries get
            # the same flip/rotation/scale.
            sample = self.augment_transform(sample)
        image, flow, mask, bdry = sample['image'], sample['flow'], sample['mask'], sample['bdry']
        if self.transform is not None:
            image = self.transform(image)
            flow = self.transform(flow)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
            bdry = self.target_transform(bdry)
        return image, flow, mask, bdry
================================================
FILE: dataloader/davis2017_youtubevos_ehem.py
================================================
from __future__ import division
from torch.utils import data
import os
import cv2
import glob
import lmdb
import numpy as np
from PIL import Image
import os.path as osp
from scipy.misc import imresize
from torchvision import transforms
from dataloader import custom_transforms as tr
from .base import Sequence, Annotation
from misc.config import cfg as cfg_davis
from misc.config_youtubeVOS import cfg as cfg_youtube
from misc.config import db_read_sequences as db_read_sequences_davis
from misc.config_youtubeVOS import db_read_sequences_train as db_read_sequences_train_youtube
class DAVISLoader(data.Dataset):
    '''
    Dataset for DAVIS (train split additionally mixes in YouTube-VOS).
    Yields (image, flow, mask, bdry, negative_pixels) per frame, where
    negative_pixels are HED edge responses outside a dilated object mask —
    used as hard negative supervision for edge prediction.
    '''
    def __init__(self, args, split, inputRes, augment=False,
                 transform=None, target_transform=None):
        # args must provide .year ("2016"/"2017") and, when augment=True,
        # .rotation (max absolute rotation angle for augmentation).
        self._year = args.year
        self._phase = split
        self.transform = transform                # applied to image and flow
        self.target_transform = target_transform  # applied to the targets
        self.inputRes = inputRes                  # target size or None
        self.augment = augment
        self.augment_transform = None
        self._single_object = False
        assert args.year == "2017" or args.year == "2016"
        if augment:
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),
                                scales=(.75, 1.25))])
        # Flat per-frame file lists, filled by load_davis/load_youtubevos.
        self.imagefiles = []
        self.maskfiles = []
        self.flowfiles = []
        self.edgefiles = []
        self.hedfiles = []
        if split == 'train':
            # Training mixes DAVIS with (subsampled) YouTube-VOS.
            self.load_davis(args)
            self.load_youtubevos(args)
        else:
            self.load_davis(args)

    def __len__(self):
        # One dataset item per retained frame.
        return len(self.imagefiles)

    def __getitem__(self, index):
        """Return (image, flow, mask, bdry, negative_pixels) for a frame."""
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        flowfile = self.flowfiles[index]
        edgefile = self.edgefiles[index]
        hedfile = self.hedfiles[index]
        image = Image.open(imagefile).convert('RGB')
        # NOTE(review): flow frames are read as 3-channel images — presumably
        # color-coded optical flow renderings; confirm against data prep.
        flow = Image.open(flowfile).convert('RGB')
        mask = cv2.imread(maskfile, 0)
        # Binarize: any object id becomes foreground (255).
        mask[mask > 0] = 255
        bdry = cv2.imread(edgefile, 0)
        hed = cv2.imread(hedfile, 0)
        # enlarge the object mask
        kernel = np.ones((11, 11), np.uint8) # use a large kernel
        dilated_mask = cv2.dilate(mask, kernel, iterations=1)
        # 1.0 outside the dilated object, 0.0 inside: keeps only HED edge
        # responses away from the object as hard negatives.
        inverse_dilated_mask = (255.0 - dilated_mask) / 255.0
        negative_pixels = hed * inverse_dilated_mask
        kernel = np.ones((5, 5), np.uint8) # use a small kernel
        # Slightly thicken the negative-edge responses.
        negative_pixels = cv2.dilate(negative_pixels, kernel, iterations=1)
        mask = Image.fromarray(mask)
        bdry = Image.fromarray(bdry)
        # NOTE(review): negative_pixels is float at this point, so this
        # yields a float-mode PIL image — confirm downstream expectations.
        negative_pixels = Image.fromarray(negative_pixels)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize was removed in SciPy >= 1.3;
            # this path requires an old scipy (see requirements) or a port.
            image = imresize(image, self.inputRes)
            flow = imresize(flow, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
            bdry = imresize(bdry, self.inputRes, interp='nearest')
            negative_pixels = imresize(negative_pixels, self.inputRes,
                                       interp='nearest')
        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry,
                  'negative_pixels': negative_pixels}
        if self.augment_transform is not None:
            # Augmentations operate on the whole sample so all entries get
            # the same flip/rotation/scale.
            sample = self.augment_transform(sample)
        image, flow, mask, bdry, negative_pixels =\
            sample['image'], sample['flow'],\
            sample['mask'], sample['bdry'], sample['negative_pixels']
        if self.transform is not None:
            image = self.transform(image)
            flow = self.transform(flow)
        if self.target_transform is not None:
            # Targets are 2-D here; add a channel axis before transforming.
            mask = mask[:, :, np.newaxis]
            bdry = bdry[:, :, np.newaxis]
            negative_pixels = negative_pixels[:, :, np.newaxis]
            mask = self.target_transform(mask)
            bdry = self.target_transform(bdry)
            negative_pixels = self.target_transform(negative_pixels)
        return image, flow, mask, bdry, negative_pixels

    def load_youtubevos(self, args):
        """Append (subsampled) YouTube-VOS training frames to the file lists."""
        self._db_sequences = db_read_sequences_train_youtube()
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            # NOTE(review): lmdb_env_annot is opened but unused here.
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time.'
                  ' It is recommended to use LMDB.')
        # Load sequences
        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq)
                          for s in self._db_sequences]
        # Collect the video names (sequence ids) to iterate below.
        videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            videos.append(s)
        for _video in videos:
            imagefile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))
            maskfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))
            flowfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.FLOW, _video, '*.png')))
            edgefile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.ANNOTATIONS_TRAIN_EDGE, _video, '*.png')))
            hedfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.HED, _video, '*.jpg')))
            # Keep every 10th frame, dropping each video's last frame.
            # NOTE(review): flow uses [::10] (no [:-1]) — presumably the flow
            # folder already lacks the last frame; confirm on disk, the
            # assert below depends on it.
            self.imagefiles.extend(imagefile[:-1:10])
            self.maskfiles.extend(maskfile[:-1:10])
            self.flowfiles.extend(flowfile[::10])
            self.edgefiles.extend(edgefile[:-1:10])
            self.hedfiles.extend(hedfile[:-1:10])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        print('hed: ', len(self.hedfiles))
        print('flow: ', len(self.flowfiles))
        print('edge: ', len(self.edgefiles))
        # All five modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) ==
               len(self.flowfiles) == len(self.edgefiles) ==
               len(self.hedfiles))

    def load_davis(self, args):
        """Append DAVIS frames for the current split to the file lists."""
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time.'
                  ' It is recommended to use LMDB.')
        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq)
                          for s in self._db_sequences]
        # NOTE(review): db_read_sequences_davis appears to return a one-shot
        # iterable — it is re-read before every additional pass below.
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Load annotations
        self.annotations = [Annotation(
            self._phase, s.name, self._single_object, lmdb_env=lmdb_env_annot)
            for s in self._db_sequences]
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Load Videos: keep only sequences whose 'set' matches this split.
        videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            if s['set'] == self._phase:
                videos.append(s['name'])
        for _video in videos:
            imagefile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.SEQUENCES, _video, '*.jpg')))
            maskfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.ANNOTATIONS, _video, '*.png')))
            flowfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.FLOW, _video, '*.png')))
            edgefile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.ANNOTATIONS_EDGE, _video, '*.png')))
            hedfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.HED, _video, '*.jpg')))
            # Drop each video's last frame so every entry has a flow frame
            # (flow lists are used as-is; see note in load_youtubevos).
            self.imagefiles.extend(imagefile[:-1])
            self.maskfiles.extend(maskfile[:-1])
            self.flowfiles.extend(flowfile)
            self.edgefiles.extend(edgefile[:-1])
            self.hedfiles.extend(hedfile[:-1])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        print('hed: ', len(self.hedfiles))
        print('flow: ', len(self.flowfiles))
        print('edge: ', len(self.edgefiles))
        # All five modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) ==
               len(self.flowfiles) == len(self.edgefiles) ==
               len(self.hedfiles))
================================================
FILE: dataloader/hed_bsds.py
================================================
import os
from PIL import Image
from torch.utils import data
class HEDBSDSTrain(data.Dataset):
    """HED-style BSDS training set: parses 'train_pair.lst' (one
    'image_path label_path' pair per line, relative to root_dir) and yields
    (image, label) pairs."""
    def __init__(self, root_dir, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform
        list_file = os.path.join(root_dir, 'train_pair.lst')
        with open(list_file) as f:
            pairs = [line.split() for line in f.readlines()]
        self.imagefiles = [os.path.join(root_dir, p[0]) for p in pairs]
        self.labelfiles = [os.path.join(root_dir, p[1]) for p in pairs]

    def __len__(self):
        return len(self.imagefiles)

    def __getitem__(self, index):
        image = Image.open(self.imagefiles[index]).convert('RGB')
        label = Image.open(self.labelfiles[index]).convert('L')
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label
class HEDBSDSTest(data.Dataset):
    """HED-style BSDS test set: parses 'test.lst' (one image path per line,
    relative to root_dir) and yields (image, imagefile) pairs."""
    def __init__(self, root_dir, transform=None):
        self.transform = transform
        list_file = os.path.join(root_dir, 'test.lst')
        with open(list_file) as f:
            self.imagefiles = [os.path.join(root_dir, line.split()[0])
                               for line in f.readlines()]

    def __len__(self):
        return len(self.imagefiles)

    def __getitem__(self, index):
        imagefile = self.imagefiles[index]
        image = Image.open(imagefile).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, imagefile
================================================
FILE: dataloader/youtubeVOS.py
================================================
#TODO
from collections import namedtuple
import os
import cv2
import numpy as np
from PIL import Image
from .base_youtube import Sequence, SequenceClip, Annotation, AnnotationClip, BaseLoader, Segmentation, SequenceClip_simple, AnnotationClip_simple
from misc.config_youtubeVOS import cfg,phase,db_read_sequences_train,db_read_sequences_val, db_read_sequences_test, db_read_sequences_trainval
import os.path as osp
import glob
import lmdb
from scipy.misc import imresize
from torch.utils import data
from torchvision import transforms
from dataloader import custom_transforms as tr
class YoutubeVOSLoader(data.Dataset):
    # Frame-level YouTube-VOS dataset: yields one (RGB image, binary mask)
    # pair per training frame, with optional flip/rotate augmentation.
    def __init__(self,
                 args,
                 transform=None,
                 target_transform=None,
                 augment=False,
                 split = 'train',
                 inputRes = None):
        # Copy configuration from the argparse namespace.
        self._phase = split
        self._single_object = args.single_object
        self._length_clip = args.length_clip
        self.transform = transform
        self.target_transform = target_transform
        self.split = split
        self.inputRes = inputRes
        self.max_seq_len = args.gt_maxseqlen
        self.dataset = args.dataset
        self.flip = augment
        if augment:
            # Joint image+mask augmentation (flip, scale in [0.75, 1.25],
            # rotation in [-args.rotation, args.rotation] degrees).
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation), scales=(.75, 1.25))])
        else:
            self.augment_transform = None
        # Select the sequence-name list matching the requested split.
        if self._phase == phase.TRAIN.value:
            self._db_sequences = db_read_sequences_train()
        elif self._phase == phase.VAL.value:
            self._db_sequences = db_read_sequences_val()
        elif self._phase == phase.TRAINVAL.value:
            self._db_sequences = db_read_sequences_trainval()
        else: #self._phase == 'test':
            self._db_sequences = db_read_sequences_test()
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time. It is recommended to use LMDB.')
        # Load sequences
        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq) for s in self._db_sequences]
        # Load annotations
        self.annotations = [Annotation(self._phase,s,self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]
        # Load sequences
        self.videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            self.videos.append(s)
        # Flatten per-video frame/annotation paths into one indexable list.
        # NOTE(review): paths always come from the TRAIN folders regardless
        # of split -- confirm this is intentional for val/test.
        self.imagefiles = []
        self.maskfiles = []
        for _video in self.videos:
            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))
            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))
            self.imagefiles.extend(imagefiles)
            self.maskfiles.extend(maskfiles)
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))

    def __len__(self):
        # Length counts frames, not videos.
        return len(self.imagefiles)

    def __getitem__(self, index):
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        image = Image.open(imagefile).convert('RGB')
        # Grayscale read; binarize all object ids to a single 255 foreground.
        mask = cv2.imread(maskfile, 0)
        mask[mask > 0] = 255
        mask = Image.fromarray(mask)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize returns ndarrays (not PIL
            # images); the transforms below must accept arrays -- confirm.
            image = imresize(image, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
        sample = {'image': image, 'gt': mask}
        if self.augment_transform is not None:
            sample = self.augment_transform(sample)
        image, mask = sample['image'], sample['gt']
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return image, mask
================================================
FILE: dataset_lmdb_generator.py
================================================
# This script generates a data structure in the form of key-value storage. This is made in the huge amount of
# calls to the function os.listdir inside base_youtube.py
import os
import lmdb
from args import get_parser
class LMDBGenerator:
    """Builds an LMDB key/value index of frame filenames.

    For every sequence folder under ``frames_dir`` it stores, under the key
    ``<folder name>``, the '|'-joined sorted basenames of the files matching
    ``ext``. This avoids repeated os.listdir calls in the dataloaders.
    """

    def __init__(self, ext='.jpg', gen_type='seq'):
        self.ext = ext            # file extension to index (e.g. '.jpg')
        self.gen_type = gen_type  # suffix of the lmdb directory name

    def generate_lmdb_file(self, root_dir, frames_dir):
        """Index every sequence folder of frames_dir into root_dir/lmdb_<type>."""
        env = lmdb.open(os.path.join(root_dir, 'lmdb_' + self.gen_type))
        for seq_name in os.listdir(frames_dir):
            seq_dir = os.path.join(frames_dir, seq_name)
            frames = sorted(f for f in os.listdir(seq_dir) if f.endswith(self.ext))
            joined = '|'.join(frames)
            print( "Generating lmdb for: " + seq_dir)
            with env.begin(write=True) as txn:
                txn.put(seq_name.encode('ascii'), joined.encode())
if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    if args.dataset == 'youtube':
        from misc.config_youtubeVOS import cfg
        # YouTube-VOS: index train and test splits separately for both
        # frames (.jpg) and annotations (.png).
        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TRAIN)
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TEST)
        frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TRAIN)
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TEST)
    else:
        from misc.config import cfg
        # DAVIS: a single JPEGImages / Annotations root per dataset.
        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES)
        frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS)
================================================
FILE: measures/__init__.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
from .jaccard import db_eval_iou
from .f_boundary import db_eval_boundary
from misc import log
try:
from .t_stability import db_eval_t_stab
except:
log.warning("Temporal stability not available")
from .statistics import _statistics
================================================
FILE: measures/f_boundary.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import numpy as np
""" Utilities for computing, reading and saving benchmark evaluation."""
def db_eval_boundary(foreground_mask,gt_mask,bound_th=0.008):
    """
    Boundary F-measure between a predicted and an annotated binary mask.

    Boundary pixels of each mask are dilated by a tolerance radius and
    matched against the other mask's boundary (morphological speed-up of
    the boundary precision/recall computation).

    Arguments:
        foreground_mask (ndarray): binary segmentation image.
        gt_mask        (ndarray): binary annotated image.
        bound_th       (float): tolerance; >= 1 means absolute pixels,
                                otherwise a fraction of the image diagonal.

    Returns:
        F (float): boundaries F-measure.
    """
    assert np.atleast_3d(foreground_mask).shape[2] == 1

    # Tolerance radius in pixels.
    if bound_th >= 1:
        bound_pix = bound_th
    else:
        bound_pix = np.ceil(bound_th * np.linalg.norm(foreground_mask.shape))

    # One-pixel-wide boundary maps of both masks.
    fg_boundary = seg2bmap(foreground_mask)
    gt_boundary = seg2bmap(gt_mask)

    from skimage.morphology import binary_dilation, disk

    fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
    gt_dil = binary_dilation(gt_boundary, disk(bound_pix))

    # Boundary pixels of one mask that fall inside the other's tolerance band.
    gt_match = gt_boundary * fg_dil
    fg_match = fg_boundary * gt_dil

    n_fg = np.sum(fg_boundary)
    n_gt = np.sum(gt_boundary)

    # Degenerate cases: one or both boundaries empty.
    if n_fg == 0 and n_gt > 0:
        precision, recall = 1, 0
    elif n_fg > 0 and n_gt == 0:
        precision, recall = 0, 1
    elif n_fg == 0 and n_gt == 0:
        precision, recall = 1, 1
    else:
        precision = np.sum(fg_match) / float(n_fg)
        recall = np.sum(gt_match) / float(n_gt)

    # Harmonic mean of precision and recall (0 when both are 0).
    if precision + recall == 0:
        return 0
    return 2 * precision * recall / (precision + recall)
def seg2bmap(seg,width=None,height=None):
    """
    From a segmentation, compute a binary boundary map with 1 pixel wide
    boundaries. The boundary pixels are offset by 1/2 pixel towards the
    origin from the actual segment boundary.

    Arguments:
        seg    : Segments labeled from 1..k.
        width  : Width of desired bmap <= seg.shape[1]
        height : Height of desired bmap <= seg.shape[0]

    Returns:
        bmap (ndarray): Binary boundary map.

    David Martin <dmartin@eecs.berkeley.edu>
    January 2003
    """
    # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
    # (The original follow-up `seg[seg>0] = 1` was a no-op on a bool array.)
    seg = seg.astype(bool)

    assert np.atleast_3d(seg).shape[2] == 1

    width = seg.shape[1] if width is None else width
    height = seg.shape[0] if height is None else height

    h,w = seg.shape[:2]

    ar1 = float(width) / float(height)
    ar2 = float(w) / float(h)

    # Parenthesize the comparisons: `|` binds tighter than `>`, so the
    # original expression compared against bitwise-ORed operands.
    assert not ((width > w) | (height > h) | (abs(ar1 - ar2) > 0.01)),\
            'Can''t convert %dx%d seg to %dx%d bmap.'%(w,h,width,height)

    # Shift the mask left / up / up-left; XOR against the original marks the
    # pixels whose right/bottom/diagonal neighbor differs.
    e  = np.zeros_like(seg)
    s  = np.zeros_like(seg)
    se = np.zeros_like(seg)

    e[:,:-1]    = seg[:,1:]
    s[:-1,:]    = seg[1:,:]
    se[:-1,:-1] = seg[1:,1:]

    b        = seg^e | seg^s | seg^se
    b[-1,:]  = seg[-1,:]^e[-1,:]
    b[:,-1]  = seg[:,-1]^s[:,-1]
    b[-1,-1] = 0

    if w == width and h == height:
        bmap = b
    else:
        # Rescale boundary pixels to the requested bmap size.
        bmap = np.zeros((height,width))
        for x in range(w):
            for y in range(h):
                if b[y,x]:
                    # `floor` was undefined (NameError) in the original; use
                    # np.floor. Second axis used `width / h` -- a typo for
                    # `width / w`.
                    j = 1 + int(np.floor((y - 1) + height / h))
                    i = 1 + int(np.floor((x - 1) + width / w))
                    bmap[j,i] = 1

    return bmap
================================================
FILE: measures/jaccard.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
""" Compute Jaccard Index. """
import numpy as np
import matplotlib.pyplot as plt
def db_eval_iou_multi(annotations, segmentations):
    """Mean Jaccard index over a batch.

    Both arguments are (batch, 1, H, W) arrays; the single channel of each
    item is scored with db_eval_iou and the scores are averaged.
    """
    batch = annotations.shape[0]
    total = 0.0
    for k in range(batch):
        total += db_eval_iou(annotations[k, 0, :, :],
                             segmentations[k, 0, :, :])
    return total / batch
def db_eval_iou(annotation,segmentation):
    """ Compute region similarity as the Jaccard Index.

    Arguments:
        annotation   (ndarray): binary annotation map.
        segmentation (ndarray): binary segmentation map.

    Return:
        jaccard (float): region similarity (1 when both maps are empty).
    """
    # Binarize both maps at 0.5.
    fg_ann = annotation > 0.5
    fg_seg = segmentation > 0.5

    # Two empty masks are defined as a perfect match.
    if np.isclose(np.sum(fg_ann), 0) and np.isclose(np.sum(fg_seg), 0):
        return 1

    intersection = np.sum(fg_ann & fg_seg)
    union = np.sum(fg_ann | fg_seg, dtype=np.float32)
    return intersection / union
================================================
FILE: measures/statistics.py
================================================
import numpy as np
import warnings
def mean(X):
    """
    Average of X, ignoring NaN entries.
    """
    return np.nanmean(X)
def recall(X,threshold=0.5):
    """
    Fraction of values of X scoring higher than 'threshold' (NaN-safe mean
    of the boolean comparisons).
    """
    return np.nanmean(np.asarray(X) > threshold)
def decay(X,n_bins=4):
    """
    Performance loss over time: mean of the first bin of per-frame scores
    minus mean of the last bin (positive = performance degrades).

    Arguments:
        X: sliceable sequence of per-frame measurements.
        n_bins: number of temporal bins (the code below assumes 4 when
                indexing D_bins[3]).
    """
    # Bin edges over the sequence; +1e-10 nudges exact .5 values upward
    # before rounding.
    ids = np.round(np.linspace(1,len(X),n_bins+1)+1e-10)-1;
    # Use a native integer index type: the original uint8 cast silently
    # wrapped for sequences longer than 255 frames.
    ids = ids.astype(int)

    D_bins = [X[ids[i]:ids[i+1]+1] for i in range(0,4)]

    with warnings.catch_warnings():
        # Empty bins produce a harmless all-NaN warning.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        D = np.nanmean(D_bins[0])-np.mean(D_bins[3])

    return D
def std(X):
    """
    Standard deviation of X (population std, ddof=0).
    """
    return np.std(np.asarray(X))
# Registry mapping statistic names (as referenced by cfg.EVAL.STATISTICS)
# to the functions defined above.
_statistics = {
    'decay' : decay,
    'mean' : mean,
    'recall': recall,
    'std' : std
}

def get(name):
    """Look up a statistic function by name (raises KeyError if unknown)."""
    return _statistics[name]
================================================
FILE: measures/t_stability.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import sys
import numpy as np
import scipy.spatial.distance as ssd
from tstab import *
def get_bijective_pairs(pairs,costmat):
    """Keep only matches that are minimum-cost in both directions.

    Deduplicates first by left index (against costmat), then by right index
    (against the transposed cost matrix).
    """
    result = bij_pairs_one_dim(pairs, costmat, 0)
    result = bij_pairs_one_dim(result, costmat.T, 1)
    return result
def bij_pairs_one_dim(pairs, costmat, left_or_right):
    """
    For every unique index on one side of the matching, keep only the
    candidate pair with the smallest cost.

    Arguments:
        pairs (ndarray): (N, 2) candidate index pairs.
        costmat (ndarray): cost matrix indexed as costmat[side, other_side].
        left_or_right (int): 0 to deduplicate by the left index, 1 by the
            right index.

    Returns:
        ndarray: the selected pairs (one per unique index on that side).
    """
    bij_pairs = []
    ids1 = np.unique(pairs[:,left_or_right])
    for ii in range(len(ids1)):
        # np.int was removed from NumPy; the builtin int is equivalent here.
        curr_pairs = pairs[pairs[:,left_or_right]==ids1[ii],:].astype(int)
        curr_costs = costmat[curr_pairs[:,left_or_right], curr_pairs[:,1-left_or_right]]
        b = np.argmin(curr_costs)
        bij_pairs.append(curr_pairs[b])
    return np.array(bij_pairs)
def hist_cost_2(BH1,BH2):
nsamp1,nbins=BH1.shape
nsamp2,nbins=BH2.shape
eps = 2.2204e-16
BH1n = BH1 / (np.sum(BH1,axis=1,keepdims=True)+eps)
BH2n = BH2 / (np.sum(BH2,axis=1,keepdims=True)+eps)
tmp1 = np.tile(np.transpose(np.atleast_3d(BH1n),[0,2,1]),(1,nsamp2,1))
tmp2 = np.tile(np.transpose(np.atleast_3d(BH2n.T),[2,1,0]),(nsamp1,1,1))
HC = 0.5*np.sum((tmp1-tmp2)**2/(tmp1+tmp2+eps),axis=2)
return HC
def sc_compute(Bsamp,Tsamp,mean_dist,nbins_theta,nbins_r,r_inner,r_outer,out_vec):
in_vec = (out_vec==0).ravel()
nsamp = Bsamp.shape[1]
r_array=ssd.squareform(ssd.pdist(Bsamp.T)).T
theta_array_abs0=Bsamp[1,:].reshape(-1,1).dot(np.ones((1,nsamp))) - \
np.ones((nsamp,1)).dot(Bsamp[1,:].reshape(1,-1))
theta_array_abs1=Bsamp[0,:].reshape(-1,1).dot(np.ones((1,nsamp))) - \
np.ones((nsamp,1)).dot(Bsamp[0,:].reshape(1,-1))
theta_array_abs = np.arctan2(theta_array_abs0,theta_array_abs1).T
theta_array=theta_array_abs-Tsamp.T.dot(np.ones((1,nsamp)))
if mean_dist is None:
mean_dist = np.mean(r_array[in_vec].T[in_vec].T)
r_array_n = r_array / mean_dist
r_bin_edges=np.logspace(np.log10(r_inner),np.log10(r_outer),nbins_r)
r_array_q=np.zeros((nsamp,nsamp))
for m in range(int(nbins_r)):
r_array_q=r_array_q+(r_array_n<r_bin_edges[m])
fz = r_array_q > 0
theta_array_2 = np.fmod(np.fmod(theta_array,2*np.pi)+2*np.pi,2*np.pi)
theta_array_q = 1+np.floor(theta_array_2/(2*np.pi/nbins_theta))
nbins=nbins_theta*nbins_r
BH=np.zeros((nsamp,nbins))
count = 0
for n in range(nsamp):
fzn=fz[n]&in_vec
Sn = np.zeros((nbins_theta,nbins_r))
coords = np.hstack((theta_array_q[n,fzn].reshape(-1,1),
r_array_q[n,fzn].astype(np.int).reshape(-1,1)))
# SLOW...
#for i,j in coords:
#Sn[i-1,j-1] += 1
# FASTER
ids = np.ravel_multi_index((coords.T-1).astype(np.int),Sn.shape)
Sn = np.bincount(ids.ravel(),minlength = np.prod(Sn.shape)).reshape(Sn.shape)
BH[n,:] = Sn.T[:].ravel()
return BH.astype(np.int),mean_dist
def db_eval_t_stab(fgmask,ground_truth,timing=True):
    """
    Calculates the temporal stability index between two masks

    Arguments:
        fgmask (ndarray): Foreground Object mask at frame t
        ground_truth (ndarray): Foreground Object mask at frame t+1

    Return:
        T (ndarray): Temporal (in-)stability
        raw_results (ndarray): Supplemental values
    """
    # Contour extraction / upsampling thresholds (mask2poly,
    # contour_upsample come from the compiled `tstab` extension).
    cont_th = 3
    cont_th_up = 3

    # Shape context parameters
    r_inner = 1.0/8.0
    r_outer = 2.0
    nbins_r = 5.0
    nbins_theta = 12.0

    poly1 = mask2poly(fgmask,cont_th)
    poly2 = mask2poly(ground_truth,cont_th)

    # Stability is undefined when either frame has no contour.
    if len(poly1.contour_coords) == 0 or \
            len(poly2.contour_coords) == 0:
        return np.nan

    # Only the longest contour of each mask is compared.
    Cs1 = get_longest_cont(poly1.contour_coords)
    Cs2 = get_longest_cont(poly2.contour_coords)

    upCs1 = contour_upsample(Cs1,cont_th_up)
    upCs2 = contour_upsample(Cs2,cont_th_up)

    # Shape-context descriptor per sampled contour point.
    scs1,_=sc_compute(upCs1.T,np.zeros((1,upCs1.shape[0])),None,
        nbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs1.shape[0])))
    scs2,_=sc_compute(upCs2.T,np.zeros((1,upCs2.shape[0])),None,
        nbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs2.shape[0])))

    # Match with the 0-0 alignment
    costmat = hist_cost_2(scs1,scs2)
    pairs ,max_sx,max_sy = match_dijkstra(np.ascontiguousarray(costmat))

    # Shift costmat
    costmat2 = np.roll(costmat ,-(max_sy+1),axis=1)
    costmat2 = np.roll(costmat2,-(max_sx+1),axis=0)

    # Redo again with the correct alignment
    pairs,_,_ = match_dijkstra(costmat2)

    # Put the pairs back to the original place
    pairs[:,0] = np.mod(pairs[:,0]+max_sx+1, costmat.shape[0])
    pairs[:,1] = np.mod(pairs[:,1]+max_sy+1, costmat.shape[1])

    # Keep bijective matches and average their cost: lower = more stable.
    pairs = get_bijective_pairs(pairs,costmat)

    pairs_cost = costmat[pairs[:,0], pairs[:,1]]
    min_cost = np.average(pairs_cost)

    return min_cost
================================================
FILE: misc/__init__.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
from .timer import Timer
from .logger import logging as log
from .config import phase,cfg,db_read_sequences
from .visual import overlay
================================================
FILE: misc/config.py
================================================
#!/usr/bin/env python
import yaml
import os.path as osp
import numpy as np
from easydict import EasyDict as edict
from enum import Enum
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from FAST-RCNN (Ross Girshick)
# ----------------------------------------------------------------------------
""" Configuration file."""
class phase(Enum):
    # DAVIS dataset splits; the string values match the `set` field used in
    # data/db_info.yaml.
    TRAIN = 'train'
    VAL = 'val'
    TESTDEV = 'test-dev'
    TRAINVAL = 'train-val'
__C = edict()

# Public access to configuration settings
cfg = __C

# Number of CPU cores used to parallelize evaluation.
__C.N_JOBS = 32

# Paths to dataset folders
__C.PATH = edict()

# Dataset resolution: ("480p","1080p")
__C.RESOLUTION="480p"

# Dataset year: ("2016","2017")
__C.YEAR = "2017"

__C.PHASE = phase.VAL

# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)
__C.MULTIOBJECT = True

# Root folder of project
__C.PATH.ROOT = osp.abspath('.')

# Data folder
__C.PATH.DATA = osp.abspath('./data/DAVIS2017')

# Path to input images
__C.PATH.SEQUENCES = osp.join(__C.PATH.DATA, "JPEGImages", __C.RESOLUTION)

# Path to annotations
__C.PATH.ANNOTATIONS = osp.join(__C.PATH.DATA, "Annotations", __C.RESOLUTION)
__C.PATH.ANNOTATIONS_EDGE = osp.join(__C.PATH.DATA, "Annotations_edge",
                                     __C.RESOLUTION)

# Precomputed optical-flow and HED-edge inputs.
__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'davis2017-flow/')
__C.PATH.HED = osp.join(__C.PATH.DATA, 'davis2017-hed/')

# Color palette
__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))

# Paths to files
__C.FILES = edict()

# Path to property file, holding information on evaluation sequences.
__C.FILES.DB_INFO = osp.abspath(osp.join(__C.PATH.ROOT,
                                         "data/db_info.yaml"))

# Measures and Statistics
__C.EVAL = edict()

# Metrics: J: region similarity, F: contour accuracy, T: temporal stability
__C.EVAL.METRICS = ['J', 'F']

# Statistics computed for each of the metrics listed above
__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']
def db_read_info():
    """ Read dataset properties from file (data/db_info.yaml) as an EasyDict."""
    with open(cfg.FILES.DB_INFO, 'r') as f:
        # yaml.load without an explicit Loader is unsafe and is rejected by
        # PyYAML >= 6; safe_load parses this plain-data file identically.
        return edict(yaml.safe_load(f))
def db_read_attributes():
    """ Read the `attributes` section of the dataset property file. """
    return db_read_info().attributes
def db_read_years():
    """ Read the `years` section of the dataset property file. """
    return db_read_info().years
def db_read_sequences(year=None,db_phase=None):
    """ Read list of sequences, optionally filtered by year and split.

    Returns a list. (The original returned the lazy `filter` object, which
    is exhausted after a single pass and silently yields nothing on reuse.)
    """
    sequences = db_read_info().sequences

    if year is not None:
        # Keep sequences introduced up to and including `year`.
        sequences = filter(
            lambda s: int(s.year) <= int(year), sequences)

    if db_phase is not None:
        if db_phase == phase.TRAINVAL:
            # NOTE(review): s.set is read from YAML (a string); comparing it
            # to phase enum members matches the original behavior but may
            # never be true -- confirm against db_info.yaml / callers.
            sequences = filter(lambda s: ((s.set == phase.VAL) or
                (s.set == phase.TRAIN)), sequences)
        else:
            sequences = filter(lambda s: s.set == db_phase, sequences)

    return list(sequences)
# Load all sequences
__C.SEQUENCES = dict([(sequence.name, sequence)
                      for sequence in db_read_sequences()])

# Indexed color palette used when writing annotation PNGs (N x 3 uint8).
__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)
================================================
FILE: misc/config_youtubeVOS.py
================================================
#!/usr/bin/env python
import json
import os.path as osp
from easydict import EasyDict as edict
from enum import Enum
import numpy as np
""" Configuration file."""
class phase(Enum):
    # YouTube-VOS splits; note TRAINVAL is 'trainval' here (no dash),
    # unlike misc/config.py.
    TRAIN = 'train'
    VAL = 'val'
    TESTDEV = 'test-dev'
    TRAINVAL = 'trainval'
__C = edict()

# Public access to configuration settings
cfg = __C

# Number of CPU cores used to parallelize evaluation.
__C.N_JOBS = 32

# Paths to dataset folders
__C.PATH = edict()

__C.PHASE = phase.TRAIN

# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)
__C.MULTIOBJECT = True

# Root folder of project
__C.PATH.ROOT = osp.abspath('.')

# Data folder
__C.PATH.DATA = osp.abspath('./data/YouTubeVOS_2018')

# Path to input images
# NOTE(review): TRAIN/VAL/TRAINVAL all point at the train JPEGImages folder
# (train/val are sub-splits of the official training set); TEST points at
# the official 'valid' split.
__C.PATH.SEQUENCES_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                    "JPEGImages")
__C.PATH.SEQUENCES_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                  "JPEGImages")
__C.PATH.SEQUENCES_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                       "JPEGImages")
__C.PATH.SEQUENCES_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,
                                   "JPEGImages")

# Path to annotations
__C.PATH.ANNOTATIONS_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                      "Annotations")
__C.PATH.ANNOTATIONS_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                    "Annotations")
__C.PATH.ANNOTATIONS_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                         "Annotations")
__C.PATH.ANNOTATIONS_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,
                                     "Annotations")

# Precomputed optical-flow / HED-edge inputs and edge annotations.
__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-flow/')
__C.PATH.HED = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-hed/')

__C.PATH.ANNOTATIONS_TRAIN_EDGE = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                           "Annotations_edge")

__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))

# Paths to files
__C.FILES = edict()

# Path to property file, holding information on evaluation sequences.
__C.FILES.DB_INFO_TRAIN = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "train-train-meta.json"))
__C.FILES.DB_INFO_VAL = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "train-val-meta.json"))
__C.FILES.DB_INFO_TRAINVAL = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "meta.json"))
__C.FILES.DB_INFO_TEST = osp.abspath(
    osp.join(__C.PATH.DATA, phase.VAL.value, "meta.json"))

# Measures and Statistics
__C.EVAL = edict()

# Metrics: J: region similarity, F: contour accuracy, T: temporal stability
__C.EVAL.METRICS = ['J', 'F']

# Statistics computed for each of the metrics listed above
__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']
def db_read_sequences_train():
    """ Read the list of training sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TRAIN) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_val():
    """ Read the list of validation sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_VAL) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_trainval():
    """ Read the list of train+val sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TRAINVAL) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_test():
    """ Read the list of test sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TEST) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
# Load all sequences
__C.SEQUENCES_TRAIN = db_read_sequences_train()
__C.SEQUENCES_VAL = db_read_sequences_val()
__C.SEQUENCES_TRAINVAL = db_read_sequences_trainval()
__C.SEQUENCES_TEST = db_read_sequences_test()

# Indexed color palette used when writing annotation PNGs (N x 3 uint8).
__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)
================================================
FILE: misc/io_aux.py
================================================
from PIL import Image
import numpy as np
from misc.config import cfg
def imread_indexed(filename):
    """ Load an indexed PNG; return (label array, N x 3 palette array)."""
    im = Image.open(filename)
    # First channel carries the palette indices (object labels).
    annotation = np.atleast_3d(im)[...,0]
    palette = np.array(im.getpalette()).reshape((-1,3))
    return annotation, palette
def imwrite_indexed(filename,array,color_palette=cfg.palette):
    """ Save a 2D label array as an indexed PNG with the given palette."""
    # Indexed PNGs hold a single channel of palette indices.
    if np.atleast_3d(array).shape[2] != 1:
        raise Exception("Saving indexed PNGs requires 2D array.")

    indexed = Image.fromarray(array)
    indexed.putpalette(color_palette.ravel())
    indexed.save(filename, format='PNG')
================================================
FILE: misc/logger.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import logging
# Log lines look like: "[INFO][01-01-2020 12:00:00] message"
FORMAT = '[%(levelname)s]%(asctime)-15s %(message)s'
# Configure the root logger once at import time.
logging.basicConfig(
    level=logging.INFO,format=FORMAT,datefmt='[%d-%m-%Y %H:%M:%S]')
================================================
FILE: misc/parallel.py
================================================
# Prefer joblib for parallel evaluation; fall back to serial execution.
try:
    from joblib import Parallel, delayed
except Exception as e:
    print(e) #TODO
    # if joblib does not exist just run it in a single thread
    delayed = lambda x: x
    def Parallel( *args, **kwargs ):
        # Mimics joblib's API: Parallel(...)(iterable) -> list(iterable).
        return list
# Allow pickling member functions
def _pickle_method(method):
    """Pickle support for bound methods: reduce to (function name, owner)."""
    return _unpickle_method, (method.__name__, method.__self__)
def _unpickle_method(func_name, obj):
try:
return obj.__getattribute__(func_name)
except AttributeError:
return None
#The copyreg module offers a way to define functions used while pickling specific objects
# Requires to install future module
import types
try:
    import copyreg
except:
    import six.moves.copyreg as copyreg
# Route all bound-method pickling through the helpers above.
copyreg.pickle(types.MethodType, _pickle_method, _unpickle_method)
================================================
FILE: misc/timer.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from FAST-RCNN (Ross Girshick)
# ----------------------------------------------------------------------------
"""
A simple wrapper to the built-in python timer.
"""
import time
class Timer(object):
    """Minimal wall-clock stopwatch built on time.time()."""
    def __init__(self):
        super(Timer, self).__init__()
        self._start = None  # epoch seconds at the last tic(); None until started

    def tic(self):
        """Start (or restart) the timer; returns self for chaining."""
        self._start = time.time()
        return self

    def toc(self):
        """Seconds elapsed since the last tic().

        Raises:
            AssertionError: if tic() was never called.
        """
        # Idiomatic None check; also fix the message -- the original told the
        # caller to call "toc()" first, i.e. the very function that failed.
        assert self._start is not None,\
            'Timer uninitialized. Call "tic()" first.'
        return time.time() - self._start
================================================
FILE: misc/visual.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# ----------------------------------------------------------------------------
import skimage
import numpy as np
def overlay(image, mask, colors=[255, 0, 0], cscale=2, alpha=0.4):
    """ Overlay segmentation on top of RGB image.

    Arguments:
        image: HxWx3 array.
        mask: HxW label map; 0 is background, other values are object ids.
        colors: list of RGB colors, cycled over object ids.
        cscale: scale factor applied to the colors.
        alpha: blending weight of the original image inside objects.

    Returns:
        Blended image of the same dtype as `image`, with object contours
        drawn in black.
    """
    colors = np.atleast_2d(colors) * cscale

    im_overlay = image.copy()
    object_ids = np.unique(mask)

    for object_id in object_ids[1:]:
        # Cycle through the palette: the original indexed colors[object_id]
        # directly, which raises IndexError whenever an object id exceeds
        # the palette length (e.g. the default single color with id 1).
        color = np.array(colors[object_id % len(colors)])
        # Overlay color on binary mask
        foreground = image*alpha + np.ones(image.shape)*(1-alpha) * color
        binary_mask = mask == object_id

        # Compose image
        im_overlay[binary_mask] = foreground[binary_mask]

        # XOR of the dilation with the mask gives the 1-pixel contour ring;
        # the original used '-', which NumPy no longer allows on booleans.
        countours = skimage.morphology.binary.binary_dilation(binary_mask) ^ binary_mask
        im_overlay[countours, :] = 0

    return im_overlay.astype(image.dtype)
================================================
FILE: modules/MATNet.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
class Encoder(nn.Module):
    # Two-stream (appearance + optical flow) ResNet-101 encoder.
    # Co-attention couples the streams at res3/4/5; the concatenated
    # two-stream features of every level are passed through a channel gate.
    def __init__(self):
        super(Encoder, self).__init__()

        # Appearance stream: ImageNet-pretrained ResNet-101 over the RGB frame.
        resnet_im = models.resnet101(pretrained=True)
        self.conv1_1 = resnet_im.conv1
        self.bn1_1 = resnet_im.bn1
        self.relu_1 = resnet_im.relu
        self.maxpool_1 = resnet_im.maxpool

        self.res2_1 = resnet_im.layer1
        self.res3_1 = resnet_im.layer2
        self.res4_1 = resnet_im.layer3
        self.res5_1 = resnet_im.layer4

        # Motion stream: a second, separately-weighted ResNet-101 over the flow.
        resnet_fl = models.resnet101(pretrained=True)
        self.conv1_2 = resnet_fl.conv1
        self.bn1_2 = resnet_fl.bn1
        self.relu_2 = resnet_fl.relu
        self.maxpool_2 = resnet_fl.maxpool

        self.res2_2 = resnet_fl.layer1
        self.res3_2 = resnet_fl.layer2
        self.res4_2 = resnet_fl.layer3
        self.res5_2 = resnet_fl.layer4

        # Channel gates over the concatenated two-stream features (hence *2).
        self.gated_res2 = Gated(256*2)
        self.gated_res3 = Gated(512*2)
        self.gated_res4 = Gated(1024*2)
        self.gated_res5 = Gated(2048*2)

        # Co-attention at the three deepest levels (channel = per-stream width).
        self.coa_res3 = CoAttention(channel=512)
        self.coa_res4 = CoAttention(channel=1024)
        self.coa_res5 = CoAttention(channel=2048)

    def forward_res2(self, f1, f2):
        # Stem + res2 of each stream; f1 = RGB frame, f2 = flow image.
        x1 = self.conv1_1(f1)
        x1 = self.bn1_1(x1)
        x1 = self.relu_1(x1)
        x1 = self.maxpool_1(x1)
        r2_1 = self.res2_1(x1)

        x2 = self.conv1_2(f2)
        x2 = self.bn1_2(x2)
        x2 = self.relu_2(x2)
        x2 = self.maxpool_2(x2)
        r2_2 = self.res2_2(x2)

        return r2_1, r2_2

    def forward(self, f1, f2):
        # Returns gated, concatenated two-stream features at res5..res2.
        r2_1, r2_2 = self.forward_res2(f1, f2)
        r2 = torch.cat([r2_1, r2_2], dim=1)

        # res3: co-attend, then residual-add the cross-stream responses.
        r3_1 = self.res3_1(r2_1)
        r3_2 = self.res3_2(r2_2)
        Za, Zb, Qa, Qb = self.coa_res3(r3_1, r3_2)
        r3_1 = F.relu(Zb + r3_1)
        r3_2 = F.relu(Qb + r3_2)
        r3 = torch.cat([r3_1, r3_2], dim=1)

        # res4
        r4_1 = self.res4_1(r3_1)
        r4_2 = self.res4_2(r3_2)
        Za, Zb, Qa, Qb = self.coa_res4(r4_1, r4_2)
        r4_1 = F.relu(Zb + r4_1)
        r4_2 = F.relu(Qb + r4_2)
        r4 = torch.cat([r4_1, r4_2], dim=1)

        # res5
        r5_1 = self.res5_1(r4_1)
        r5_2 = self.res5_2(r4_2)
        Za, Zb, Qa, Qb = self.coa_res5(r5_1, r5_2)
        r5_1 = F.relu(Zb + r5_1)
        r5_2 = F.relu(Qb + r5_2)
        r5 = torch.cat([r5_1, r5_2], dim=1)

        r5_gated = self.gated_res5(r5)
        r4_gated = self.gated_res4(r4)
        r3_gated = self.gated_res3(r3)
        r2_gated = self.gated_res2(r2)

        return r5_gated, r4_gated, r3_gated, r2_gated
class CoAttention(nn.Module):
def __init__(self, channel):
super(CoAttention, self).__init__()
d = channel // 16
self.proja = nn.Conv2d(channel, d, kernel_size=1)
self.projb = nn.Conv2d(channel, d, kernel_size=1)
self.bottolneck1 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=1),
nn.BatchNorm2d(channel),
nn.ReLU(inplace=True),
)
self.bottolneck2 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=1),
nn.BatchNorm2d(channel),
nn.ReLU(inplace=True),
)
self.proj1 = nn.Conv2d(channel, 1, kernel_size=1)
self.proj2 = nn.Conv2d(channel, 1, kernel_size=1)
self.bna = nn.BatchNorm2d(channel)
self.bnb = nn.BatchNorm2d(channel)
self.relu = nn.ReLU(inplace=True)
def forward(self, Qa, Qb):
# cascade 1
Qa_1, Qb_1 = self.forward_sa(Qa, Qb)
_, Zb = self.forward_co(Qa_1, Qb_1)
Pa = F.relu(Zb + Qa)
Pb = F.relu(Qb_1 + Qb)
# cascade 2
Qa_2, Qb_2 = self.forward_sa(Pa, Pb)
_, Zb = self.forward_co(Qa_2, Qb_2)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_2 + Pb)
# cascade 3
Qa_3, Qb_3 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_3, Qb_3)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_3 + Pb)
# cascade 4
Qa_4, Qb_4 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_4, Qb_4)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_4 + Pb)
# cascade 5
Qa_5, Qb_5 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_5, Qb_5)
return Za, Zb, Qa_5, Qb_5
def forward_sa(self, Qa, Qb):
Aa = self.proj1(Qa)
Ab = self.proj2(Qb)
n, c, h, w = Aa.shape
Aa = Aa.view(-1, h*w)
Ab = Ab.view(-1, h*w)
Aa = F.softmax(Aa)
Ab = F.softmax(Ab)
Aa = Aa.view(n, c, h, w)
Ab = Ab.view(n, c, h, w)
Qa_attened = Aa * Qa
Qb_attened = Ab * Qb
return Qa_attened, Qb_attened
def forward_co(self, Qa, Qb):
Qa_low = self.proja(Qa)
Qb_low = self.projb(Qb)
N, C, H, W = Qa_low.shape
Qa_low = Qa_low.view(N, C, H * W)
Qb_low = Qb_low.view(N, C, H * W)
Qb_low = torch.transpose(Qb_low, 1, 2)
L = torch.bmm(Qb_low, Qa_low)
Aa = F.tanh(L)
Ab = torch.transpose(Aa, 1, 2)
N, C, H, W = Qa.shape
Qa_ = Qa.view(N, C, H * W)
Qb_ = Qb.view(N, C, H * W)
Za = torch.bmm(Qb_, Aa)
Zb = torch.bmm(Qa_, Ab)
Za = Za.view(N, C, H, W)
Zb = Zb.view(N, C, H, W)
Za = F.normalize(Za)
Zb = F.normalize(Zb)
return Za, Zb
class Gated(nn.Module):
    """Gated attention block combining channel, spatial and global gates.

    A squeeze-excitation channel gate is followed by a spatial gate
    (computed from the channel-wise max) and a scalar global gate derived
    from the shared excitation features; the result is added residually.
    """

    def __init__(self, channel, reduction=16):
        super(Gated, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.excitation_1 = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True))
        self.excitation_2 = nn.Sequential(
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )
        self.global_attention = nn.Sequential(
            nn.Linear(channel // reduction, 1),
            nn.Sigmoid()
        )
        kernel_size = 7
        self.spatial = BasicConv(1, 1, kernel_size, stride=1,
                                 padding=(kernel_size-1) // 2, relu=False)

    def forward(self, U):
        batch, chans = U.shape[0], U.shape[1]
        # Channel gate (squeeze-and-excitation).
        squeezed = self.avg_pool(U).view(batch, chans)
        hidden = self.excitation_1(squeezed)
        channel_gate = self.excitation_2(hidden).view(batch, chans, 1, 1)
        gated = channel_gate * U
        # Spatial gate from the channel-wise maximum of the gated map.
        peak = torch.max(gated, 1)[0].unsqueeze(1)
        gated = self.spatial(peak) * gated
        # Scalar global gate reuses the excitation hidden features.
        global_gate = self.global_attention(hidden).view(batch, 1, 1, 1)
        # Residual connection back onto the input.
        return U + global_gate * gated
class BasicConv(nn.Module):
    """Conv2d followed by optional BatchNorm and optional ReLU."""

    def __init__(self, in_planes, out_planes, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, relu=True, bn=True,
                 bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
                              stride=stride, padding=padding,
                              dilation=dilation, groups=groups, bias=bias)
        # bn/relu are None when disabled so forward can skip them cheaply.
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01,
                                 affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        return out if self.relu is None else self.relu(out)
class BoundaryModule(nn.Module):
    """Small head producing a single-channel boundary logit map.

    Structure: BN -> 3x3 conv -> ReLU -> BN -> 3x3 conv -> ReLU -> 1x1 conv.
    The output is raw logits; `self.sigmoid` is defined but not applied here
    (callers apply sigmoid after upsampling).
    """

    def __init__(self, inchannel):
        super(BoundaryModule, self).__init__()
        self.bn1 = nn.BatchNorm2d(inchannel)
        self.conv1 = nn.Conv2d(inchannel, 64, kernel_size=3, stride=1,
                               padding=1)
        self.relu = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 1, kernel_size=1, stride=1, padding=0)
        self.sigmoid = nn.Sigmoid()
        self._initialize_weights()

    def _initialize_weights(self):
        """Gaussian init for convs/linears, unit-scale init for BatchNorm."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0, 0.001)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()

    def forward(self, x):
        out = self.relu(self.conv1(self.bn1(x)))
        out = self.relu(self.conv2(self.bn2(out)))
        return self.conv3(out)
class Decoder(nn.Module):
    """Decoder that predicts a segmentation mask plus boundary maps.

    Boundary logits are predicted per scale, upsampled and fused; the
    boundary logits are also concatenated to the encoder features before
    the mask-refinement path (GC + cascaded Refine blocks).
    """

    def __init__(self, output_size=(473, 473)):
        """
        Args:
            output_size: (H, W) all predictions are upsampled to. Defaults
                to (473, 473) — the previously hard-coded training
                resolution — so existing callers are unaffected.
        """
        super(Decoder, self).__init__()
        mdim = 256
        self.output_size = output_size
        self.GC = GC(4096+1, mdim)
        self.convG1 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)
        self.convG2 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)
        self.RF4 = Refine(2048+1, mdim)
        self.RF3 = Refine(1024+1, mdim)
        self.RF2 = Refine(512+1, mdim)
        self.pred5 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred4 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred3 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred2 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.concat = nn.Conv2d(4, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.bdry5 = BoundaryModule(4096)
        self.bdry4 = BoundaryModule(2048)
        self.bdry3 = BoundaryModule(1024)
        self.bdry2 = BoundaryModule(512)
        self._initialize_weights()

    def _initialize_weights(self):
        """Gaussian init for convs/linears, unit-scale init for BatchNorm."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.normal_(0, 0.001)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def forward(self, r5, r4, r3, r2):
        """Returns (mask, fused boundary, and four per-scale boundary maps),
        all sigmoid probabilities at self.output_size resolution."""
        # Per-scale boundary logits.
        p5 = self.bdry5(r5)
        p4 = self.bdry4(r4)
        p3 = self.bdry3(r3)
        p2 = self.bdry2(r2)
        # Upsample to the output resolution. align_corners=False matches the
        # previous implicit default and silences the deprecation warning.
        p2_up = F.interpolate(p2, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p3_up = F.interpolate(p3, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p4_up = F.interpolate(p4, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p5_up = F.interpolate(p5, size=self.output_size, mode='bilinear',
                              align_corners=False)
        # Fuse the four boundary maps into one.
        concat = torch.cat([p2_up, p3_up, p4_up, p5_up], dim=1)
        p = self.concat(concat)
        p2_up = torch.sigmoid(p2_up)
        p3_up = torch.sigmoid(p3_up)
        p4_up = torch.sigmoid(p4_up)
        p5_up = torch.sigmoid(p5_up)
        p = torch.sigmoid(p)
        # Append boundary logits as an extra channel for the mask path.
        r5 = torch.cat((r5, p5), dim=1)
        r4 = torch.cat((r4, p4), dim=1)
        r3 = torch.cat((r3, p3), dim=1)
        r2 = torch.cat((r2, p2), dim=1)
        m = self.forward_mask(r5, r4, r3, r2)
        return m, p, p2_up, p3_up, p4_up, p5_up

    def forward_mask(self, x, r4, r3, r2):
        """Global-convolution bottleneck plus cascaded refinement to a mask."""
        x = self.GC(x)
        r = self.convG1(F.relu(x))
        r = self.convG2(F.relu(r))
        m5 = x + r
        m4 = self.RF4(r4, m5)
        m3 = self.RF3(r3, m4)
        m2 = self.RF2(r2, m3)
        p2 = self.pred2(F.relu(m2))
        p2_up = F.interpolate(p2, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p2_s = torch.sigmoid(p2_up)
        return p2_s
class GC(nn.Module):
    """Global convolution: two separable kh x kw paths, summed.

    Each path factorizes a large kernel into a (kh,1) and a (1,kw) conv
    (in opposite orders), keeping the spatial size via symmetric padding.
    """

    def __init__(self, inplanes, planes, kh=7, kw=7):
        super(GC, self).__init__()
        self.conv_l1 = nn.Conv2d(inplanes, 256, kernel_size=(kh, 1),
                                 padding=(int(kh/2), 0))
        self.conv_l2 = nn.Conv2d(256, planes, kernel_size=(1, kw),
                                 padding=(0, int(kw/2)))
        self.conv_r1 = nn.Conv2d(inplanes, 256, kernel_size=(1, kw),
                                 padding=(0, int(kw/2)))
        self.conv_r2 = nn.Conv2d(256, planes, kernel_size=(kh, 1),
                                 padding=(int(kh/2), 0))

    def forward(self, x):
        left = self.conv_l2(self.conv_l1(x))
        right = self.conv_r2(self.conv_r1(x))
        return left + right
class AtrousBlock(nn.Module):
    """3x3 dilated convolution; padding == dilation keeps the spatial size
    unchanged at stride 1."""

    def __init__(self, inplanes, planes, rate, stride=1):
        super(AtrousBlock, self).__init__()
        self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                              dilation=rate, padding=rate)

    def forward(self, x):
        return self.conv(x)


class PyramidDilationConv(nn.Module):
    """Parallel 1x1 conv plus three dilated 3x3 convs (rates 3/5/7),
    concatenated along channels and batch-normed.

    Output has 4 * planes channels at the input's spatial size.
    """

    def __init__(self, inplanes, planes):
        super(PyramidDilationConv, self).__init__()
        rate = [3, 5, 7]
        self.block0 = nn.Conv2d(inplanes, planes, kernel_size=1)
        self.block1 = AtrousBlock(inplanes, planes, rate[0])
        self.block2 = AtrousBlock(inplanes, planes, rate[1])
        self.block3 = AtrousBlock(inplanes, planes, rate[2])
        self.bn = nn.BatchNorm2d(planes*4)

    def forward(self, x):
        x0 = self.block0(x)
        x1 = self.block1(x)
        # BUG FIX: block1 was previously applied three times, leaving
        # block2/block3 (dilations 5 and 7) as dead modules. Each branch is
        # now used as constructed. NOTE: checkpoints trained with the old
        # code will produce different outputs after this fix.
        x2 = self.block2(x)
        x3 = self.block3(x)
        xx = torch.cat([x0, x1, x2, x3], dim=1)
        return self.bn(xx)
class Refine(nn.Module):
    """Refinement block: pyramid-dilation skip features plus an upsampled
    coarser mask, each with a small residual conv stack.

    NOTE: convFS1 and scale_factor are unused by forward() (the pdc branch
    replaced convFS1); they are kept so existing state dicts still load.
    """

    def __init__(self, inplanes, planes, scale_factor=2):
        super(Refine, self).__init__()
        self.convFS1 = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1)
        self.convFS2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convFS3 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convMM1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convMM2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.scale_factor = scale_factor
        outplanes = int(planes / 4)
        # pdc concatenates 4 branches of `outplanes`, i.e. `planes` total.
        self.pdc = PyramidDilationConv(inplanes, outplanes)

    def forward(self, f, pm):
        # Skip branch with a residual two-conv refinement.
        skip = self.pdc(f)
        residual = self.convFS3(F.relu(self.convFS2(F.relu(skip))))
        skip = skip + residual
        # Merge with the coarser mask, upsampled to the skip resolution.
        merged = skip + F.interpolate(pm, size=skip.shape[2:4], mode='bilinear')
        # Second residual two-conv refinement.
        refined = self.convMM2(F.relu(self.convMM1(F.relu(merged))))
        return merged + refined
================================================
FILE: modules/cbam.py
================================================
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
class BasicConv(nn.Module):
    """Conv2d with optional BatchNorm and optional ReLU (CBAM helper)."""

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        # Disabled stages are stored as None and skipped in forward().
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        return out if self.relu is None else self.relu(out)
class Flatten(nn.Module):
    """Collapse every dimension after the batch dimension into one."""
    def forward(self, x):
        return x.view(x.size(0), -1)


class ChannelGate(nn.Module):
    """CBAM channel-attention gate.

    Pools the input spatially (one pooled vector per entry of
    `pool_types`), runs each through a shared bottleneck MLP, sums the
    results, and scales the input channel-wise by the sigmoid of the sum.
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
        )
        self.pool_types = pool_types

    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pooled = F.avg_pool2d(x, (x.size(2), x.size(3)),
                                      stride=(x.size(2), x.size(3)))
            elif pool_type == 'max':
                pooled = F.max_pool2d(x, (x.size(2), x.size(3)),
                                      stride=(x.size(2), x.size(3)))
            elif pool_type == 'lp':
                pooled = F.lp_pool2d(x, 2, (x.size(2), x.size(3)),
                                     stride=(x.size(2), x.size(3)))
            elif pool_type == 'lse':
                # LSE pool only
                pooled = logsumexp_2d(x)
            else:
                # Previously an unknown pool type crashed later with an
                # opaque UnboundLocalError; fail fast instead.
                raise ValueError('unsupported pool type: {!r}'.format(pool_type))
            channel_att_raw = self.mlp(pooled)
            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw
        # torch.sigmoid replaces the deprecated F.sigmoid (identical values).
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale
def logsumexp_2d(tensor):
    """Numerically stable log-sum-exp over the flattened spatial dims.

    Input (N, C, H, W) -> output (N, C, 1); the per-channel max is
    subtracted before exponentiating to avoid overflow.
    """
    flat = tensor.view(tensor.size(0), tensor.size(1), -1)
    peak, _ = flat.max(dim=2, keepdim=True)
    return peak + (flat - peak).exp().sum(dim=2, keepdim=True).log()
class ChannelPool(nn.Module):
    """Stack channel-wise max and mean into a 2-channel map (N,2,H,W)."""
    def forward(self, x):
        max_map = torch.max(x, 1)[0].unsqueeze(1)
        mean_map = torch.mean(x, 1).unsqueeze(1)
        return torch.cat((max_map, mean_map), dim=1)
class SpatialGate(nn.Module):
    """CBAM spatial-attention gate.

    Compresses channels to a 2-channel (max, mean) map, convolves it to a
    single-channel logit map, and scales the input by its sigmoid.
    """

    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1,
                                 padding=(kernel_size-1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        # torch.sigmoid replaces the deprecated F.sigmoid (identical values).
        scale = torch.sigmoid(x_out)  # broadcasting
        return x * scale
class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate, then optionally
    a spatial gate."""

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        out = self.ChannelGate(x)
        if self.no_spatial:
            return out
        return self.SpatialGate(out)
================================================
FILE: requirements.txt
================================================
easydict==1.9
lmdb==0.94
torchvision==0.2.2
opencv_contrib_python==4.0.0.21
scipy==1.2.1
six==1.12.0
torch==1.0.1.post2
pydensecrf==1.0rc2
tqdm==4.31.1
scikit_image==0.15.0
joblib==0.13.2
numpy==1.16.2
matplotlib==3.0.3
Pillow==8.1.1
PyYAML==5.1.2
# NOTE: "skimage==0.0" removed — the "skimage" PyPI name is a broken placeholder;
# scikit_image above already provides the `skimage` package.
================================================
FILE: test_MATNet.py
================================================
import torch
from torchvision import transforms
import os
import glob
from tqdm import tqdm
from PIL import Image
from scipy.misc import imresize
from modules.MATNet import Encoder, Decoder
from utils.utils import check_parallel
from utils.utils import load_checkpoint_epoch
def flip(x, dim):
    """Reverse tensor `x` along dimension `dim`.

    Uses torch.flip, which works on whatever device `x` lives on. The
    previous index_select version built its index with `.cuda(0)`, which
    broke when `x` was on any GPU other than device 0.
    """
    return torch.flip(x, [dim])
# Inference script: runs MATNet over the DAVIS-2016 validation sequences
# and writes per-frame grayscale mask PNGs.
inputRes = (473, 473)
use_flip = True  # average predictions over the horizontally flipped input
to_tensor = transforms.ToTensor()
# ImageNet normalization, applied to both RGB frames and flow images.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
image_transforms = transforms.Compose([to_tensor, normalize])
model_name = 'MATNet' # specify the model name
epoch = 0 # specify the epoch number
davis_result_dir = './output/davis16'
# Load weights only (optimizer state is skipped: load_opt=False).
encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args =\
    load_checkpoint_epoch(model_name, epoch, True, False)
encoder = Encoder()
decoder = Decoder()
# Strip a possible DataParallel "module." prefix before loading.
encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
encoder.load_state_dict(encoder_dict)
decoder.load_state_dict(decoder_dict)
encoder.cuda()
decoder.cuda()
# Evaluation mode (freezes BatchNorm running stats, disables dropout).
encoder.train(False)
decoder.train(False)
val_set = 'data/DAVIS2017/ImageSets/2016/val.txt'
with open(val_set) as f:
    seqs = f.readlines()
    seqs = [seq.strip() for seq in seqs]
for video in tqdm(seqs):
    davis_root_dir = 'data/DAVIS2017/JPEGImages/480p'
    davis_flow_dir = 'data/DAVIS2017/davis2017-flow'
    image_dir = os.path.join(davis_root_dir, video)
    flow_dir = os.path.join(davis_flow_dir, video)
    # Drop the last frame: flow is computed between consecutive frames,
    # so there is one fewer flow image than RGB frames.
    imagefiles = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))[:-1]
    flowfiles = sorted(glob.glob(os.path.join(flow_dir, '*.png')))
    with torch.no_grad():
        for imagefile, flowfile in zip(imagefiles, flowfiles):
            image = Image.open(imagefile).convert('RGB')
            flow = Image.open(flowfile).convert('RGB')
            width, height = image.size
            # Resize to the network input resolution (original size is
            # restored when saving).
            image = imresize(image, inputRes)
            flow = imresize(flow, inputRes)
            image = image_transforms(image)
            flow = image_transforms(flow)
            image = image.unsqueeze(0)
            flow = flow.unsqueeze(0)
            image, flow = image.cuda(), flow.cuda()
            r5, r4, r3, r2 = encoder(image, flow)
            mask_pred, bdry_pred, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
            if use_flip:
                # Test-time augmentation: predict on the mirrored input and
                # average with the original prediction.
                image_flip = flip(image, 3)
                flow_flip = flip(flow, 3)
                r5, r4, r3, r2 = encoder(image_flip, flow_flip)
                mask_pred_flip, bdry_pred_flip, p2, p3, p4, p5 =\
                    decoder(r5, r4, r3, r2)
                mask_pred_flip = flip(mask_pred_flip, 3)
                bdry_pred_flip = flip(bdry_pred_flip, 3)
                mask_pred = (mask_pred + mask_pred_flip) / 2.0
                bdry_pred = (bdry_pred + bdry_pred_flip) / 2.0
            # Probabilities in [0,1] -> 8-bit grayscale image.
            mask_pred = mask_pred[0, 0, :, :]
            mask_pred = Image.fromarray(mask_pred.cpu().detach().numpy() * 255).convert('L')
            save_folder = '{}/{}_epoch{}/{}'.format(davis_result_dir,
                                                    model_name, epoch, video)
            if not os.path.exists(save_folder):
                os.makedirs(save_folder)
            save_file = os.path.join(save_folder,
                                     os.path.basename(imagefile)[:-4] + '.png')
            # Restore the original frame resolution before writing.
            mask_pred = mask_pred.resize((width, height))
            mask_pred.save(save_file)
================================================
FILE: train_MATNet.py
================================================
import torch
from torch.utils import data
from torchvision import transforms
import os
import sys
import time
import random
import numpy as np
from modules.MATNet import Encoder, Decoder
from args import get_parser
from utils.utils import get_optimizer
from utils.utils import make_dir, check_parallel
from dataloader.dataset_utils import get_dataset_davis_youtube_ehem
from utils.utils import save_checkpoint_epoch, load_checkpoint_epoch
from utils.objectives import WeightedBCE2d
from measures.jaccard import db_eval_iou_multi
def init_dataloaders(args):
    """Build the train/val DataLoaders for the DAVIS+YouTubeVOS dataset.

    Both splits share the same transforms (ImageNet normalization for the
    inputs, plain ToTensor for targets); augmentation and shuffling are
    enabled for the training split only.
    """
    loaders = {}
    for split in ['train', 'val']:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        image_transforms = transforms.Compose([transforms.ToTensor(),
                                               normalize])
        target_transforms = transforms.Compose([transforms.ToTensor()])
        dataset = get_dataset_davis_youtube_ehem(
            args, split=split, image_transforms=image_transforms,
            target_transforms=target_transforms,
            augment=args.augment and split == 'train',
            inputRes=(473, 473))
        loaders[split] = data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=(split == 'train'),
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
def trainIters(args):
    """Train MATNet: mask loss plus multi-scale boundary supervision.

    Runs train and val splits each epoch, tracks mean losses/IoU, and
    saves a checkpoint whenever the validation IoU improves.
    """
    print(args)
    # Checkpoints are written to ckpt/<model_name>/.
    model_dir = os.path.join('ckpt/', args.model_name)
    make_dir(model_dir)
    epoch_resume = 0
    if args.resume:
        # Restore model weights; the loaded optimizer dicts are not
        # re-applied — fresh optimizers are created below.
        encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = \
            load_checkpoint_epoch(args.model_name, args.epoch_resume,
                                  args.use_gpu)
        epoch_resume = args.epoch_resume
        encoder = Encoder()
        decoder = Decoder()
        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        encoder.load_state_dict(encoder_dict)
        decoder.load_state_dict(decoder_dict)
    else:
        encoder = Encoder()
        decoder = Decoder()
    criterion = WeightedBCE2d()
    if args.use_gpu:
        encoder.cuda()
        decoder.cuda()
        criterion.cuda()
    encoder_params = list(encoder.parameters())
    decoder_params = list(decoder.parameters())
    # Separate optimizers/hyper-parameters for decoder and encoder.
    dec_opt = get_optimizer(args.optim, args.lr, decoder_params,
                            args.weight_decay)
    enc_opt = get_optimizer(args.optim_cnn, args.lr_cnn, encoder_params,
                            args.weight_decay_cnn)
    loaders = init_dataloaders(args)
    best_iou = 0
    start = time.time()
    for e in range(epoch_resume, args.max_epoch):
        print("Epoch", e)
        # Per-epoch running statistics for both splits.
        epoch_losses = {'train': {'total': [], 'iou': [],
                                  'mask_loss': [], 'bdry_loss': []},
                        'val': {'total': [], 'iou': [],
                                'mask_loss': [], 'bdry_loss': []}}
        for split in ['train', 'val']:
            if split == 'train':
                encoder.train(True)
                decoder.train(True)
            else:
                encoder.train(False)
                decoder.train(False)
            # NOTE(review): tensors are moved with .cuda() unconditionally,
            # so this loop assumes args.use_gpu — confirm before CPU runs.
            for batch_idx, (image, flow, mask, bdry, negative_pixels) in\
                    enumerate(loaders[split]):
                image, flow, mask, bdry, negative_pixels = \
                    image.cuda(), flow.cuda(), mask.cuda(), bdry.cuda(),\
                    negative_pixels.cuda()
                if split == 'train':
                    r5, r4, r3, r2 = encoder(image, flow)
                    mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
                    # Mask loss plus boundary supervision at all five outputs.
                    mask_loss = criterion(mask_pred, mask, negative_pixels)
                    bdry_loss = criterion(p1, bdry, negative_pixels) + \
                                criterion(p2, bdry, negative_pixels) + \
                                criterion(p3, bdry, negative_pixels) + \
                                criterion(p4, bdry, negative_pixels) + \
                                criterion(p5, bdry, negative_pixels)
                    loss = mask_loss + 0.2 * bdry_loss
                    iou = db_eval_iou_multi(mask.cpu().detach().numpy(),
                                            mask_pred.cpu().detach().numpy())
                    dec_opt.zero_grad()
                    enc_opt.zero_grad()
                    loss.backward()
                    enc_opt.step()
                    dec_opt.step()
                else:
                    # Validation: same losses, no gradient/optimizer steps.
                    with torch.no_grad():
                        r5, r4, r3, r2 = encoder(image, flow)
                        mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
                        mask_loss = criterion(mask_pred, mask, negative_pixels)
                        bdry_loss = criterion(p1, bdry, negative_pixels) + \
                                    criterion(p2, bdry, negative_pixels) + \
                                    criterion(p3, bdry, negative_pixels) + \
                                    criterion(p4, bdry, negative_pixels) + \
                                    criterion(p5, bdry, negative_pixels)
                        loss = mask_loss + 0.2 * bdry_loss
                        iou = db_eval_iou_multi(mask.cpu().detach().numpy(),
                                                mask_pred.cpu().detach().numpy())
                epoch_losses[split]['total'].append(loss.data.item())
                epoch_losses[split]['mask_loss'].append(mask_loss.data.item())
                epoch_losses[split]['bdry_loss'].append(bdry_loss.data.item())
                epoch_losses[split]['iou'].append(iou)
                if (batch_idx + 1) % args.print_every == 0:
                    # Report running means since the start of the epoch.
                    mt = np.mean(epoch_losses[split]['total'])
                    mmask = np.mean(epoch_losses[split]['mask_loss'])
                    mbdry = np.mean(epoch_losses[split]['bdry_loss'])
                    miou = np.mean(epoch_losses[split]['iou'])
                    te = time.time() - start
                    print('Epoch: [{}/{}][{}/{}]\tTime {:.3f}s\tLoss: {:.4f}'
                          '\tMask Loss: {:.4f}\tBdry Loss: {:.4f}'
                          '\tIOU: {:.4f}'.format(e, args.max_epoch, batch_idx,
                                                 len(loaders[split]), te, mt,
                                                 mmask, mbdry, miou))
                    start = time.time()
        # Keep the checkpoint with the best validation IoU so far.
        miou = np.mean(epoch_losses['val']['iou'])
        if miou > best_iou:
            best_iou = miou
            save_checkpoint_epoch(args, encoder, decoder,
                                  enc_opt, dec_opt, e, False)
if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()
    # Seed Python and torch RNGs for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    # Hard-coded experiment settings (override any parsed values).
    args.model_name = 'MATNet'
    args.batch_size = 2
    args.max_epoch = 25
    args.year = '2016'
    gpu_id = args.gpu_id
    print('gpu_id: ', gpu_id)
    print('use_gpu: ', args.use_gpu)
    if args.use_gpu:
        torch.cuda.set_device(device=gpu_id)
        torch.cuda.manual_seed(args.seed)
    trainIters(args)
================================================
FILE: utils/__init__.py
================================================
================================================
FILE: utils/objectives.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class WeightedBCE2d(nn.Module):
    """Binary cross-entropy with hard-negative up-weighting.

    Pixels where `negative_pixels > 0` (hard negatives) are weighted
    `1 + negative_pixels`; every other pixel has weight 1. This matches
    the original numpy implementation (positives covered by neg == 0 also
    got weight 1), but stays in torch and on the input's device — the old
    code force-moved the weight with `.cuda()` and crashed on CPU.
    """

    def __init__(self):
        super(WeightedBCE2d, self).__init__()

    def forward(self, input, target, negative_pixels):
        """
        Args:
            input: predicted probabilities in [0, 1], NCHW.
            target: binary targets, same shape as input.
            negative_pixels: per-pixel hard-negative penalties (>0 marks a
                hard negative), same shape as input.
        Returns:
            Scalar mean weighted BCE loss.
        """
        # NCHW -> NHWC -> flat row vector, matching the original layout.
        log_p = input.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)
        target_t = target.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)
        negative_t = negative_pixels.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)

        hard_negative = negative_t > 0
        # Weight 1 everywhere, boosted to 1 + penalty on hard negatives.
        weight = torch.ones_like(target_t)
        weight = torch.where(hard_negative, 1.0 + negative_t, weight)
        weight = weight.detach()
        # reduction='mean' replaces the deprecated size_average=True.
        return F.binary_cross_entropy(log_p, target_t, weight,
                                      reduction='mean')
================================================
FILE: utils/utils.py
================================================
import torch
import os
import pickle
from collections import OrderedDict
def make_dir(dir):
    """Create directory `dir` (including parents) if it does not exist.

    exist_ok=True makes the call idempotent and removes the
    check-then-create race of the previous os.path.exists guard.
    """
    os.makedirs(dir, exist_ok=True)
def check_parallel(encoder_dict, decoder_dict):
    """Strip the 'module.' prefix that nn.DataParallel adds to state dicts.

    Parallel training is detected from the ENCODER keys only; when found,
    the first 7 characters are dropped from every key of both dicts
    (matching the original behavior).
    """
    prefix = "module."
    trained_parallel = any(key[:7] == prefix for key in encoder_dict)
    if trained_parallel:
        encoder_dict = OrderedDict(
            (key[7:], value) for key, value in encoder_dict.items())
        decoder_dict = OrderedDict(
            (key[7:], value) for key, value in decoder_dict.items())
    return encoder_dict, decoder_dict
def get_base_params(args, model):
    """Yield trainable parameters of the backbone stem and res2-res5 stages.

    Note: `args` is unused (kept for interface compatibility). Iteration
    mirrors the original: for every sub-module returned by .modules() the
    (recursive) .parameters() are yielded, so parameters of nested
    containers can appear more than once — callers relying on the original
    behavior get the identical stream.
    """
    backbone = [model.conv1, model.bn1, model.res2,
                model.res3, model.res4, model.res5]
    for stage in backbone:
        for sub in stage.modules():
            for param in sub.parameters():
                if param.requires_grad:
                    yield param
def get_skip_params(model):
    """Yield parameters of the skip layers (sk2-sk5) and their BN layers."""
    groups = [model.sk2.parameters(), model.sk3.parameters(),
              model.sk4.parameters(), model.sk5.parameters(),
              model.bn2.parameters(), model.bn3.parameters(),
              model.bn4.parameters(), model.bn5.parameters()]
    for group in groups:
        for param in group:
            yield param
def merge_params(params):
    """Chain an indexable collection of parameter iterables into one stream."""
    for index in range(len(params)):
        yield from params[index]
def get_optimizer(optim_name, lr, parameters, weight_decay=0, momentum=0.9):
    """Build an optimizer over the trainable subset of `parameters`.

    Args:
        optim_name: 'sgd' or 'adam'.
        lr: learning rate.
        parameters: iterable of torch parameters; frozen ones are skipped.
        weight_decay: L2 penalty.
        momentum: SGD momentum (ignored for Adam).
    Raises:
        ValueError: for an unknown `optim_name` (previously this surfaced
        as an opaque UnboundLocalError on the return value).
    """
    trainable = filter(lambda p: p.requires_grad, parameters)
    if optim_name == 'sgd':
        return torch.optim.SGD(trainable, lr=lr, weight_decay=weight_decay,
                               momentum=momentum)
    if optim_name == 'adam':
        return torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    raise ValueError('unsupported optimizer: {!r}'.format(optim_name))
def save_checkpoint_epoch(args, encoder, decoder, enc_opt, dec_opt, epoch, best=False):
    """Save model and optimizer state dicts under ckpt/<model_name>/.

    Epoch-tagged files ('encoder_<epoch>.pt', ...) are always written;
    when `best` is True, untagged copies ('encoder.pt', ...) are written
    as well. The parsed args are pickled alongside for later reloading.
    """
    ckpt_dir = os.path.join('ckpt', args.model_name)
    named = (('encoder', encoder), ('decoder', decoder),
             ('enc_opt', enc_opt), ('dec_opt', dec_opt))
    for stem, obj in named:
        torch.save(obj.state_dict(),
                   os.path.join(ckpt_dir, '{}_{}.pt'.format(stem, epoch)))
    if best:
        for stem, obj in named:
            torch.save(obj.state_dict(),
                       os.path.join(ckpt_dir, '{}.pt'.format(stem)))
    # save parameters for future use
    with open(os.path.join(ckpt_dir, 'args.pkl'), 'wb') as fh:
        pickle.dump(args, fh)
def load_checkpoint_epoch(model_name, epoch, use_gpu=True, load_opt=True):
    """Load epoch-tagged state dicts from ckpt/<model_name>/.

    With use_gpu=False everything is mapped to CPU storage. When
    load_opt=True the pickled args are also returned; otherwise the
    optimizer/args slots are returned as None (the CPU path still reads
    the optimizer files, matching the original behavior).
    """
    def _path(stem):
        return os.path.join('ckpt', model_name,
                            '{}_{}.pt'.format(stem, epoch))

    if use_gpu:
        encoder_dict = torch.load(_path('encoder'))
        decoder_dict = torch.load(_path('decoder'))
        if load_opt:
            enc_opt_dict = torch.load(_path('enc_opt'))
            dec_opt_dict = torch.load(_path('dec_opt'))
    else:
        to_cpu = lambda storage, location: storage
        encoder_dict = torch.load(_path('encoder'), map_location=to_cpu)
        decoder_dict = torch.load(_path('decoder'), map_location=to_cpu)
        enc_opt_dict = torch.load(_path('enc_opt'), map_location=to_cpu)
        dec_opt_dict = torch.load(_path('dec_opt'), map_location=to_cpu)
    # save parameters for future use
    if load_opt:
        with open(os.path.join('ckpt', model_name, 'args.pkl'), 'rb') as fh:
            args = pickle.load(fh)
        return encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, args
    return encoder_dict, decoder_dict, None, None, None
gitextract_05390pb8/
├── .gitignore
├── .idea/
│ ├── aaai20-zero-ShotVOS-github.iml
│ ├── inspectionProfiles/
│ │ └── Project_Default.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── 3rdparty/
│ ├── run_davis.py
│ ├── run_davis2017.m
│ ├── run_davis_flow.py
│ ├── run_youtube.m
│ ├── run_youtube.py
│ ├── run_youtube_flow.py
│ └── seg2edge.m
├── README.md
├── apply_densecrf_davis.py
├── args.py
├── data/
│ ├── db_info.yaml
│ ├── palette.txt
│ ├── run_davis2017.m
│ ├── run_youtube.m
│ └── seg2edge.m
├── dataloader/
│ ├── base.py
│ ├── base_youtube.py
│ ├── custom_transforms.py
│ ├── dataset_utils.py
│ ├── davis2017.py
│ ├── davis2017_youtubevos_ehem.py
│ ├── hed_bsds.py
│ └── youtubeVOS.py
├── dataset_lmdb_generator.py
├── measures/
│ ├── __init__.py
│ ├── f_boundary.py
│ ├── jaccard.py
│ ├── statistics.py
│ └── t_stability.py
├── misc/
│ ├── __init__.py
│ ├── config.py
│ ├── config_youtubeVOS.py
│ ├── io_aux.py
│ ├── logger.py
│ ├── parallel.py
│ ├── timer.py
│ └── visual.py
├── modules/
│ ├── MATNet.py
│ └── cbam.py
├── requirements.txt
├── test_MATNet.py
├── train_MATNet.py
└── utils/
├── __init__.py
├── objectives.py
└── utils.py
SYMBOL INDEX (191 symbols across 31 files)
FILE: 3rdparty/run_davis.py
function main (line 8) | def main():
FILE: 3rdparty/run_davis_flow.py
function main (line 13) | def main():
function run (line 36) | def run(imagefile1, imagefile2, save_file):
FILE: 3rdparty/run_youtube.py
function main (line 8) | def main():
FILE: 3rdparty/run_youtube_flow.py
function main (line 14) | def main():
function run (line 37) | def run(imagefile1, imagefile2, save_file):
FILE: apply_densecrf_davis.py
function sigmoid (line 15) | def sigmoid(x):
FILE: args.py
function get_parser (line 4) | def get_parser():
FILE: dataloader/base.py
function _load_annotation (line 24) | def _load_annotation(filename, single_object):
function _get_num_objects (line 35) | def _get_num_objects(annotation):
class BaseLoader (line 53) | class BaseLoader(ImageCollection):
method __init__ (line 64) | def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
method __str__ (line 81) | def __str__(self):
class Sequence (line 86) | class Sequence(BaseLoader):
method __init__ (line 96) | def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
class SequenceClip_simple (line 103) | class SequenceClip_simple:
method __init__ (line 113) | def __init__(self, seq, starting_frame):
method __str__ (line 118) | def __str__(self):
class SequenceClip (line 125) | class SequenceClip(BaseLoader):
method __init__ (line 135) | def __init__(self, split, name, starting_frame, regex="*.jpg",
method __str__ (line 142) | def __str__(self):
class Segmentation (line 150) | class Segmentation(BaseLoader):
method __init__ (line 162) | def __init__(self,split,path,single_object,regex="*.png", lmdb_env=None):
method iter_objects_id (line 168) | def iter_objects_id(self):
method iter_objects (line 175) | def iter_objects(self):
class Annotation (line 184) | class Annotation(Segmentation):
method __init__ (line 196) | def __init__(self,split,name,single_object,regex="*.png", lmdb_env=None):
class AnnotationClip_simple (line 202) | class AnnotationClip_simple:
method __init__ (line 213) | def __init__(self, annot, starting_frame):
class AnnotationClip (line 218) | class AnnotationClip(Segmentation):
method __init__ (line 230) | def __init__(self,split,name,starting_frame,single_object,regex="*.png...
FILE: dataloader/base_youtube.py
function _load_annotation (line 16) | def _load_annotation(filename,single_object):
function _get_num_objects (line 26) | def _get_num_objects(annotation):
class BaseLoader (line 43) | class BaseLoader(ImageCollection):
method __init__ (line 55) | def __init__(self,split,path,regex,load_func=None, lmdb_env=None):
method __str__ (line 85) | def __str__(self):
class Sequence (line 89) | class Sequence(BaseLoader):
method __init__ (line 100) | def __init__(self,split,name,regex="*.jpg", lmdb_env=None):
class SequenceClip_simple (line 115) | class SequenceClip_simple:
method __init__ (line 125) | def __init__(self, seq, starting_frame):
method __str__ (line 132) | def __str__(self):
class SequenceClip (line 136) | class SequenceClip(BaseLoader):
method __init__ (line 147) | def __init__(self,split,name,starting_frame,regex="*.jpg", lmdb_env=No...
method __str__ (line 162) | def __str__(self):
class Segmentation (line 166) | class Segmentation(BaseLoader):
method __init__ (line 178) | def __init__(self,split,path,single_object,regex="*.png", lmdb_env=None):
method iter_objects_id (line 184) | def iter_objects_id(self):
method iter_objects (line 191) | def iter_objects(self):
class Annotation (line 200) | class Annotation(Segmentation):
method __init__ (line 212) | def __init__(self,split,name,single_object,regex="*.png", lmdb_env=None):
class AnnotationClip_simple (line 227) | class AnnotationClip_simple:
method __init__ (line 238) | def __init__(self, annot, starting_frame):
class AnnotationClip (line 245) | class AnnotationClip(Segmentation):
method __init__ (line 257) | def __init__(self,split,name,starting_frame,single_object,regex="*.png...
FILE: dataloader/custom_transforms.py
class ScaleNRotate (line 8) | class ScaleNRotate(object):
method __init__ (line 17) | def __init__(self, rots=(-30, 30), scales=(.75, 1.25)):
method __call__ (line 22) | def __call__(self, sample):
class Resize (line 58) | class Resize(object):
method __init__ (line 63) | def __init__(self, scales=[0.5, 0.8, 1]):
method __call__ (line 66) | def __call__(self, sample):
class RandomHorizontalFlip (line 88) | class RandomHorizontalFlip(object):
method __call__ (line 91) | def __call__(self, sample):
class ToTensor (line 103) | class ToTensor(object):
method __call__ (line 106) | def __call__(self, sample):
FILE: dataloader/dataset_utils.py
function get_dataset_davis_youtube_ehem (line 1) | def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,
FILE: dataloader/davis2017.py
class DAVISLoader (line 21) | class DAVISLoader(data.Dataset):
method __init__ (line 26) | def __init__(self, args, split, inputRes, augment=False,
method __len__ (line 94) | def __len__(self):
method __getitem__ (line 98) | def __getitem__(self, index):
FILE: dataloader/davis2017_youtubevos_ehem.py
class DAVISLoader (line 24) | class DAVISLoader(data.Dataset):
method __init__ (line 29) | def __init__(self, args, split, inputRes, augment=False,
method __len__ (line 60) | def __len__(self):
method __getitem__ (line 63) | def __getitem__(self, index):
method load_youtubevos (line 123) | def load_youtubevos(self, args):
method load_davis (line 176) | def load_davis(self, args):
FILE: dataloader/hed_bsds.py
class HEDBSDSTrain (line 6) | class HEDBSDSTrain(data.Dataset):
method __init__ (line 7) | def __init__(self, root_dir, transform=None, target_transform=None):
method __len__ (line 23) | def __len__(self):
method __getitem__ (line 27) | def __getitem__(self, index):
class HEDBSDSTest (line 43) | class HEDBSDSTest(data.Dataset):
method __init__ (line 44) | def __init__(self, root_dir, transform=None):
method __len__ (line 56) | def __len__(self):
method __getitem__ (line 59) | def __getitem__(self, index):
FILE: dataloader/youtubeVOS.py
class YoutubeVOSLoader (line 22) | class YoutubeVOSLoader(data.Dataset):
method __init__ (line 23) | def __init__(self,
method __len__ (line 93) | def __len__(self):
method __getitem__ (line 96) | def __getitem__(self, index):
FILE: dataset_lmdb_generator.py
class LMDBGenerator (line 10) | class LMDBGenerator:
method __init__ (line 11) | def __init__(self, ext='.jpg', gen_type='seq'):
method generate_lmdb_file (line 15) | def generate_lmdb_file(self, root_dir, frames_dir):
FILE: measures/f_boundary.py
function db_eval_boundary (line 14) | def db_eval_boundary(foreground_mask,gt_mask,bound_th=0.008):
function seg2bmap (line 71) | def seg2bmap(seg,width=None,height=None):
FILE: measures/jaccard.py
function db_eval_iou_multi (line 16) | def db_eval_iou_multi(annotations, segmentations):
function db_eval_iou (line 30) | def db_eval_iou(annotation,segmentation):
FILE: measures/statistics.py
function mean (line 4) | def mean(X):
function recall (line 11) | def recall(X,threshold=0.5):
function decay (line 17) | def decay(X,n_bins=4):
function std (line 32) | def std(X):
function get (line 45) | def get(name):
FILE: measures/t_stability.py
function get_bijective_pairs (line 15) | def get_bijective_pairs(pairs,costmat):
function bij_pairs_one_dim (line 20) | def bij_pairs_one_dim(pairs, costmat, left_or_right):
function hist_cost_2 (line 33) | def hist_cost_2(BH1,BH2):
function sc_compute (line 48) | def sc_compute(Bsamp,Tsamp,mean_dist,nbins_theta,nbins_r,r_inner,r_outer...
function db_eval_t_stab (line 99) | def db_eval_t_stab(fgmask,ground_truth,timing=True):
FILE: misc/config.py
class phase (line 21) | class phase(Enum):
function db_read_info (line 89) | def db_read_info():
function db_read_attributes (line 95) | def db_read_attributes():
function db_read_years (line 100) | def db_read_years():
function db_read_sequences (line 105) | def db_read_sequences(year=None,db_phase=None):
FILE: misc/config_youtubeVOS.py
class phase (line 11) | class phase(Enum):
function db_read_sequences_train (line 91) | def db_read_sequences_train():
function db_read_sequences_val (line 101) | def db_read_sequences_val():
function db_read_sequences_trainval (line 111) | def db_read_sequences_trainval():
function db_read_sequences_test (line 121) | def db_read_sequences_test():
FILE: misc/io_aux.py
function imread_indexed (line 6) | def imread_indexed(filename):
function imwrite_indexed (line 14) | def imwrite_indexed(filename,array,color_palette=cfg.palette):
FILE: misc/parallel.py
function Parallel (line 7) | def Parallel( *args, **kwargs ):
function _pickle_method (line 11) | def _pickle_method(method):
function _unpickle_method (line 16) | def _unpickle_method(func_name, obj):
FILE: misc/timer.py
class Timer (line 16) | class Timer(object):
method __init__ (line 18) | def __init__(self):
method tic (line 22) | def tic(self):
method toc (line 26) | def toc(self):
FILE: misc/visual.py
function overlay (line 13) | def overlay(image, mask, colors=[255, 0, 0], cscale=2, alpha=0.4):
FILE: modules/MATNet.py
class Encoder (line 7) | class Encoder(nn.Module):
method __init__ (line 8) | def __init__(self):
method forward_res2 (line 42) | def forward_res2(self, f1, f2):
method forward (line 57) | def forward(self, f1, f2):
class CoAttention (line 96) | class CoAttention(nn.Module):
method __init__ (line 97) | def __init__(self, channel):
method forward (line 123) | def forward(self, Qa, Qb):
method forward_sa (line 158) | def forward_sa(self, Qa, Qb):
method forward_co (line 177) | def forward_co(self, Qa, Qb):
class Gated (line 207) | class Gated(nn.Module):
method __init__ (line 208) | def __init__(self, channel, reduction=16):
method forward (line 231) | def forward(self, U):
class BasicConv (line 254) | class BasicConv(nn.Module):
method __init__ (line 255) | def __init__(self, in_planes, out_planes, kernel_size, stride=1,
method forward (line 267) | def forward(self, x):
class BoundaryModule (line 276) | class BoundaryModule(nn.Module):
method __init__ (line 277) | def __init__(self, inchannel):
method _initialize_weights (line 293) | def _initialize_weights(self):
method forward (line 306) | def forward(self, x):
class Decoder (line 318) | class Decoder(nn.Module):
method __init__ (line 319) | def __init__(self):
method _initialize_weights (line 343) | def _initialize_weights(self):
method forward (line 356) | def forward(self, r5, r4, r3, r2):
method forward_mask (line 385) | def forward_mask(self, x, r4, r3, r2):
class GC (line 401) | class GC(nn.Module):
method __init__ (line 402) | def __init__(self, inplanes, planes, kh=7, kw=7):
method forward (line 413) | def forward(self, x):
class AtrousBlock (line 420) | class AtrousBlock(nn.Module):
method __init__ (line 421) | def __init__(self, inplanes, planes, rate, stride=1):
method forward (line 427) | def forward(self, x):
class PyramidDilationConv (line 431) | class PyramidDilationConv(nn.Module):
method __init__ (line 432) | def __init__(self, inplanes, planes):
method forward (line 443) | def forward(self, x):
class Refine (line 454) | class Refine(nn.Module):
method __init__ (line 455) | def __init__(self, inplanes, planes, scale_factor=2):
method forward (line 467) | def forward(self, f, pm):
FILE: modules/cbam.py
class BasicConv (line 6) | class BasicConv(nn.Module):
method __init__ (line 7) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 14) | def forward(self, x):
class Flatten (line 22) | class Flatten(nn.Module):
method forward (line 23) | def forward(self, x):
class ChannelGate (line 26) | class ChannelGate(nn.Module):
method __init__ (line 27) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 37) | def forward(self, x):
function logsumexp_2d (line 62) | def logsumexp_2d(tensor):
class ChannelPool (line 68) | class ChannelPool(nn.Module):
method forward (line 69) | def forward(self, x):
class SpatialGate (line 72) | class SpatialGate(nn.Module):
method __init__ (line 73) | def __init__(self):
method forward (line 78) | def forward(self, x):
class CBAM (line 84) | class CBAM(nn.Module):
method __init__ (line 85) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 91) | def forward(self, x):
FILE: test_MATNet.py
function flip (line 15) | def flip(x, dim):
FILE: train_MATNet.py
function init_dataloaders (line 21) | def init_dataloaders(args):
function trainIters (line 49) | def trainIters(args):
FILE: utils/objectives.py
class WeightedBCE2d (line 6) | class WeightedBCE2d(nn.Module):
method __init__ (line 7) | def __init__(self):
method forward (line 10) | def forward(self, input, target, negative_pixels):
FILE: utils/utils.py
function make_dir (line 7) | def make_dir(dir):
function check_parallel (line 12) | def check_parallel(encoder_dict, decoder_dict):
function get_base_params (line 34) | def get_base_params(args, model):
function get_skip_params (line 52) | def get_skip_params(model):
function merge_params (line 68) | def merge_params(params):
function get_optimizer (line 74) | def get_optimizer(optim_name, lr, parameters, weight_decay=0, momentum=0...
function save_checkpoint_epoch (line 85) | def save_checkpoint_epoch(args, encoder, decoder, enc_opt, dec_opt, epoc...
function load_checkpoint_epoch (line 101) | def load_checkpoint_epoch(model_name, epoch, use_gpu=True, load_opt=True):
Condensed preview — 52 files, each showing its path, character count, and a content snippet. Download the .json file, or copy it, for the full structured content (182K chars).
[
{
"path": ".gitignore",
"chars": 1209,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".idea/aaai20-zero-ShotVOS-github.iml",
"chars": 447,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": ".idea/inspectionProfiles/Project_Default.xml",
"chars": 654,
"preview": "<component name=\"InspectionProjectProfileManager\">\n <profile version=\"1.0\">\n <option name=\"myName\" value=\"Project De"
},
{
"path": ".idea/misc.xml",
"chars": 304,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"JavaScriptSettings\">\n <option name=\"l"
},
{
"path": ".idea/modules.xml",
"chars": 304,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": ".idea/vcs.xml",
"chars": 326,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"VcsDirectoryMappings\">\n <mapping dire"
},
{
"path": ".idea/workspace.xml",
"chars": 24636,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "3rdparty/run_davis.py",
"chars": 1163,
"preview": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n davis_folder = '/me"
},
{
"path": "3rdparty/run_davis2017.m",
"chars": 704,
"preview": "clc\n\nsave_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations_edge/480p';\n\n\nsrc_dir = '/media/iiai/data/VOS/DAVIS2017/Anno"
},
{
"path": "3rdparty/run_davis_flow.py",
"chars": 1826,
"preview": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nfrom ru"
},
{
"path": "3rdparty/run_youtube.m",
"chars": 746,
"preview": "clc\n\nsave_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations_edge';\n\n\nsrc_dir = '/media/tfzhou/Yi Passpo"
},
{
"path": "3rdparty/run_youtube.py",
"chars": 1176,
"preview": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n davis_folder = '/me"
},
{
"path": "3rdparty/run_youtube_flow.py",
"chars": 1835,
"preview": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nimport "
},
{
"path": "3rdparty/seg2edge.m",
"chars": 2202,
"preview": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by t"
},
{
"path": "README.md",
"chars": 5966,
    "preview": "## Motion-Attentive Transition for Zero-Shot Video Object Segmentation\n\n["
},
{
"path": "data/db_info.yaml",
"chars": 12687,
"preview": "attributes: [AC, BC, CS, DB, DEF, EA, FM, HO, IO, LR, MB, OCC, OV, ROT, SC, SV]\nsets: [train, val, val-dev]\nyears: [201"
},
{
"path": "data/palette.txt",
"chars": 2775,
"preview": "0 0 0\n128 0 0\n0 128 0\n128 128 0\n0 0 128\n128 0 128\n0 128 128\n128 128 128\n64 0 0\n191 0 0\n64 128 0\n191 128 0\n64 0 128\n191 0"
},
{
"path": "data/run_davis2017.m",
"chars": 671,
"preview": "clc\n\nsrc_dir = 'data/DAVIS2017/Annotations/480p';\nsave_dir = 'data/DAVIS2017/Annotations_edge/480p';\n\nseqs = dir(src_dir"
},
{
"path": "data/run_youtube.m",
"chars": 703,
"preview": "clc\n\nsrc_dir = 'data/YouTubeVOS_2018/train/Annotations';\nsave_dir = 'data/YouTubeVOS_2018/train/Annotations_edge';\n\nseqs"
},
{
"path": "data/seg2edge.m",
"chars": 2202,
"preview": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by t"
},
{
"path": "dataloader/base.py",
"chars": 6864,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "dataloader/base_youtube.py",
"chars": 9018,
"preview": "import functools\nimport os.path as osp\n\nimport numpy as np\n\nfrom PIL import Image\nfrom skimage.io import ImageCollection"
},
{
"path": "dataloader/custom_transforms.py",
"chars": 3587,
"preview": "import random\nimport cv2\nimport numpy as np\nimport torch\nimport math\n\n\nclass ScaleNRotate(object):\n \"\"\"Scale (zoom-in"
},
{
"path": "dataloader/dataset_utils.py",
"chars": 500,
"preview": "def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,\n target_transfo"
},
{
"path": "dataloader/davis2017.py",
"chars": 4848,
"preview": "from __future__ import division\n\nimport torch\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb"
},
{
"path": "dataloader/davis2017_youtubevos_ehem.py",
"chars": 9008,
"preview": "from __future__ import division\n\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb\nimport numpy"
},
{
"path": "dataloader/hed_bsds.py",
"chars": 1856,
"preview": "import os\nfrom PIL import Image\n\nfrom torch.utils import data\n\nclass HEDBSDSTrain(data.Dataset):\n def __init__(self, "
},
{
"path": "dataloader/youtubeVOS.py",
"chars": 4300,
"preview": "#TODO\n\nfrom collections import namedtuple\n\nimport os\nimport cv2\nimport numpy as np\n\nfrom PIL import Image\nfrom .base_you"
},
{
"path": "dataset_lmdb_generator.py",
"chars": 2109,
"preview": "# This script generates a data structure in the form of key-value storage. This is made in the huge amount of \n# calls t"
},
{
"path": "measures/__init__.py",
"chars": 739,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/f_boundary.py",
"chars": 3534,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/jaccard.py",
"chars": 1513,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/statistics.py",
"chars": 857,
"preview": "import numpy as np\nimport warnings\n\ndef mean(X):\n \"\"\"\n Compute average ignoring NaN values.\n \"\"\"\n\n return np.nanmean"
},
{
"path": "measures/t_stability.py",
"chars": 4882,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/__init__.py",
"chars": 633,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/config.py",
"chars": 3440,
"preview": "#!/usr/bin/env python\nimport yaml\nimport os.path as osp\nimport numpy as np\nfrom easydict import EasyDict as edict\n\nfrom "
},
{
"path": "misc/config_youtubeVOS.py",
"chars": 3998,
"preview": "#!/usr/bin/env python\nimport json\nimport os.path as osp\nfrom easydict import EasyDict as edict\nfrom enum import Enum\nimp"
},
{
"path": "misc/io_aux.py",
"chars": 567,
"preview": "from PIL import Image\nimport numpy as np\n\nfrom misc.config import cfg\n\ndef imread_indexed(filename):\n \"\"\" Load image gi"
},
{
"path": "misc/logger.py",
"chars": 653,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/parallel.py",
"chars": 821,
"preview": "try:\n from joblib import Parallel, delayed\nexcept Exception as e:\n print(e) #TODO\n # if joblib does not exist j"
},
{
"path": "misc/timer.py",
"chars": 894,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/visual.py",
"chars": 1185,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "modules/MATNet.py",
"chars": 14571,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torchvision import models\n\n\nclass Encoder(nn.Mod"
},
{
"path": "modules/cbam.py",
"chars": 3860,
"preview": "import torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass BasicConv(nn.Module):\n def __in"
},
{
"path": "requirements.txt",
"chars": 261,
"preview": "easydict==1.9\nlmdb==0.94\ntorchvision==0.2.2\nopencv_contrib_python==4.0.0.21\nscipy==1.2.1\nsix==1.12.0\ntorch==1.0.1.post2\n"
},
{
"path": "test_MATNet.py",
"chars": 3612,
"preview": "import torch\nfrom torchvision import transforms\n\nimport os\nimport glob\nfrom tqdm import tqdm\nfrom PIL import Image\nfrom "
},
{
"path": "train_MATNet.py",
"chars": 7391,
"preview": "import torch\nfrom torch.utils import data\nfrom torchvision import transforms\n\nimport os\nimport sys\nimport time\nimport ra"
},
{
"path": "utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "utils/objectives.py",
"chars": 1394,
"preview": "import torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\n\r\n\r\nclass WeightedBCE2d(nn.Module):\r\n def __ini"
},
{
"path": "utils/utils.py",
"chars": 4885,
"preview": "import torch\r\nimport os\r\nimport pickle\r\nfrom collections import OrderedDict\r\n\r\n\r\ndef make_dir(dir):\r\n if not os.path."
}
]
About this extraction
This page contains the full source code of the tfzhou/MATNet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 52 files (167.4 KB), approximately 48.2k tokens, and a symbol index with 191 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.