[
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\nckpt/\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n"
  },
  {
    "path": ".idea/aaai20-zero-ShotVOS-github.iml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n  <component name=\"NewModuleRootManager\">\n    <content url=\"file://$MODULE_DIR$\" />\n    <orderEntry type=\"jdk\" jdkName=\"Python 3.6 (pytorch-1.0.0)\" jdkType=\"Python SDK\" />\n    <orderEntry type=\"sourceFolder\" forTests=\"false\" />\n  </component>\n  <component name=\"TestRunnerService\">\n    <option name=\"PROJECT_TEST_RUNNER\" value=\"Unittests\" />\n  </component>\n</module>"
  },
  {
    "path": ".idea/inspectionProfiles/Project_Default.xml",
    "content": "<component name=\"InspectionProjectProfileManager\">\n  <profile version=\"1.0\">\n    <option name=\"myName\" value=\"Project Default\" />\n    <inspection_tool class=\"PyPackageRequirementsInspection\" enabled=\"true\" level=\"WARNING\" enabled_by_default=\"true\">\n      <option name=\"ignoredPackages\">\n        <value>\n          <list size=\"3\">\n            <item index=\"0\" class=\"java.lang.String\" itemvalue=\"PyYAML\" />\n            <item index=\"1\" class=\"java.lang.String\" itemvalue=\"skimage\" />\n            <item index=\"2\" class=\"java.lang.String\" itemvalue=\"Pillow\" />\n          </list>\n        </value>\n      </option>\n    </inspection_tool>\n  </profile>\n</component>"
  },
  {
    "path": ".idea/misc.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"JavaScriptSettings\">\n    <option name=\"languageLevel\" value=\"ES6\" />\n  </component>\n  <component name=\"ProjectRootManager\" version=\"2\" project-jdk-name=\"Python 3.6 (pytorch-1.0.0)\" project-jdk-type=\"Python SDK\" />\n</project>"
  },
  {
    "path": ".idea/modules.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ProjectModuleManager\">\n    <modules>\n      <module fileurl=\"file://$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml\" filepath=\"$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml\" />\n    </modules>\n  </component>\n</project>"
  },
  {
    "path": ".idea/vcs.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"VcsDirectoryMappings\">\n    <mapping directory=\"$PROJECT_DIR$\" vcs=\"Git\" />\n    <mapping directory=\"$PROJECT_DIR$/3rdparty/pytorch-hed\" vcs=\"Git\" />\n    <mapping directory=\"$PROJECT_DIR$/3rdparty/pytorch-pwc\" vcs=\"Git\" />\n  </component>\n</project>"
  },
  {
    "path": ".idea/workspace.xml",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n  <component name=\"ChangeListManager\">\n    <list default=\"true\" id=\"9b6416b8-b073-4819-80b8-0da9065c5674\" name=\"Default Changelist\" comment=\"\">\n      <change afterPath=\"$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/.idea/inspectionProfiles/Project_Default.xml\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/.idea/misc.xml\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/.idea/modules.xml\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/.idea/vcs.xml\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/base.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/base_youtube.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/custom_transforms.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/dataset_utils.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/davis2017.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/hed_bsds.py\" afterDir=\"false\" />\n      <change afterPath=\"$PROJECT_DIR$/dataloader/youtubeVOS.py\" afterDir=\"false\" />\n      <change beforePath=\"$PROJECT_DIR$/3rdparty/pytorch-hed/run.py\" beforeDir=\"false\" afterPath=\"$PROJECT_DIR$/3rdparty/pytorch-hed/run.py\" afterDir=\"false\" />\n      <change beforePath=\"$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py\" beforeDir=\"false\" afterPath=\"$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py\" afterDir=\"false\" />\n      <change beforePath=\"$PROJECT_DIR$/README.md\" beforeDir=\"false\" afterPath=\"$PROJECT_DIR$/README.md\" afterDir=\"false\" />\n    </list>\n    <option name=\"EXCLUDED_CONVERTED_TO_IGNORED\" value=\"true\" />\n    
<option name=\"SHOW_DIALOG\" value=\"false\" />\n    <option name=\"HIGHLIGHT_CONFLICTS\" value=\"true\" />\n    <option name=\"HIGHLIGHT_NON_ACTIVE_CHANGELIST\" value=\"false\" />\n    <option name=\"LAST_RESOLUTION\" value=\"IGNORE\" />\n  </component>\n  <component name=\"FileEditorManager\">\n    <leaf SIDE_TABS_SIZE_LIMIT_KEY=\"300\">\n      <file pinned=\"false\" current-in-tab=\"true\">\n        <entry file=\"file://$PROJECT_DIR$/README.md\">\n          <provider selected=\"true\" editor-type-id=\"split-provider[text-editor;markdown-preview-editor]\">\n            <state split_layout=\"SPLIT\">\n              <first_editor relative-caret-position=\"126\">\n                <caret line=\"7\" column=\"44\" selection-start-line=\"7\" selection-start-column=\"44\" selection-end-line=\"7\" selection-end-column=\"44\" />\n              </first_editor>\n              <second_editor />\n            </state>\n          </provider>\n        </entry>\n      </file>\n      <file pinned=\"false\" current-in-tab=\"false\">\n        <entry file=\"file://$PROJECT_DIR$/test_MATNet.py\">\n          <provider selected=\"true\" editor-type-id=\"text-editor\">\n            <state relative-caret-position=\"486\">\n              <caret line=\"99\" column=\"32\" selection-start-line=\"99\" selection-start-column=\"32\" selection-end-line=\"99\" selection-end-column=\"32\" />\n              <folding>\n                <element signature=\"e#0#12#0\" expanded=\"true\" />\n              </folding>\n            </state>\n          </provider>\n        </entry>\n      </file>\n      <file pinned=\"false\" current-in-tab=\"false\">\n        <entry file=\"file://$PROJECT_DIR$/dataset_lmdb_generator.py\">\n          <provider selected=\"true\" editor-type-id=\"text-editor\" />\n        </entry>\n      </file>\n    </leaf>\n  </component>\n  <component name=\"Git.Settings\">\n    <option name=\"RECENT_GIT_ROOT_PATH\" value=\"$PROJECT_DIR$\" />\n  </component>\n  <component 
name=\"IdeDocumentHistory\">\n    <option name=\"CHANGED_PATHS\">\n      <list>\n        <option value=\"$PROJECT_DIR$/train_MANet_Residual_Cascade_SA_Interleave.py\" />\n        <option value=\"$PROJECT_DIR$/apply_densecrf_davis.py\" />\n        <option value=\"$PROJECT_DIR$/utils/objectives.py\" />\n        <option value=\"$PROJECT_DIR$/utils/utils.py\" />\n        <option value=\"$PROJECT_DIR$/misc/visual.py\" />\n        <option value=\"$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py\" />\n        <option value=\"$PROJECT_DIR$/dataloader/dataset_utils.py\" />\n        <option value=\"$PROJECT_DIR$/args.py\" />\n        <option value=\"$PROJECT_DIR$/dataloader/base.py\" />\n        <option value=\"$PROJECT_DIR$/misc/config_youtubeVOS.py\" />\n        <option value=\"$PROJECT_DIR$/misc/config.py\" />\n        <option value=\"$PROJECT_DIR$/modules/MATNet.py\" />\n        <option value=\"$PROJECT_DIR$/measures/jaccard.py\" />\n        <option value=\"$PROJECT_DIR$/train_MATNet.py\" />\n        <option value=\"$PROJECT_DIR$/test_MATNet.py\" />\n        <option value=\"$PROJECT_DIR$/README.md\" />\n      </list>\n    </option>\n  </component>\n  <component name=\"ProjectFrameBounds\" extendedState=\"6\">\n    <option name=\"x\" value=\"65\" />\n    <option name=\"y\" value=\"325\" />\n    <option name=\"width\" value=\"1853\" />\n    <option name=\"height\" value=\"1055\" />\n  </component>\n  <component name=\"ProjectLevelVcsManager\" settingsEditedManually=\"true\" />\n  <component name=\"ProjectView\">\n    <navigator proportions=\"\" version=\"1\">\n      <foldersAlwaysOnTop value=\"true\" />\n    </navigator>\n    <panes>\n      <pane id=\"ProjectPane\">\n        <subPane>\n          <expand>\n            <path>\n              <item name=\"aaai20-zero-ShotVOS-github\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"aaai20-zero-ShotVOS-github\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n            <path>\n           
   <item name=\"aaai20-zero-ShotVOS-github\" type=\"b2602c69:ProjectViewProjectNode\" />\n              <item name=\"aaai20-zero-ShotVOS-github\" type=\"462c0819:PsiDirectoryNode\" />\n              <item name=\"utils\" type=\"462c0819:PsiDirectoryNode\" />\n            </path>\n          </expand>\n          <select />\n        </subPane>\n      </pane>\n      <pane id=\"Scope\" />\n    </panes>\n  </component>\n  <component name=\"PropertiesComponent\">\n    <property name=\"SHARE_PROJECT_CONFIGURATION_FILES\" value=\"true\" />\n    <property name=\"WebServerToolWindowFactoryState\" value=\"true\" />\n    <property name=\"last_opened_file_path\" value=\"$PROJECT_DIR$\" />\n    <property name=\"nodejs_interpreter_path.stuck_in_default_project\" value=\"undefined stuck path\" />\n    <property name=\"nodejs_npm_path_reset_for_default_project\" value=\"true\" />\n    <property name=\"settings.editor.selected.configurable\" value=\"com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable\" />\n  </component>\n  <component name=\"RecentsManager\">\n    <key name=\"CopyFile.RECENT_KEYS\">\n      <recent name=\"$PROJECT_DIR$\" />\n    </key>\n  </component>\n  <component name=\"RunDashboard\">\n    <option name=\"ruleStates\">\n      <list>\n        <RuleState>\n          <option name=\"name\" value=\"ConfigurationTypeDashboardGroupingRule\" />\n        </RuleState>\n        <RuleState>\n          <option name=\"name\" value=\"StatusDashboardGroupingRule\" />\n        </RuleState>\n      </list>\n    </option>\n  </component>\n  <component name=\"RunManager\" selected=\"Python.apply_densecrf_davis\">\n    <configuration name=\"apply_densecrf_davis\" type=\"PythonConfigurationType\" factoryName=\"Python\" temporary=\"true\">\n      <module name=\"aaai20-zero-ShotVOS-github\" />\n      <option name=\"INTERPRETER_OPTIONS\" value=\"\" />\n      <option name=\"PARENT_ENVS\" value=\"true\" />\n      <envs>\n        <env name=\"PYTHONUNBUFFERED\" value=\"1\" />\n      
</envs>\n      <option name=\"SDK_HOME\" value=\"\" />\n      <option name=\"WORKING_DIRECTORY\" value=\"$PROJECT_DIR$\" />\n      <option name=\"IS_MODULE_SDK\" value=\"true\" />\n      <option name=\"ADD_CONTENT_ROOTS\" value=\"true\" />\n      <option name=\"ADD_SOURCE_ROOTS\" value=\"true\" />\n      <EXTENSION ID=\"PythonCoverageRunConfigurationExtension\" runner=\"coverage.py\" />\n      <option name=\"SCRIPT_NAME\" value=\"$PROJECT_DIR$/apply_densecrf_davis.py\" />\n      <option name=\"PARAMETERS\" value=\"\" />\n      <option name=\"SHOW_COMMAND_LINE\" value=\"false\" />\n      <option name=\"EMULATE_TERMINAL\" value=\"false\" />\n      <option name=\"MODULE_MODE\" value=\"false\" />\n      <option name=\"REDIRECT_INPUT\" value=\"false\" />\n      <option name=\"INPUT_FILE\" value=\"\" />\n      <method v=\"2\" />\n    </configuration>\n    <configuration name=\"test_MATNet\" type=\"PythonConfigurationType\" factoryName=\"Python\" temporary=\"true\">\n      <module name=\"aaai20-zero-ShotVOS-github\" />\n      <option name=\"INTERPRETER_OPTIONS\" value=\"\" />\n      <option name=\"PARENT_ENVS\" value=\"true\" />\n      <envs>\n        <env name=\"PYTHONUNBUFFERED\" value=\"1\" />\n      </envs>\n      <option name=\"SDK_HOME\" value=\"\" />\n      <option name=\"WORKING_DIRECTORY\" value=\"$PROJECT_DIR$\" />\n      <option name=\"IS_MODULE_SDK\" value=\"true\" />\n      <option name=\"ADD_CONTENT_ROOTS\" value=\"true\" />\n      <option name=\"ADD_SOURCE_ROOTS\" value=\"true\" />\n      <EXTENSION ID=\"PythonCoverageRunConfigurationExtension\" runner=\"coverage.py\" />\n      <option name=\"SCRIPT_NAME\" value=\"$PROJECT_DIR$/test_MATNet.py\" />\n      <option name=\"PARAMETERS\" value=\"\" />\n      <option name=\"SHOW_COMMAND_LINE\" value=\"false\" />\n      <option name=\"EMULATE_TERMINAL\" value=\"false\" />\n      <option name=\"MODULE_MODE\" value=\"false\" />\n      <option name=\"REDIRECT_INPUT\" value=\"false\" />\n      <option 
name=\"INPUT_FILE\" value=\"\" />\n      <method v=\"2\" />\n    </configuration>\n    <recent_temporary>\n      <list>\n        <item itemvalue=\"Python.apply_densecrf_davis\" />\n        <item itemvalue=\"Python.test_MATNet\" />\n      </list>\n    </recent_temporary>\n  </component>\n  <component name=\"SvnConfiguration\">\n    <configuration />\n  </component>\n  <component name=\"TaskManager\">\n    <task active=\"true\" id=\"Default\" summary=\"Default task\">\n      <changelist id=\"9b6416b8-b073-4819-80b8-0da9065c5674\" name=\"Default Changelist\" comment=\"\" />\n      <created>1574574216202</created>\n      <option name=\"number\" value=\"Default\" />\n      <option name=\"presentableId\" value=\"Default\" />\n      <updated>1574574216202</updated>\n      <workItem from=\"1574574222318\" duration=\"8114000\" />\n      <workItem from=\"1574586098553\" duration=\"647000\" />\n      <workItem from=\"1574600105252\" duration=\"1530000\" />\n      <workItem from=\"1574603699909\" duration=\"23031000\" />\n    </task>\n    <servers />\n  </component>\n  <component name=\"TimeTrackingManager\">\n    <option name=\"totallyTimeSpent\" value=\"33322000\" />\n  </component>\n  <component name=\"ToolWindowManager\">\n    <frame x=\"65\" y=\"-4\" width=\"1855\" height=\"1084\" extended-state=\"6\" />\n    <editor active=\"true\" />\n    <layout>\n      <window_info active=\"true\" content_ui=\"combo\" id=\"Project\" order=\"0\" visible=\"true\" weight=\"0.18815717\" />\n      <window_info id=\"Structure\" order=\"1\" side_tool=\"true\" weight=\"0.25\" />\n      <window_info id=\"Favorites\" order=\"2\" side_tool=\"true\" />\n      <window_info anchor=\"bottom\" id=\"Message\" order=\"0\" />\n      <window_info anchor=\"bottom\" id=\"Find\" order=\"1\" />\n      <window_info anchor=\"bottom\" id=\"Run\" order=\"2\" weight=\"0.32959184\" />\n      <window_info anchor=\"bottom\" id=\"Debug\" order=\"3\" weight=\"0.4\" />\n      <window_info anchor=\"bottom\" id=\"Cvs\" 
order=\"4\" weight=\"0.25\" />\n      <window_info anchor=\"bottom\" id=\"Inspection\" order=\"5\" weight=\"0.4\" />\n      <window_info anchor=\"bottom\" id=\"TODO\" order=\"6\" />\n      <window_info anchor=\"bottom\" id=\"Docker\" order=\"7\" show_stripe_button=\"false\" />\n      <window_info anchor=\"bottom\" id=\"Version Control\" order=\"8\" />\n      <window_info anchor=\"bottom\" id=\"Database Changes\" order=\"9\" />\n      <window_info anchor=\"bottom\" id=\"Event Log\" order=\"10\" side_tool=\"true\" visible=\"true\" weight=\"0.22040816\" />\n      <window_info anchor=\"bottom\" id=\"Terminal\" order=\"11\" />\n      <window_info anchor=\"bottom\" id=\"Python Console\" order=\"12\" />\n      <window_info anchor=\"right\" id=\"Commander\" internal_type=\"SLIDING\" order=\"0\" type=\"SLIDING\" weight=\"0.4\" />\n      <window_info anchor=\"right\" id=\"Ant Build\" order=\"1\" weight=\"0.25\" />\n      <window_info anchor=\"right\" content_ui=\"combo\" id=\"Hierarchy\" order=\"2\" weight=\"0.25\" />\n      <window_info anchor=\"right\" id=\"Remote Host\" order=\"3\" />\n      <window_info anchor=\"right\" id=\"SciView\" order=\"4\" />\n      <window_info anchor=\"right\" id=\"Database\" order=\"5\" />\n    </layout>\n  </component>\n  <component name=\"TypeScriptGeneratedFilesManager\">\n    <option name=\"version\" value=\"1\" />\n  </component>\n  <component name=\"com.intellij.coverage.CoverageDataManagerImpl\">\n    <SUITE FILE_PATH=\"coverage/aaai20_zero_ShotVOS_github$test_MATNet.coverage\" NAME=\"test_MATNet Coverage Results\" MODIFIED=\"1574582203491\" SOURCE_PROVIDER=\"com.intellij.coverage.DefaultCoverageFileProvider\" RUNNER=\"coverage.py\" COVERAGE_BY_TEST_ENABLED=\"true\" COVERAGE_TRACING_ENABLED=\"false\" WORKING_DIRECTORY=\"$PROJECT_DIR$\" />\n    <SUITE FILE_PATH=\"coverage/aaai20_zero_ShotVOS_github$apply_densecrf_davis.coverage\" NAME=\"apply_densecrf_davis Coverage Results\" MODIFIED=\"1574582766918\" 
SOURCE_PROVIDER=\"com.intellij.coverage.DefaultCoverageFileProvider\" RUNNER=\"coverage.py\" COVERAGE_BY_TEST_ENABLED=\"true\" COVERAGE_TRACING_ENABLED=\"false\" WORKING_DIRECTORY=\"$PROJECT_DIR$\" />\n  </component>\n  <component name=\"editorHistoryManager\">\n    <entry file=\"file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA.py\" />\n    <entry file=\"file://$PROJECT_DIR$/modules/MANet.py\" />\n    <entry file=\"file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA_Interleave_1layers.py\" />\n    <entry file=\"file://$PROJECT_DIR$/modules/seed.py\" />\n    <entry file=\"file://$PROJECT_DIR$/utils/hungarian.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"288\">\n          <caret line=\"16\" column=\"23\" lean-forward=\"true\" selection-start-line=\"16\" selection-start-column=\"23\" selection-end-line=\"16\" selection-end-column=\"23\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/parallel.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/measures/statistics.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state>\n          <folding>\n            <element signature=\"e#0#18#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/davis2017_ehem.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state>\n          <folding>\n            <element signature=\"e#0#31#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/dataset_utils.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n     
   <state relative-caret-position=\"162\">\n          <caret line=\"9\" column=\"4\" selection-start-line=\"9\" selection-start-column=\"4\" selection-end-line=\"9\" selection-end-column=\"4\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/davis2017.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"-342\">\n          <caret line=\"20\" column=\"21\" selection-start-line=\"20\" selection-start-column=\"21\" selection-end-line=\"20\" selection-end-column=\"21\" />\n          <folding>\n            <element signature=\"e#0#31#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/run_davis2017.m\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/run_youtube.m\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/hed_bsds.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state>\n          <folding>\n            <element signature=\"e#0#9#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/youtubeVOS.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/base_youtube.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state>\n          <folding>\n            <element signature=\"e#0#16#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/base.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state 
relative-caret-position=\"414\">\n          <caret line=\"23\" column=\"30\" selection-start-line=\"23\" selection-start-column=\"30\" selection-end-line=\"23\" selection-end-column=\"30\" />\n          <folding>\n            <element signature=\"e#451#467#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/io_aux.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"144\">\n          <caret line=\"11\" column=\"41\" selection-start-line=\"11\" selection-start-column=\"41\" selection-end-line=\"11\" selection-end-column=\"41\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/config.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"280\">\n          <caret line=\"53\" column=\"30\" selection-start-line=\"53\" selection-start-column=\"30\" selection-end-line=\"53\" selection-end-column=\"30\" />\n          <folding>\n            <element signature=\"e#22#33#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/config_youtubeVOS.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"820\">\n          <caret line=\"65\" column=\"60\" selection-start-line=\"65\" selection-start-column=\"60\" selection-end-line=\"65\" selection-end-column=\"60\" />\n          <folding>\n            <element signature=\"e#22#33#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/logger.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/modules/MATNet.py\">\n      <provider selected=\"true\" 
editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"756\">\n          <caret line=\"261\" column=\"47\" lean-forward=\"true\" selection-start-line=\"261\" selection-start-column=\"47\" selection-end-line=\"261\" selection-end-column=\"47\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/utils/__init__.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/modules/cbam.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"198\">\n          <caret line=\"11\" column=\"21\" selection-start-line=\"11\" selection-start-column=\"21\" selection-end-line=\"11\" selection-end-column=\"21\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/utils/objectives.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"162\">\n          <caret line=\"9\" column=\"25\" selection-start-line=\"9\" selection-start-column=\"25\" selection-end-line=\"9\" selection-end-column=\"25\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/misc/visual.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"198\">\n          <caret line=\"12\" column=\"41\" selection-start-line=\"12\" selection-start-column=\"41\" selection-end-line=\"12\" selection-end-column=\"41\" />\n        </state>\n      </provider>\n    </entry>\n    <entry 
file=\"file://$PROJECT_DIR$/misc/timer.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/measures/t_stability.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/measures/f_boundary.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/utils/utils.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"72\">\n          <caret line=\"4\" lean-forward=\"true\" selection-start-line=\"4\" selection-end-line=\"4\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$USER_HOME$/anaconda2/envs/pytorch-1.0.0/lib/python3.6/site-packages/torch/utils/data/dataloader.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"2044\">\n          <caret line=\"767\" column=\"8\" selection-start-line=\"767\" selection-start-column=\"8\" selection-end-line=\"767\" selection-end-column=\"8\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/train_MATNet.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"591\">\n          <caret line=\"145\" column=\"20\" selection-start-line=\"145\" selection-start-column=\"20\" selection-end-line=\"145\" selection-end-column=\"20\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state 
relative-caret-position=\"-116\">\n          <caret line=\"193\" column=\"56\" selection-start-line=\"193\" selection-start-column=\"56\" selection-end-line=\"193\" selection-end-column=\"56\" />\n          <folding>\n            <element signature=\"e#0#31#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/measures/jaccard.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"564\">\n          <caret line=\"42\" selection-start-line=\"42\" selection-end-line=\"42\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/apply_densecrf_davis.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"144\">\n          <caret line=\"8\" column=\"29\" lean-forward=\"true\" selection-start-line=\"8\" selection-start-column=\"29\" selection-end-line=\"8\" selection-end-column=\"29\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/args.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"144\">\n          <caret line=\"8\" column=\"54\" selection-start-line=\"8\" selection-start-column=\"54\" selection-end-line=\"8\" selection-end-column=\"54\" />\n        </state>\n      </provider>\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/test_MATNet.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\">\n        <state relative-caret-position=\"486\">\n          <caret line=\"99\" column=\"32\" selection-start-line=\"99\" selection-start-column=\"32\" selection-end-line=\"99\" selection-end-column=\"32\" />\n          <folding>\n            <element signature=\"e#0#12#0\" expanded=\"true\" />\n          </folding>\n        </state>\n      </provider>\n    </entry>\n    <entry 
file=\"file://$PROJECT_DIR$/dataset_lmdb_generator.py\">\n      <provider selected=\"true\" editor-type-id=\"text-editor\" />\n    </entry>\n    <entry file=\"file://$PROJECT_DIR$/README.md\">\n      <provider selected=\"true\" editor-type-id=\"split-provider[text-editor;markdown-preview-editor]\">\n        <state split_layout=\"SPLIT\">\n          <first_editor relative-caret-position=\"126\">\n            <caret line=\"7\" column=\"44\" selection-start-line=\"7\" selection-start-column=\"44\" selection-end-line=\"7\" selection-end-column=\"44\" />\n          </first_editor>\n          <second_editor />\n        </state>\n      </provider>\n    </entry>\n  </component>\n</project>"
  },
  {
    "path": "3rdparty/run_davis.py",
    "content": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n    davis_folder = '/media/iiai/data/VOS/DAVIS2017/JPEGImages/480p'\n    save_dir = '/media/iiai/data/VOS/DAVIS2017/davis2017-hed'\n\n    videos = os.listdir(davis_folder)\n    print(videos)\n\n    for idx, video in enumerate(videos):\n        print('process {}[{}/{}]'.format(video, idx, len(videos)))\n        save_dir_video = os.path.join(save_dir, video)\n        if not os.path.exists(save_dir_video):\n            os.makedirs(save_dir_video)\n\n        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))\n\n        for imagefile in imagefiles:\n            tensorInput = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\n            tensorOutput = estimate(tensorInput)\n\n            save_name = os.path.basename(imagefile)\n            save_file = os.path.join(save_dir_video, save_name)\n            PIL.Image.fromarray(\n                (tensorOutput.clamp(0.0, 1.0).numpy().transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)).save(\n                save_file)\n\nmain()\n"
  },
  {
    "path": "3rdparty/run_davis2017.m",
    "content": "clc\n\nsave_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations_edge/480p';\n\n\nsrc_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations/480p';\n\nseqs = dir(src_dir);\nseqs = {seqs.name};\nseqs = seqs(3:end);\n\nfor i = 1 : length(seqs)\n    seq_path = [src_dir, '/', seqs{i}];\n    images = dir([seq_path, '/*.png']);\n    images = {images.name};\n    \n    for j = 1 : length(images)\n        imagefile = fullfile(seq_path, images{j});\n        im = imread(imagefile);\n        \n        res = seg2edge(im, 2, [], 'regular');\n        \n        save_path = fullfile(save_dir, seqs{i});\n        if ~exist(save_path, 'dir')\n            mkdir(save_path)\n        end\n        imwrite(res, fullfile(save_path, images{j}))\n    end\nend"
  },
  {
    "path": "3rdparty/run_davis_flow.py",
    "content": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nfrom run import estimate\nimport flow_vis, cv2 \n\ndef main():\n    davis_folder = '/home/tianfei/dataset/DAVIS2017/JPEGImages/480p'\n    save_dir = './davis2017'\n\n    videos = os.listdir(davis_folder)\n\n    for idx, video in enumerate(videos):\n        print('process {}[{}/{}]'.format(video, idx, len(videos)))\n        save_dir_video = os.path.join(save_dir, video)\n        if not os.path.exists(save_dir_video):\n            os.makedirs(save_dir_video)\n\n        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))\n\n        for i in range(len(imagefiles)-1):\n            f1 = imagefiles[i]\n            f2 = imagefiles[i+1]\n\n            save_name = os.path.basename(f1)[:-4] + '_' + os.path.basename(f2)[:-4] + '.png'\n            save_file = os.path.join(save_dir_video, save_name)\n            run(f1, f2, save_file)\n\n\ndef run(imagefile1, imagefile2, save_file):\n\ttensorFirst = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile1))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\ttensorSecond = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile2))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\n\ttensorOutput = estimate(tensorFirst, tensorSecond)\n\n\tflow_color = flow_vis.flow_to_color(tensorOutput.numpy().transpose(1,2,0), convert_to_bgr=True)\n\tcv2.imwrite(save_file, flow_color)\n\n\t#objectOutput = open(save_file, 'wb')\n\n\t#numpy.array([ 80, 73, 69, 72 ], numpy.uint8).tofile(objectOutput)\n\t#numpy.array([ tensorOutput.size(2), tensorOutput.size(1) ], numpy.int32).tofile(objectOutput)\n\t#numpy.array(tensorOutput.numpy().transpose(1, 2, 0), numpy.float32).tofile(objectOutput)\n\n\t#objectOutput.close()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "3rdparty/run_youtube.m",
    "content": "clc\n\nsave_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations_edge';\n\n\nsrc_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations';\n\nseqs = dir(src_dir);\nseqs = {seqs.name};\nseqs = seqs(3:end);\n\nfor i = 1 : length(seqs)\n    disp(seqs{i})\n    seq_path = [src_dir, '/', seqs{i}];\n    images = dir([seq_path, '/*.png']);\n    images = {images.name};\n    \n    for j = 1 : length(images)\n        imagefile = fullfile(seq_path, images{j});\n        im = imread(imagefile);\n        \n        res = seg2edge(im, 2, [], 'regular');\n        \n        save_path = fullfile(save_dir, seqs{i});\n        if ~exist(save_path, 'dir')\n            mkdir(save_path)\n        end\n        imwrite(res, fullfile(save_path, images{j}))\n    end\nend"
  },
  {
    "path": "3rdparty/run_youtube.py",
    "content": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n    davis_folder = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/JPEGImages'\n    save_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018-hed'\n\n    videos = os.listdir(davis_folder)\n    print(videos)\n\n    for idx, video in enumerate(videos):\n        print('process {}[{}/{}]'.format(video, idx, len(videos)))\n        save_dir_video = os.path.join(save_dir, video)\n        if not os.path.exists(save_dir_video):\n            os.makedirs(save_dir_video)\n\n        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))\n\n        for imagefile in imagefiles:\n            tensorInput = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\n            tensorOutput = estimate(tensorInput)\n\n            save_name = os.path.basename(imagefile)\n            save_file = os.path.join(save_dir_video, save_name)\n            PIL.Image.fromarray(\n                (tensorOutput.clamp(0.0, 1.0).numpy().transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)).save(\n                save_file)\n\nmain()\n"
  },
  {
    "path": "3rdparty/run_youtube_flow.py",
    "content": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nimport flow_vis\nfrom run import estimate\nimport cv2\n\ndef main():\n    davis_folder = '/home/tianfei/dataset/YouTubeVOS/train/JPEGImages/'\n    save_dir = './youtubevos'\n\n    videos = os.listdir(davis_folder)\n\n    for idx, video in enumerate(videos):\n        print('process {}[{}/{}]'.format(video, idx, len(videos)))\n        save_dir_video = os.path.join(save_dir, video)\n        if not os.path.exists(save_dir_video):\n            os.makedirs(save_dir_video)\n\n        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))\n\n        for i in range(len(imagefiles)-1):\n            f1 = imagefiles[i]\n            f2 = imagefiles[i+1]\n\n            save_name = os.path.basename(f1)[:-4] + '_' + os.path.basename(f2)[:-4] + '.png'\n            save_file = os.path.join(save_dir_video, save_name)\n            run(f1, f2, save_file)\n\n\ndef run(imagefile1, imagefile2, save_file):\n\ttensorFirst = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile1))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\ttensorSecond = torch.FloatTensor(numpy.array(PIL.Image.open(imagefile2))[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))\n\n\ttensorOutput = estimate(tensorFirst, tensorSecond)\n\n\t#objectOutput = open(save_file, 'wb')\n\n\tflow_color = flow_vis.flow_to_color(tensorOutput.numpy().transpose(1,2,0), convert_to_bgr=True)\n\tcv2.imwrite(save_file, flow_color)\n\n\t#numpy.array([ 80, 73, 69, 72 ], numpy.uint8).tofile(objectOutput)\n\t#numpy.array([ tensorOutput.size(2), tensorOutput.size(1) ], numpy.int32).tofile(objectOutput)\n\t#numpy.array(tensorOutput.numpy().transpose(1, 2, 0), numpy.float32).tofile(objectOutput)\n\n\t#objectOutput.close()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "3rdparty/seg2edge.m",
    "content": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by the function.\nfunction [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)\n% Get dimensions\n[height, width, chn] = size(seg);\nif(~isempty(labelIgnore))\n    if(chn~=size(labelIgnore, 2))\n        error('Channel dimension not matching ignored label dimension!')\n    end\nend\n\n% Set the considered neighborhood.\nradius_search = max(ceil(radius), 1);\n[X, Y] = meshgrid(1:width, 1:height);\n[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);\n\n% Columnize everything\nX = X(:); Y = Y(:);\nx = x(:); y = y(:);\nif(chn == 1)\n    seg = seg(:);\nelse\n    seg = reshape(seg, [height*width chn]);\nend\n\n% Build circular neighborhood\nidxNeigh = sqrt(x.^2 + y.^2) <= radius;\nx = x(idxNeigh); y = y(idxNeigh);\nnumPxlImg = length(X);\nnumPxlNeigh = length(x);\n\n% Compute Gaussian weight\nidxEdge = false(numPxlImg, 1);\nfor i = 1:numPxlNeigh\n    XNeigh = X+x(i);\n    YNeigh = Y+y(i);\n    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );\n    \n    XCenter = X(idxValid);\n    YCenter = Y(idxValid);\n    XNeigh = XNeigh(idxValid);\n    YNeigh = YNeigh(idxValid);\n    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);\n    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);\n    \n    if(strcmp(edge_type, 'regular'))\n        idxDiff = find(any(LCenter~=LNeigh, 2));\n    elseif(strcmp(edge_type, 'inner'))\n        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );\n    elseif(strcmp(edge_type, 'outer'))\n        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );\n    else\n        error('Wrong edge type input!');\n    end    \n    \n    LCenterEdge = LCenter(idxDiff, :);\n    LNeighEdge = LNeigh(idxDiff, :);\n    idxIgnore2 = false(length(idxDiff), 1);\n    for j = 1:size(labelIgnore, 1)\n        
idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );\n    end\n    \n    idxDiffGT = idxDiff(~idxIgnore2);\n    idxEdge(idxValid(idxDiffGT)) = true;\nend\nidxEdge = reshape(idxEdge, [height, width]);\n"
  },
  {
    "path": "README.md",
    "content": "## Motion-Attentive Transition for Zero-Shot Video Object Segmentation\n\n[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/motion-attentive-transition-for-zero-shot/unsupervised-video-object-segmentation-on)](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on?p=motion-attentive-transition-for-zero-shot)\n\n[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/matnet-motion-attentive-transition-network/unsupervised-video-object-segmentation-on-4)](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on-4?p=matnet-motion-attentive-transition-network)\n\n> UPDATES:<br>\n> - [2021/04/17] Our MATNet achieves state-of-the-art results (__64.2__ in terms of _Mean J_) on the [MoCA](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing) dataset in \"Self-supervised Video Object Segmentation by Motion Grouping\" by Charig Yang, Hala Lamdouar, Erika Lu, Andrew Zisserman, Weidi Xie. 
Thanks [Charig Yang](https://charigyang.github.io/) for providing the segmentation results [Google Drive](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing).\n> - [2020/06/15] Update results for DAVIS-17 test-dev set!\n> - [2020/03/04] Update results for DAVIS-17 validation set!\n> - [2019/11/17] Codes released!\n\nThis is a PyTorch implementation of our MATNet for unsupervised video object segmentation.\n\n**Motion-Attentive Transition for Zero-Shot Video Object Segmentation.** [[Arxiv](https://arxiv.org/abs/2003.04253)] [[TIP](https://ieeexplore.ieee.org/document/9165947)]\n\n## Prerequisites\n\nThe training and testing experiments are conducted using PyTorch 1.0.1 with a single GeForce RTX 2080Ti GPU with 11GB Memory.\n- [PyTorch 1.0.1](https://github.com/pytorch/pytorch)\n                   \nOther minor Python modules can be installed by running\n\n```bash\npip install -r requirements.txt\n```\n\n## Train\n\n### Clone\n```git clone --recursive https://github.com/tfzhou/MATNet.git```\n\n### Download Datasets\nIn the paper, we use the following two publicly available datasets for training. Here are some steps to prepare the data:\n- [DAVIS-17](https://davischallenge.org/davis2017/code.html): we use all the data in the train subset of DAVIS-16. \n    However, please download DAVIS-17 to fit the code. It will automatically choose the subset of DAVIS-16 for training. \n- [YoutubeVOS-2018](https://youtube-vos.org/dataset/): we sample the training data every 10 frames in YoutubeVOS-2018. We use the dataset version with 6fps rather than 30fps.\n- Create soft links:\n\n    ```cd data; ln -s your/davis17/path DAVIS2017; ln -s your/youtubevos/path YouTubeVOS_2018;```\n    \n### Prepare Edge Annotations\nI have provided some matlab scripts to generate edge annotations from mask. 
Please run ```3rdparty/run_davis2017.m``` \nand ```3rdparty/run_youtube.m```.\n\n### Prepare HED Results\nI have provided the pytorch codes to generate HED results for the two datasets (see ```3rdparty/pytorch-hed```).\nPlease run ```run_davis.py``` and ```run_youtube.py```. \n\nThe codes are borrowed from https://github.com/sniklaus/pytorch-hed. \n\n### Prepare Optical Flow\nI have provided the pytorch codes to generate optical flow results for the two datasets (see ```3rdparty/pytorch-pwc```).\nPlease run ```run_davis_flow.py``` and ```run_youtube_flow.py```. \n\nThe codes are borrowed from https://github.com/sniklaus/pytorch-pwc. \nPlease follow the [setup](https://github.com/sniklaus/pytorch-pwc#setup) section to install ```cupy```. \n\n`warning: Total size of optical flow results of Youtube-VOS is more than 30GB.`\n\n### Train\nOnce all data is prepared, please run ```python train_MATNet.py``` for training.\n\n## Test\n1. Run ```python test_MATNet.py``` to obtain the saliency results on DAVIS-16 val set.\n2. Run ```python apply_densecrf_davis.py``` for binary segmentation results.\n\n\n## Segmentation Results\n\n1. The segmentation results on DAVIS-16 and Youtube-objects can be downloaded from [Google Drive](https://drive.google.com/file/d/1d23TGBtrr11g8KFAStwewTyxLq2nX4PT/view?usp=sharing).\n2. The segmentation results on DAVIS-17 __val__ can be downloaded from [Google Drive](https://drive.google.com/open?id=1GTqjWc7tktw92tBNKln2eFmb9WzdcVrz). We achieved __58.6__ in terms of _Mean J&F_.\n3. The segmentation results on DAVIS-17 __test-dev__ can be downloaded from [Google Drive](https://drive.google.com/file/d/1Ood-rr0d4YRFSrGGh6yVpYvOvE_h0tVK/view?usp=sharing). We achieved __59.8__ in terms of _Mean J&F_. The method also achieved the second place in DAVIS-20 unsupervised object segmentation challenge. 
Please refer to [paper](https://davischallenge.org/challenge2020/papers/DAVIS-Unsupervised-Challenge-2nd-Team.pdf) for more details of our challenge solution.\n\n## Pretrained Models\n\nThe pre-trained model can be downloaded from [Google Drive](https://drive.google.com/file/d/1XlenYXgQjoThgRUbffCUEADS6kE4lvV_/view?usp=sharing).\n\n## Citation\nIf you find MATNet useful for your research, please consider citing the following papers:\n```\n@inproceedings{zhou2020motion,\n  title={Motion-Attentive Transition for Zero-Shot Video Object Segmentation},\n  author={Zhou, Tianfei and Wang, Shunzhou and Zhou, Yi and Yao, Yazhou and Li, Jianwu and Shao, Ling},\n  booktitle={Proceedings of the 34th AAAI Conference on Artificial Intelligence (AAAI)},\n  year={2020},\n  pages={13066--13073}\n}\n\n@article{zhou2020matnet,\n  title={MATNet: Motion-Attentive Transition Network for Zero-Shot Video Object Segmentation},\n  author={Zhou, Tianfei and Li, Jianwu and Wang, Shunzhou and Tao, Ran and Shen, Jianbing},\n  journal={IEEE Transactions on Image Processing},\n  volume={29},\n  pages={8326-8338},\n  year={2020}\n}\n\n@inproceedings{zhou2021unsupervised,\n  author = {Zhou, Tianfei and Li, Jianwu and Li, Xueyi and Shao, Ling},\n  title = {Target-Aware Object Discovery and Association for Unsupervised Video Multi-Object Segmentation},\n  booktitle = {CVPR},\n  year = {2021}\n}\n```\n\n\n"
  },
  {
    "path": "apply_densecrf_davis.py",
    "content": "import pydensecrf.densecrf as dcrf\nimport numpy as np\nimport sys\nimport time\n\nimport os\nfrom tqdm import tqdm\nfrom skimage.io import imread, imsave\nfrom pydensecrf.utils import unary_from_labels, create_pairwise_bilateral,\\\n    create_pairwise_gaussian, unary_from_softmax\n\nfrom os import listdir, makedirs\nfrom os.path import isfile, join\n\ndef sigmoid(x):\n    return 1 / (1 + np.exp(-x))\n\nimage_dir = 'data/DAVIS2017/JPEGImages/480p'\ndavis_result_dir = 'output/davis16'\nmodel_name = 'MATNet_epoch0' # specify the folder name of saliency results\nmask_dir = os.path.join(davis_result_dir, model_name)\nsave_dir = join(davis_result_dir, model_name + '_crf')\n\nfor seq in tqdm(listdir(mask_dir)):\n    seq_dir = join(image_dir, seq)\n    seq_mask_dir = join(mask_dir, seq)\n    res_dir = join(save_dir, seq)\n    if not os.path.exists(res_dir):\n        os.makedirs(res_dir)\n\n    for f in listdir(seq_mask_dir):\n\n        frameName = f[:-4]\n\n        image = imread(join(seq_dir, f[:-4] + '.jpg'))\n        mask = imread(join(seq_mask_dir, f))\n\n        H, W = mask.shape\n\n        min_val = np.min(mask.ravel())\n        max_val = np.max(mask.ravel())\n        out = (mask.astype('float') - min_val) / (max_val - min_val)\n        labels = np.zeros((2, image.shape[0], image.shape[1]))\n        labels[1, :, :] = out\n        labels[0, :, :] = 1 - out\n\n        tau = 1.05\n        EPSILON = 1e-8\n        anno_norm = mask / 255\n        n_energy = -np.log((1.0 - anno_norm + EPSILON)) / (tau * sigmoid(1 - anno_norm))\n        p_energy = -np.log(anno_norm + EPSILON) / (tau * sigmoid(anno_norm))\n        labels[1, :, :] = n_energy\n        labels[0, :, :] = p_energy\n\n        colors = [0, 255]\n        colorize = np.empty((len(colors), 1), np.uint8)\n        colorize[:, 0] = colors\n\n        n_labels = 2\n\n        crf = dcrf.DenseCRF(image.shape[1] * image.shape[0], n_labels)\n\n        U = unary_from_softmax(labels)\n        
crf.setUnaryEnergy(U)\n\n        feats = create_pairwise_gaussian(sdims=(3, 3), shape=image.shape[:2])\n        crf.addPairwiseEnergy(feats, compat=3,\n                              kernel=dcrf.DIAG_KERNEL,\n                              normalization=dcrf.NORMALIZE_SYMMETRIC)\n\n        feats = create_pairwise_bilateral(sdims=(30, 30), schan=(5, 5, 5),\n                                          img=image, chdim=2)\n        crf.addPairwiseEnergy(feats, compat=5,\n                              kernel=dcrf.DIAG_KERNEL,\n                              normalization=dcrf.NORMALIZE_SYMMETRIC)\n\n        Q, tmp1, tmp2 = crf.startInference()\n        for i in range(5):\n            temp = crf.klDivergence(Q)\n            crf.stepInference(Q, tmp1, tmp2)\n            if abs(crf.klDivergence(Q)-temp) < 500:\n                break\n\n        MAP = np.argmax(Q, axis=0)\n        MAP = colorize[MAP]\n\n        imsave(res_dir + '/' + frameName + '.png', MAP.reshape(mask.shape))\n        #print(\"Saving: \" + res_dir + '/' + frameName + '.png')\n"
  },
  {
    "path": "args.py",
    "content": "import argparse\n\n\ndef get_parser():\n    parser = argparse.ArgumentParser(description='MATNet')\n\n    parser.add_argument('-year', dest='year', default='2017')\n    parser.add_argument('-imsize', dest='imsize', default=480, type=int)\n    parser.add_argument('-batch_size', dest='batch_size', default=4, type=int)\n    parser.add_argument('-num_workers', dest='num_workers', default=4,\n                        type=int)\n\n    ## TRAINING parameters ##\n    parser.add_argument('--resume', dest='resume', action='store_true',\n                        help=('whether to resume training an existing model'\n                              '(the one with name model_name will be used)'))\n    parser.set_defaults(resume=False)\n    parser.add_argument('-seed', dest='seed', default=123, type=int)\n    parser.add_argument('-gpu_id', dest='gpu_id', default=0, type=int)\n    parser.add_argument('-lr', dest='lr', default=1e-3, type=float)\n    parser.add_argument('-lr_cnn', dest='lr_cnn', default=1e-4, type=float)\n    parser.add_argument('-optim_cnn', dest='optim_cnn', default='sgd',\n                        choices=['adam', 'sgd', 'rmsprop'])\n    parser.add_argument('-momentum', dest='momentum', default=0.9, type=float)\n    parser.add_argument('-weight_decay', dest='weight_decay', default=5e-4,\n                        type=float)\n    parser.add_argument('-weight_decay_cnn', dest='weight_decay_cnn',\n                        default=5e-4, type=float)\n    parser.add_argument('-optim', dest='optim', default='sgd',\n                        choices=['adam', 'sgd', 'rmsprop'])\n    parser.add_argument('--crop', dest='crop', action='store_true')\n    parser.set_defaults(crop=False)\n\n    parser.add_argument('--update_encoder', dest='update_encoder',\n                        action='store_true',\n                        help='used in sync with finetune_after.'\n                             ' no need to activate.')\n    parser.set_defaults(update_encoder=True)\n\n   
 parser.add_argument('-max_epoch', dest='max_epoch', default=20, type=int)\n\n    # visualization and logging\n    parser.add_argument('-print_every', dest='print_every', default=10,\n                        type=int)\n\n    # loss weights\n    parser.add_argument('-iou_weight', dest='iou_weight', default=1.0,\n                        type=float)\n    # augmentation\n    parser.add_argument('--augment', dest='augment', action='store_true')\n    parser.set_defaults(augment=True)\n    parser.add_argument('-rotation', dest='rotation', default=10, type=int)\n    parser.add_argument('-translation', dest='translation', default=0.1,\n                        type=float)\n    parser.add_argument('-shear', dest='shear', default=0.1, type=float)\n    parser.add_argument('-zoom', dest='zoom', default=0.7, type=float)\n\n    # GPU\n    parser.add_argument('--cpu', dest='use_gpu', action='store_false')\n    parser.set_defaults(use_gpu=True)\n    parser.add_argument('-ngpus', dest='ngpus', default=1, type=int)\n\n    parser.add_argument('-model_name', dest='model_name', default='model')\n    parser.add_argument('-log_file', dest='log_file', default='train.log')\n\n    # dataset parameters\n    parser.add_argument('--resize', dest='resize', action='store_true')\n    parser.set_defaults(resize=False)\n    parser.add_argument('-dataset', dest='dataset', default='davis2017',\n                        choices=['davis2017', 'youtube'])\n\n    # testing\n    parser.add_argument('-eval_split', dest='eval_split', default='test')\n    parser.add_argument('-mask_th', dest='mask_th', default=0.5, type=float)\n    parser.add_argument('-max_dets', dest='max_dets', default=100, type=int)\n    parser.add_argument('-min_size', dest='min_size', default=0.001,\n                        type=float)\n    parser.add_argument('--display', dest='display', action='store_true')\n    parser.add_argument('--no_display_text', dest='no_display_text',\n                        action='store_true')\n    
parser.set_defaults(display=False)\n    parser.set_defaults(display_route=False)\n    parser.set_defaults(no_display_text=False)\n    parser.set_defaults(use_gt_masks=False)\n    return parser\n\n\nif __name__ ==\"__main__\":\n    parser = get_parser()\n    args_dict = parser.parse_args()\n"
  },
  {
    "path": "data/db_info.yaml",
    "content": "attributes: [AC, BC, CS, DB, DEF, EA, FM, HO, IO, LR, MB, OCC, OV, ROT, SC, SV]\nsets:  [train, val, test-dev]\nyears: [2016, 2017]\n\nsequences:\n\n- name: aerobatics\n  attributes: []\n  num_frames: 71\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: bear\n  attributes: [DEF]\n  num_frames: 82\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: bike-packing\n  attributes: []\n  num_frames: 69\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: blackswan\n  attributes: []\n  num_frames: 50\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: bmx-bumps\n  attributes: [LR, SV, SC, FM, CS, IO, MB, OCC, HO, EA, OV]\n  num_frames: 90\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: bmx-trees\n  attributes: [LR, SV, SC, FM, CS, IO, MB, DEF, OCC, HO, EA, BC]\n  num_frames: 80\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: boat\n  attributes: [SC, DB, EA, BC]\n  num_frames: 75\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: boxing-fisheye\n  attributes: []\n  num_frames: 87\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: breakdance\n  attributes: [FM, DB, MB, DEF, HO, ROT, OV, AC]\n  num_frames: 84\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: breakdance-flare\n  attributes: [FM, CS, MB, DEF, HO, ROT]\n  num_frames: 71\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: bus\n  attributes: [SC, OCC, HO, EA]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: camel\n  attributes: [CS, IO, DEF, ROT]\n  num_frames: 90\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: car-race\n  attributes: []\n  num_frames: 31\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: car-roundabout\n  attributes: [ROT, BC]\n  num_frames: 75\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: car-shadow\n  attributes: [LR, EA, AC, BC]\n  num_frames: 40\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: car-turn\n  attributes: [SV, ROT, BC]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 
2016\n\n- name: carousel\n  attributes: []\n  num_frames: 69\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: cat-girl\n  attributes: []\n  num_frames: 89\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: cats-car\n  attributes: []\n  num_frames: 67\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: chamaleon\n  attributes: []\n  num_frames: 85\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: classic-car\n  attributes: []\n  num_frames: 63\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: color-run\n  attributes: []\n  num_frames: 84\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: cows\n  attributes: [CS, IO, DEF, OCC, HO]\n  num_frames: 104\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: crossing\n  attributes: []\n  num_frames: 52\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: dance-jump\n  attributes: [SC, DB, MB, DEF, OCC, HO, ROT, EA]\n  num_frames: 60\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: dance-twirl\n  attributes: [SC, CS, IO, MB, DEF, HO, ROT, OV]\n  num_frames: 90\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: dancing\n  attributes: []\n  num_frames: 62\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: deer\n  attributes: []\n  num_frames: 79\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: disc-jockey\n  attributes: []\n  num_frames: 76\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: dog\n  attributes: [FM, CS, MB, DEF, ROT, EA]\n  num_frames: 60\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: dog-agility\n  attributes: [FM, MB, DEF, OCC, HO, EA, OV, AC]\n  num_frames: 25\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: dog-gooses\n  attributes: []\n  num_frames: 86\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: dogs-jump\n  attributes: []\n  num_frames: 66\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: dogs-scale\n  attributes: []\n  num_frames: 83\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: 
drift-chicane\n  attributes: [LR, SV, FM, DB, HO, ROT, EA, AC]\n  num_frames: 52\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: drift-straight\n  attributes: [LR, SV, FM, CS, MB, HO, ROT, EA, OV, AC]\n  num_frames: 50\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: drift-turn\n  attributes: [SV, FM, IO, DB, HO, ROT, OV, AC]\n  num_frames: 64\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: drone\n  attributes: []\n  num_frames: 91\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: elephant\n  attributes: [CS, DB, DEF, EA]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: flamingo\n  attributes: [SC, IO, DB, DEF, HO]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: giant-slalom\n  attributes: []\n  num_frames: 127\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: girl-dog\n  attributes: []\n  num_frames: 86\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: goat\n  attributes: [CS, DEF, EA, BC]\n  num_frames: 90\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: gold-fish\n  attributes: []\n  num_frames: 78\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: golf\n  attributes: []\n  num_frames: 79\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: guitar-violin\n  attributes: []\n  num_frames: 55\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: gym\n  attributes: []\n  num_frames: 60\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: helicopter\n  attributes: []\n  num_frames: 49\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: hike\n  attributes: [LR, DEF, HO]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: hockey\n  attributes: [SC, IO, DEF, HO, ROT]\n  num_frames: 75\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: horsejump-high\n  attributes: [SC, IO, DEF, OCC, HO]\n  num_frames: 50\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: horsejump-low\n  attributes: [SC, IO, DEF, OCC, HO, EA]\n  
num_frames: 60\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: horsejump-stick\n  attributes: []\n  num_frames: 58\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: hoverboard\n  attributes: []\n  num_frames: 81\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: india\n  attributes: []\n  num_frames: 81\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: judo\n  attributes: []\n  num_frames: 34\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: kid-football\n  attributes: []\n  num_frames: 68\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: kite-surf\n  attributes: [SV, SC, IO, DB, MB, OCC, HO, EA]\n  num_frames: 50\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: kite-walk\n  attributes: [SC, IO, DB, DEF, OCC, HO]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: koala\n  attributes: []\n  num_frames: 100\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: lab-coat\n  attributes: []\n  num_frames: 47\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: lady-running\n  attributes: []\n  num_frames: 65\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: libby\n  attributes: [SC, MB, DEF, OCC, HO, EA]\n  num_frames: 49\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: lindy-hop\n  attributes: []\n  num_frames: 73\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: loading\n  attributes: []\n  num_frames: 50\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: lock\n  attributes: []\n  num_frames: 43\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: longboard\n  attributes: []\n  num_frames: 52\n  set: train\n  eval_t: False\n  year: 2017\n\n\n- name: lucia\n  attributes: [DEF, OCC, HO]\n  num_frames: 70\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: mallard-fly\n  attributes: [LR, SV, FM, DB, MB, DEF, ROT, EA, OV, AC]\n  num_frames: 70\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: mallard-water\n  attributes: [LR, IO, DB, EA]\n  num_frames: 80\n  set: train\n 
 eval_t: True\n  year: 2016\n\n- name: man-bike\n  attributes: []\n  num_frames: 75\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n\n- name: mbike-trick\n  attributes: []\n  num_frames: 79\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: miami-surf\n  attributes: []\n  num_frames: 70\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: monkeys-trees\n  attributes: []\n  num_frames: 83\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n\n- name: motocross-bumps\n  attributes: [SV, FM, IO, HO, ROT, OV, AC, BC]\n  num_frames: 60\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: motocross-jump\n  attributes: [SV, SC, FM, IO, MB, DEF, HO, ROT, EA, OV, AC]\n  num_frames: 40\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: motorbike\n  attributes: [LR, SV, SC, FM, IO, OCC, HO, ROT, EA]\n  num_frames: 43\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: mtb-race\n  attributes: []\n  num_frames: 69\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: night-race\n  attributes: []\n  num_frames: 46\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: orchid\n  attributes: []\n  num_frames: 57\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: paragliding\n  attributes: [LR, SC, IO, HO]\n  num_frames: 70\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: paragliding-launch\n  attributes: [SC, IO, DEF, HO, EA]\n  num_frames: 80\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: parkour\n  attributes: [LR, SV, FM, DEF, OCC, HO, ROT, AC]\n  num_frames: 100\n  set: val\n  eval_t: False\n  year: 2016\n\n- name: people-sunset\n  attributes: []\n  num_frames: 67\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: pigs\n  attributes: []\n  num_frames: 79\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: planes-crossing\n  attributes: []\n  num_frames: 31\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: planes-water\n  attributes: []\n  num_frames: 38\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: 
rallye\n  attributes: []\n  num_frames: 50\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: rhino\n  attributes: [DEF, OCC, BC]\n  num_frames: 90\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: rollerblade\n  attributes: [LR, FM, CS, MB, DEF, HO]\n  num_frames: 35\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: rollercoaster\n  attributes: []\n  num_frames: 70\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: salsa\n  attributes: []\n  num_frames: 86\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: schoolgirls\n  attributes: []\n  num_frames: 80\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: scooter-black\n  attributes: [SV, IO, HO, EA]\n  num_frames: 43\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: scooter-board\n  attributes: []\n  num_frames: 91\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: scooter-gray\n  attributes: [SC, FM, IO, OCC, HO, ROT, EA, BC]\n  num_frames: 75\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: seasnake\n  attributes: []\n  num_frames: 80\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: sheep\n  attributes: []\n  num_frames: 68\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: shooting\n  attributes: []\n  num_frames: 40\n  set: val\n  eval_t: False\n  year: 2017\n\n- name: skate-jump\n  attributes: []\n  num_frames: 68\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: skate-park\n  attributes: []\n  num_frames: 80\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: slackline\n  attributes: []\n  num_frames: 60\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: snowboard\n  attributes: []\n  num_frames: 66\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: soapbox\n  attributes: [SV, IO, MB, DEF, HO, ROT, AC]\n  num_frames: 99\n  set: val\n  eval_t: True\n  year: 2016\n\n- name: soccerball\n  attributes: [LR, FM, MB, OCC, HO]\n  num_frames: 48\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: stroller\n  attributes: 
[SC, FM, CS, IO, DEF, HO]\n  num_frames: 91\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: stunt\n  attributes: []\n  num_frames: 71\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: subway\n  attributes: []\n  num_frames: 88\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: surf\n  attributes: [SV, FM, CS, IO, DB, HO, OV]\n  num_frames: 55\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: swing\n  attributes: [SC, FM, IO, DEF, OCC, HO]\n  num_frames: 60\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: tandem\n  attributes: []\n  num_frames: 72\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n\n- name: tennis\n  attributes: [SV, FM, IO, MB, DEF, HO]\n  num_frames: 70\n  set: train\n  eval_t: False\n  year: 2016\n\n- name: tennis-vest\n  attributes: []\n  num_frames: 75\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: tractor\n  attributes: []\n  num_frames: 65\n  set: test-dev\n  eval_t: False\n  year: 2017\n\n- name: tractor-sand\n  attributes: []\n  num_frames: 76\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: train\n  attributes: [SC, HO, EA]\n  num_frames: 80\n  set: train\n  eval_t: True\n  year: 2016\n\n- name: tuk-tuk\n  attributes: []\n  num_frames: 59\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: upside-down\n  attributes: []\n  num_frames: 65\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: varanus-cage\n  attributes: []\n  num_frames: 67\n  set: train\n  eval_t: False\n  year: 2017\n\n- name: walking\n  attributes: []\n  num_frames: 72\n  set: train\n  eval_t: False\n  year: 2017"
  },
  {
    "path": "data/palette.txt",
    "content": "0 0 0\n128 0 0\n0 128 0\n128 128 0\n0 0 128\n128 0 128\n0 128 128\n128 128 128\n64 0 0\n191 0 0\n64 128 0\n191 128 0\n64 0 128\n191 0 128\n64 128 128\n191 128 128\n0 64 0\n128 64 0\n0 191 0\n128 191 0\n0 64 128\n128 64 128\n22 22 22\n23 23 23\n24 24 24\n25 25 25\n26 26 26\n27 27 27\n28 28 28\n29 29 29\n30 30 30\n31 31 31\n32 32 32\n33 33 33\n34 34 34\n35 35 35\n36 36 36\n37 37 37\n38 38 38\n39 39 39\n40 40 40\n41 41 41\n42 42 42\n43 43 43\n44 44 44\n45 45 45\n46 46 46\n47 47 47\n48 48 48\n49 49 49\n50 50 50\n51 51 51\n52 52 52\n53 53 53\n54 54 54\n55 55 55\n56 56 56\n57 57 57\n58 58 58\n59 59 59\n60 60 60\n61 61 61\n62 62 62\n63 63 63\n64 64 64\n65 65 65\n66 66 66\n67 67 67\n68 68 68\n69 69 69\n70 70 70\n71 71 71\n72 72 72\n73 73 73\n74 74 74\n75 75 75\n76 76 76\n77 77 77\n78 78 78\n79 79 79\n80 80 80\n81 81 81\n82 82 82\n83 83 83\n84 84 84\n85 85 85\n86 86 86\n87 87 87\n88 88 88\n89 89 89\n90 90 90\n91 91 91\n92 92 92\n93 93 93\n94 94 94\n95 95 95\n96 96 96\n97 97 97\n98 98 98\n99 99 99\n100 100 100\n101 101 101\n102 102 102\n103 103 103\n104 104 104\n105 105 105\n106 106 106\n107 107 107\n108 108 108\n109 109 109\n110 110 110\n111 111 111\n112 112 112\n113 113 113\n114 114 114\n115 115 115\n116 116 116\n117 117 117\n118 118 118\n119 119 119\n120 120 120\n121 121 121\n122 122 122\n123 123 123\n124 124 124\n125 125 125\n126 126 126\n127 127 127\n128 128 128\n129 129 129\n130 130 130\n131 131 131\n132 132 132\n133 133 133\n134 134 134\n135 135 135\n136 136 136\n137 137 137\n138 138 138\n139 139 139\n140 140 140\n141 141 141\n142 142 142\n143 143 143\n144 144 144\n145 145 145\n146 146 146\n147 147 147\n148 148 148\n149 149 149\n150 150 150\n151 151 151\n152 152 152\n153 153 153\n154 154 154\n155 155 155\n156 156 156\n157 157 157\n158 158 158\n159 159 159\n160 160 160\n161 161 161\n162 162 162\n163 163 163\n164 164 164\n165 165 165\n166 166 166\n167 167 167\n168 168 168\n169 169 169\n170 170 170\n171 171 171\n172 172 172\n173 173 173\n174 174 174\n175 
175 175\n176 176 176\n177 177 177\n178 178 178\n179 179 179\n180 180 180\n181 181 181\n182 182 182\n183 183 183\n184 184 184\n185 185 185\n186 186 186\n187 187 187\n188 188 188\n189 189 189\n190 190 190\n191 191 191\n192 192 192\n193 193 193\n194 194 194\n195 195 195\n196 196 196\n197 197 197\n198 198 198\n199 199 199\n200 200 200\n201 201 201\n202 202 202\n203 203 203\n204 204 204\n205 205 205\n206 206 206\n207 207 207\n208 208 208\n209 209 209\n210 210 210\n211 211 211\n212 212 212\n213 213 213\n214 214 214\n215 215 215\n216 216 216\n217 217 217\n218 218 218\n219 219 219\n220 220 220\n221 221 221\n222 222 222\n223 223 223\n224 224 224\n225 225 225\n226 226 226\n227 227 227\n228 228 228\n229 229 229\n230 230 230\n231 231 231\n232 232 232\n233 233 233\n234 234 234\n235 235 235\n236 236 236\n237 237 237\n238 238 238\n239 239 239\n240 240 240\n241 241 241\n242 242 242\n243 243 243\n244 244 244\n245 245 245\n246 246 246\n247 247 247\n248 248 248\n249 249 249\n250 250 250\n251 251 251\n252 252 252\n253 253 253\n254 254 254\n255 255 255"
  },
  {
    "path": "data/run_davis2017.m",
    "content": "clc\n\nsrc_dir = 'data/DAVIS2017/Annotations/480p';\nsave_dir = 'data/DAVIS2017/Annotations_edge/480p';\n\nseqs = dir(src_dir);\nseqs = {seqs.name};\nseqs = seqs(3:end);\n\nfor i = 1 : length(seqs)\n    seq_path = [src_dir, '/', seqs{i}];\n    images = dir([seq_path, '/*.png']);\n    images = {images.name};\n    \n    for j = 1 : length(images)\n        imagefile = fullfile(seq_path, images{j});\n        im = imread(imagefile);\n        \n        res = seg2edge(im, 2, [], 'regular');\n        \n        save_path = fullfile(save_dir, seqs{i});\n        if ~exist(save_path, 'dir')\n            mkdir(save_path)\n        end\n        imwrite(res, fullfile(save_path, images{j}))\n    end\nend\n"
  },
  {
    "path": "data/run_youtube.m",
    "content": "clc\n\nsrc_dir = 'data/YouTubeVOS_2018/train/Annotations';\nsave_dir = 'data/YouTubeVOS_2018/train/Annotations_edge';\n\nseqs = dir(src_dir);\nseqs = {seqs.name};\nseqs = seqs(3:end);\n\nfor i = 1 : length(seqs)\n    disp(seqs{i})\n    seq_path = [src_dir, '/', seqs{i}];\n    images = dir([seq_path, '/*.png']);\n    images = {images.name};\n    \n    for j = 1 : length(images)\n        imagefile = fullfile(seq_path, images{j});\n        im = imread(imagefile);\n        \n        res = seg2edge(im, 2, [], 'regular');\n        \n        save_path = fullfile(save_dir, seqs{i});\n        if ~exist(save_path, 'dir')\n            mkdir(save_path)\n        end\n        imwrite(res, fullfile(save_path, images{j}))\n    end\nend\n"
  },
  {
    "path": "data/seg2edge.m",
    "content": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by the function.\nfunction [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)\n% Get dimensions\n[height, width, chn] = size(seg);\nif(~isempty(labelIgnore))\n    if(chn~=size(labelIgnore, 2))\n        error('Channel dimension not matching ignored label dimension!')\n    end\nend\n\n% Set the considered neighborhood.\nradius_search = max(ceil(radius), 1);\n[X, Y] = meshgrid(1:width, 1:height);\n[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);\n\n% Columnize everything\nX = X(:); Y = Y(:);\nx = x(:); y = y(:);\nif(chn == 1)\n    seg = seg(:);\nelse\n    seg = reshape(seg, [height*width chn]);\nend\n\n% Build circular neighborhood\nidxNeigh = sqrt(x.^2 + y.^2) <= radius;\nx = x(idxNeigh); y = y(idxNeigh);\nnumPxlImg = length(X);\nnumPxlNeigh = length(x);\n\n% Compute Gaussian weight\nidxEdge = false(numPxlImg, 1);\nfor i = 1:numPxlNeigh\n    XNeigh = X+x(i);\n    YNeigh = Y+y(i);\n    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );\n    \n    XCenter = X(idxValid);\n    YCenter = Y(idxValid);\n    XNeigh = XNeigh(idxValid);\n    YNeigh = YNeigh(idxValid);\n    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);\n    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);\n    \n    if(strcmp(edge_type, 'regular'))\n        idxDiff = find(any(LCenter~=LNeigh, 2));\n    elseif(strcmp(edge_type, 'inner'))\n        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );\n    elseif(strcmp(edge_type, 'outer'))\n        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );\n    else\n        error('Wrong edge type input!');\n    end    \n    \n    LCenterEdge = LCenter(idxDiff, :);\n    LNeighEdge = LNeigh(idxDiff, :);\n    idxIgnore2 = false(length(idxDiff), 1);\n    for j = 1:size(labelIgnore, 1)\n        
idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );\n    end\n    \n    idxDiffGT = idxDiff(~idxIgnore2);\n    idxEdge(idxValid(idxDiffGT)) = true;\nend\nidxEdge = reshape(idxEdge, [height, width]);\n"
  },
  {
    "path": "dataloader/base.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# ----------------------------------------------------------------------------\n\nimport functools\nimport os.path as osp\n\nimport numpy as np\n\nfrom skimage.io import ImageCollection\n\nfrom misc.config import cfg\nfrom misc.io_aux import imread_indexed\n\n#################################\n# HELPER FUNCTIONS\n#################################\n\n\ndef _load_annotation(filename, single_object):\n    \"\"\" Load image given filename.\"\"\"\n\n    annotation,_ = imread_indexed(filename)\n\n    if single_object:\n        annotation = (annotation != 0).astype(np.uint8)\n\n    return annotation\n\n\ndef _get_num_objects(annotation):\n    \"\"\" Count number of objects from segmentation mask\"\"\"\n\n    ids = sorted(np.unique(annotation))\n\n    # Remove unknown-label\n    ids = ids[:-1] if ids[-1] == 255 else ids\n\n    # Handle no-background case\n    ids = ids if ids[0] else ids[1:]\n\n    return len(ids)\n\n#################################\n# LOADER CLASSES\n#################################\n\n\nclass BaseLoader(ImageCollection):\n    \"\"\"\n    Base class to load image sets (inherit from skimage.ImageCollection).\n\n    Arguments:\n      path      (string): path to sequence folder.\n      regex     (string): regular expression to define image search pattern.\n      load_func (func)  : function to load image from disk (see skimage.ImageCollection).\n\n    \"\"\"\n\n    def __init__(self, split, path, regex, load_func=None, lmdb_env=None):\n\n        if not lmdb_env == None:\n            key_db = osp.basename(path)\n            with lmdb_env.begin() as txn:\n                
_files_vec = txn.get(key_db.encode()).decode().split('|')\n                _files = [bytes(osp.join(path, f).encode())\n                          for f in _files_vec]\n            super(BaseLoader, self).__init__(_files, load_func=load_func)\n        else:\n            super(BaseLoader, self).__init__(\n                osp.join(path + '/' + regex),load_func=load_func)\n\n        # Sequence name\n        self.name = osp.basename(path)\n        self.split = split\n\n    def __str__(self):\n        return \"< class: '{}' name: '{}', frames: {} >\".format(\n            type(self).__name__, self.name, len(self))\n\n\nclass Sequence(BaseLoader):\n    \"\"\"\n    Load image sequences.\n\n    Arguments:\n      name  (string): sequence name.\n      regex (string): regular expression to define image search pattern.\n\n    \"\"\"\n\n    def __init__(self, split, name, regex=\"*.jpg\", lmdb_env=None):\n\n        super(Sequence, self).__init__(\n            split, osp.join(cfg.PATH.SEQUENCES, name), regex,\n            lmdb_env=lmdb_env)\n\n\nclass SequenceClip_simple:\n    \"\"\"\n    Load image sequences.\n\n    Arguments:\n      name  (string): sequence name.\n      regex (string): regular expression to define image search pattern.\n\n    \"\"\"\n\n    def __init__(self, seq, starting_frame):\n\n        self.__dict__.update(seq.__dict__)\n        self.starting_frame = starting_frame\n\n    def __str__(self):\n        return \"< class: '{}' name: '{}',\" \\\n               \" startingframe: {}, frames: {} >\".format(type(self).__name__,\n                                                         self.name,\n                                                         self.starting_frame,\n                                                         len(self))\n\nclass SequenceClip(BaseLoader):\n    \"\"\"\n    Load image sequences.\n\n    Arguments:\n      name  (string): sequence name.\n      regex (string): regular expression to define image search pattern.\n\n    \"\"\"\n\n    def 
__init__(self, split, name, starting_frame, regex=\"*.jpg\",\n                 lmdb_env=None):\n        super(SequenceClip, self).__init__(\n            split, osp.join(cfg.PATH.SEQUENCES, name), regex, lmdb_env=lmdb_env)\n\n        self.starting_frame = starting_frame\n\n    def __str__(self):\n        return \"< class: '{}' name: '{}',\" \\\n               \" startingframe: {}, frames: {} >\".format(type(self).__name__,\n                                                         self.name,\n                                                         self.starting_frame,\n                                                         len(self))\n\n\nclass Segmentation(BaseLoader):\n\n  \"\"\"\n  Load image sequences.\n\n  Arguments:\n    path          (string): path to sequence folder.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,path,single_object,regex=\"*.png\", lmdb_env=None):\n    super(Segmentation, self).__init__(split,path,regex,\n       functools.partial(_load_annotation,single_object=single_object), lmdb_env=lmdb_env)\n\n    self.n_objects = _get_num_objects(self[0])\n\n  def iter_objects_id(self):\n    \"\"\"\n    Iterate over objects providing object id for each of them.\n    \"\"\"\n    for obj_id in range(1,self.n_objects+1):\n      yield obj_id\n\n  def iter_objects(self):\n    \"\"\"\n    Iterate over objects providing binary masks for each of them.\n    \"\"\"\n\n    for obj_id in self.iter_objects_id():\n      bn_segmentation = [(s==obj_id).astype(np.uint8) for s in self]\n      yield bn_segmentation\n\nclass Annotation(Segmentation):\n\n  \"\"\"\n  Load ground-truth annotations.\n\n  Arguments:\n    name          (string): sequence name.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def 
__init__(self,split,name,single_object,regex=\"*.png\", lmdb_env=None):\n    \n    super(Annotation, self).__init__(\n        split,osp.join(cfg.PATH.ANNOTATIONS,name),single_object,regex, lmdb_env=lmdb_env)\n\n\nclass AnnotationClip_simple:\n    \"\"\"\n    Load ground-truth annotations.\n\n    Arguments:\n      name          (string): sequence name.\n      single_object (bool):   assign same id=1 to each object.\n      regex         (string): regular expression to define image search pattern.\n\n    \"\"\"\n\n    def __init__(self, annot, starting_frame):\n\n        self.__dict__.update(annot.__dict__)\n        self.starting_frame = starting_frame\n\nclass AnnotationClip(Segmentation):\n\n  \"\"\"\n  Load ground-truth annotations.\n\n  Arguments:\n    name          (string): sequence name.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,name,starting_frame,single_object,regex=\"*.png\", lmdb_env=None):\n\n    super(AnnotationClip, self).__init__(\n        split,osp.join(cfg.PATH.ANNOTATIONS,name),single_object,regex, lmdb_env=lmdb_env)\n    self.starting_frame = starting_frame\n    "
  },
  {
    "path": "dataloader/base_youtube.py",
    "content": "import functools\nimport os.path as osp\n\nimport numpy as np\n\nfrom PIL import Image\nfrom skimage.io import ImageCollection\n\nfrom misc.config_youtubeVOS import cfg, phase\nfrom misc.io_aux import imread_indexed,imwrite_indexed\n\n#################################\n# HELPER FUNCTIONS\n#################################\n\ndef _load_annotation(filename,single_object):\n  \"\"\" Load image given filename.\"\"\"\n\n  annotation,_ = imread_indexed(filename)\n\n  if single_object:\n    annotation = (annotation != 0).astype(np.uint8)\n\n  return annotation\n\ndef _get_num_objects(annotation):\n  \"\"\" Count number of objects from segmentation mask\"\"\"\n\n  ids = sorted(np.unique(annotation))\n\n  # Remove unknown-label\n  ids = ids[:-1] if ids[-1] == 255 else ids\n\n  # Handle no-background case\n  ids = ids if ids[0] else ids[1:]\n\n  return len(ids)\n\n#################################\n# LOADER CLASSES\n#################################\n\nclass BaseLoader(ImageCollection):\n\n  \"\"\"\n  Base class to load image sets (inherit from skimage.ImageCollection).\n\n  Arguments:\n    path      (string): path to sequence folder.\n    regex     (string): regular expression to define image search pattern.\n    load_func (func)  : function to load image from disk (see skimage.ImageCollection).\n\n  \"\"\"\n\n  def __init__(self,split,path,regex,load_func=None, lmdb_env=None):\n    \n    if not lmdb_env == None:\n        key_db = osp.basename(path)\n        with lmdb_env.begin() as txn:\n            _files_vec = txn.get(key_db.encode()).decode().split('|')\n            _files = [bytes(osp.join(path, f).encode()) for f in _files_vec]\n        super(BaseLoader, self).__init__(_files, load_func=load_func)\n    else:  \n        super(BaseLoader, self).__init__(\n            osp.join(path + '/' + regex),load_func=load_func)\n\n    # Sequence name\n    self.name = osp.basename(path)\n    self.split = split\n\n    # Check sequence name\n    if split == 
phase.TRAIN.value:\n        if not self.name in cfg.SEQUENCES_TRAIN:\n            raise Exception(\"Sequence name \\'{}\\' not found.\".format(self.name))\n    elif split == phase.VAL.value:\n        if not self.name in cfg.SEQUENCES_VAL:\n            raise Exception(\"Sequence name \\'{}\\' not found.\".format(self.name))\n    elif split == phase.TRAINVAL.value:\n        if not self.name in cfg.SEQUENCES_TRAINVAL:\n            raise Exception(\"Sequence name \\'{}\\' not found.\".format(self.name))\n    else:\n        if not self.name in cfg.SEQUENCES_TEST:\n            raise Exception(\"Sequence name \\'{}\\' not found.\".format(self.name))\n\n  def __str__(self):\n    return \"< class: '{}' name: '{}', frames: {} >\".format(\n        type(self).__name__,self.name,len(self))\n\nclass Sequence(BaseLoader):\n\n  \"\"\"\n  Load image sequences.\n\n  Arguments:\n    name  (string): sequence name.\n    regex (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,name,regex=\"*.jpg\", lmdb_env=None):\n      \n    if split == phase.TRAIN.value:   \n        super(Sequence, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TRAIN,name),regex, lmdb_env=lmdb_env)\n    elif split == phase.VAL.value:\n        super(Sequence, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_VAL,name),regex, lmdb_env=lmdb_env)\n    elif split == phase.TRAINVAL.value:\n        super(Sequence, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TRAINVAL,name),regex, lmdb_env=lmdb_env)\n    else: #split == 'test':\n        super(Sequence, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TEST,name),regex, lmdb_env=lmdb_env)\n\nclass SequenceClip_simple:\n    \"\"\"\n    Load image sequences.\n\n    Arguments:\n      name  (string): sequence name.\n      regex (string): regular expression to define image search pattern.\n\n    \"\"\"\n\n    def __init__(self, seq, starting_frame):\n        if seq.split 
== phase.TRAIN.value:\n            self.__dict__.update(seq.__dict__)\n        else:\n            self.__dict__.update(seq.__dict__)\n        self.starting_frame = starting_frame\n\n    def __str__(self):\n        return \"< class: '{}' name: '{}', startingframe: {}, frames: {} >\".format(\n            type(self).__name__, self.name, self.starting_frame, len(self))\n\nclass SequenceClip(BaseLoader):\n\n  \"\"\"\n  Load image sequences.\n\n  Arguments:\n    name  (string): sequence name.\n    regex (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,name,starting_frame,regex=\"*.jpg\", lmdb_env=None):\n    if split == phase.TRAIN.value:   \n        super(SequenceClip, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TRAIN,name),regex, lmdb_env=lmdb_env)\n    elif split == phase.VAL.value:\n        super(SequenceClip, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_VAL,name),regex, lmdb_env=lmdb_env)\n    elif split == phase.TRAINVAL.value:\n        super(SequenceClip, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TRAINVAL,name),regex, lmdb_env=lmdb_env)\n    else: #split == 'test':\n        super(SequenceClip, self).__init__(\n            split,osp.join(cfg.PATH.SEQUENCES_TEST,name),regex, lmdb_env=lmdb_env)\n    self.starting_frame = starting_frame\n    \n  def __str__(self):\n    return \"< class: '{}' name: '{}', startingframe: {}, frames: {} >\".format(\n        type(self).__name__,self.name,self.starting_frame,len(self))\n\nclass Segmentation(BaseLoader):\n\n  \"\"\"\n  Load image sequences.\n\n  Arguments:\n    path          (string): path to sequence folder.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,path,single_object,regex=\"*.png\", lmdb_env=None):\n    super(Segmentation, self).__init__(split,path,regex,\n       
functools.partial(_load_annotation,single_object=single_object), lmdb_env=lmdb_env)\n\n    self.n_objects = _get_num_objects(self[0])\n\n  def iter_objects_id(self):\n    \"\"\"\n    Iterate over objects providing object id for each of them.\n    \"\"\"\n    for obj_id in range(1,self.n_objects+1):\n      yield obj_id\n\n  def iter_objects(self):\n    \"\"\"\n    Iterate over objects providing binary masks for each of them.\n    \"\"\"\n\n    for obj_id in self.iter_objects_id():\n      bn_segmentation = [(s==obj_id).astype(np.uint8) for s in self]\n      yield bn_segmentation\n\nclass Annotation(Segmentation):\n\n  \"\"\"\n  Load ground-truth annotations.\n\n  Arguments:\n    name          (string): sequence name.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,name,single_object,regex=\"*.png\", lmdb_env=None):\n    \n    if split == phase.TRAIN.value:   \n        super(Annotation, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TRAIN,name),single_object,regex, lmdb_env=lmdb_env)\n    elif split == phase.VAL.value:\n        super(Annotation, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_VAL,name),single_object,regex, lmdb_env=lmdb_env)\n    elif split == phase.TRAINVAL.value:\n        super(Annotation, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TRAINVAL,name),single_object,regex, lmdb_env=lmdb_env)\n    else: #split == 'test':\n        super(Annotation, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TEST,name),single_object,regex, lmdb_env=lmdb_env)\n\nclass AnnotationClip_simple:\n    \"\"\"\n    Load ground-truth annotations.\n\n    Arguments:\n      name          (string): sequence name.\n      single_object (bool):   assign same id=1 to each object.\n      regex         (string): regular expression to define image search pattern.\n\n    \"\"\"\n    \n  
  def __init__(self, annot, starting_frame):\n        if annot.split == phase.TRAIN.value:\n            self.__dict__.update(annot.__dict__)\n        else:\n            self.__dict__.update(annot.__dict__)\n        self.starting_frame = starting_frame\n\nclass AnnotationClip(Segmentation):\n\n  \"\"\"\n  Load ground-truth annotations.\n\n  Arguments:\n    name          (string): sequence name.\n    single_object (bool):   assign same id=1 to each object.\n    regex         (string): regular expression to define image search pattern.\n\n  \"\"\"\n\n  def __init__(self,split,name,starting_frame,single_object,regex=\"*.png\", lmdb_env=None):\n    if split == phase.TRAIN.value:   \n        super(AnnotationClip, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TRAIN,name),single_object,regex, lmdb_env=lmdb_env)\n    elif split == phase.VAL.value:\n        super(AnnotationClip, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_VAL,name),single_object,regex, lmdb_env=lmdb_env)\n    elif split == phase.TRAINVAL.value:\n        super(AnnotationClip, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TRAINVAL,name),single_object,regex, lmdb_env=lmdb_env)\n    else: #split == 'test':\n        super(AnnotationClip, self).__init__(\n            split,osp.join(cfg.PATH.ANNOTATIONS_TEST,name),single_object,regex, lmdb_env=lmdb_env)\n\n    self.starting_frame = starting_frame\n    \n"
  },
  {
    "path": "dataloader/custom_transforms.py",
    "content": "import random\nimport cv2\nimport numpy as np\nimport torch\nimport math\n\n\nclass ScaleNRotate(object):\n    \"\"\"Scale (zoom-in, zoom-out) and Rotate the image and the ground truth.\n    Args:\n        two possibilities:\n        1.  rots (tuple): (minimum, maximum) rotation angle\n            scales (tuple): (minimum, maximum) scale\n        2.  rots [list]: list of fixed possible rotation angles\n            scales [list]: list of fixed possible scales\n    \"\"\"\n    def __init__(self, rots=(-30, 30), scales=(.75, 1.25)):\n        assert (isinstance(rots, type(scales)))\n        self.rots = rots\n        self.scales = scales\n\n    def __call__(self, sample):\n\n        if type(self.rots) == tuple:\n            # Continuous range of scales and rotations\n            rot = (self.rots[1] - self.rots[0]) * random.random() - \\\n                  (self.rots[1] - self.rots[0])/2\n\n            sc = (self.scales[1] - self.scales[0]) * random.random() - \\\n                 (self.scales[1] - self.scales[0]) / 2 + 1\n        elif type(self.rots) == list:\n            # Fixed range of scales and rotations\n            rot = self.rots[random.randint(0, len(self.rots))]\n            sc = self.scales[random.randint(0, len(self.scales))]\n\n        for elem in sample.keys():\n            if 'fname' in elem:\n                continue\n\n            tmp = sample[elem]\n\n            h, w = tmp.shape[:2]\n            center = (w / 2, h / 2)\n            assert(center != 0)  # Strange behaviour warpAffine\n            M = cv2.getRotationMatrix2D(center, rot, sc)\n\n            if ((tmp == 0) | (tmp == 1)).all():\n                flagval = cv2.INTER_NEAREST\n            else:\n                flagval = cv2.INTER_CUBIC\n            tmp = cv2.warpAffine(tmp, M, (w, h), flags=flagval)\n\n            sample[elem] = tmp\n\n        return sample\n\n\nclass Resize(object):\n    \"\"\"Randomly resize the image and the ground truth to specified scales.\n    Args:\n    
    scales (list): the list of scales\n    \"\"\"\n    def __init__(self, scales=[0.5, 0.8, 1]):\n        self.scales = scales\n\n    def __call__(self, sample):\n\n        # Fixed range of scales\n        sc = self.scales[random.randint(0, len(self.scales) - 1)]\n\n        for elem in sample.keys():\n            if 'fname' in elem:\n                continue\n            tmp = sample[elem]\n\n            if tmp.ndim == 2:\n                flagval = cv2.INTER_NEAREST\n            else:\n                flagval = cv2.INTER_CUBIC\n\n            tmp = cv2.resize(tmp, None, fx=sc, fy=sc, interpolation=flagval)\n\n            sample[elem] = tmp\n\n        return sample\n\n\nclass RandomHorizontalFlip(object):\n    \"\"\"Horizontally flip the given image and ground truth randomly with a probability of 0.5.\"\"\"\n\n    def __call__(self, sample):\n\n        if random.random() < 0.5:\n            for elem in sample.keys():\n                if 'fname' in elem:\n                    continue\n                tmp = sample[elem]\n                tmp = cv2.flip(tmp, flipCode=1)\n                sample[elem] = tmp\n\n        return sample\n\nclass ToTensor(object):\n    \"\"\"Convert ndarrays in sample to Tensors.\"\"\"\n\n    def __call__(self, sample):\n\n        for elem in sample.keys():\n            if 'fname' in elem:\n                continue\n            tmp = sample[elem]\n\n            if tmp.ndim == 2:\n                tmp = tmp[:, :, np.newaxis]\n            print('111111111111:', tmp.shape)\n\n            # swap color axis because\n            # numpy image: H x W x C\n            # torch image: C X H X W\n\n            tmp = tmp.transpose((2, 0, 1))\n            sample[elem] = torch.from_numpy(tmp)\n\n        return sample\n"
  },
  {
    "path": "dataloader/dataset_utils.py",
    "content": "def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,\n                                   target_transforms=None, augment=False,\n                                   inputRes=None):\n\n    from .davis2017_youtubevos_ehem import DAVISLoader as MyChosenDataset\n\n    dataset = MyChosenDataset(args, split=split, transform=image_transforms,\n                              target_transform=target_transforms,\n                              augment=augment, inputRes=inputRes)\n    return dataset\n"
  },
  {
    "path": "dataloader/davis2017.py",
    "content": "from __future__ import division\n\nimport torch\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb\nimport numpy as np\nfrom PIL import Image\nimport os.path as osp\nfrom scipy.misc import imresize\nfrom matplotlib import pyplot as plt\n\nfrom torchvision import transforms\nfrom dataloader import custom_transforms as tr\nfrom .base import Sequence, Annotation\nfrom misc.config import cfg, phase, db_read_sequences\n\nclass DAVISLoader(data.Dataset):\n    '''\n    Dataset for DAVIS\n    '''\n\n    def __init__(self, args, split, inputRes, augment=False,\n                 transform=None, target_transform=None):\n        self._year = args.year\n        self._phase = split\n        self.transform = transform\n        self.target_transform = target_transform\n        self.inputRes = inputRes\n        self.augment = augment\n        self.augment_transform = None\n        self._single_object = False\n\n        assert args.year == \"2017\" or args.year == \"2016\"\n\n        if augment:\n            self.augment_transform = transforms.Compose([\n                tr.RandomHorizontalFlip(),\n                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),\n                                scales=(.75, 1.25))])\n\n        self._db_sequences = db_read_sequences(args.year, self._phase)\n\n        # Check lmdb existance. If not proceed with standard dataloader.\n        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')\n        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')\n\n        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):\n            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)\n            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)\n        else:\n            lmdb_env_seq = None\n            lmdb_env_annot = None\n            print('LMDB not found. This could affect the data loading time. 
It is recommended to use LMDB.')\n\n        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq) for s in self._db_sequences]\n        self._db_sequences = db_read_sequences(args.year, self._phase)\n\n        # Load annotations\n        self.annotations = [Annotation(self._phase,s.name, self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]\n        self._db_sequences = db_read_sequences(args.year, self._phase)\n\n        # Load Videos\n        self.videos = []\n        for seq, s in zip(self.sequences, self._db_sequences):\n            if s['set'] == self._phase:\n                self.videos.append(s['name'])\n\n        self.imagefiles = []\n        self.maskfiles = []\n        self.flowfiles = []\n        self.edgefiles = []\n\n        for _video in self.videos:\n            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES, _video, '*.jpg')))\n            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS, _video, '*.png')))\n            flowfiles = sorted(glob.glob(os.path.join(cfg.PATH.FLOW, _video, '*.png')))\n            edgefiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_EDGE, _video, '*.png')))\n\n            self.imagefiles.extend(imagefiles[:-1])\n            self.maskfiles.extend(maskfiles[:-1])\n            self.flowfiles.extend(flowfiles)\n            self.edgefiles.extend(edgefiles[:-1])\n\n        print('images: ', len(self.imagefiles))\n        print('masks: ', len(self.maskfiles))\n\n        assert(len(self.imagefiles) == len(self.maskfiles) == len(self.flowfiles) == len(self.edgefiles))\n\n\n    def __len__(self):\n        return len(self.imagefiles)\n\n\n    def __getitem__(self, index):\n        imagefile = self.imagefiles[index]\n        maskfile = self.maskfiles[index]\n        flowfile = self.flowfiles[index]\n        edgefile = self.edgefiles[index]\n\n        image = Image.open(imagefile).convert('RGB')\n        flow = Image.open(flowfile).convert('RGB')\n\n        
mask = cv2.imread(maskfile, 0)\n        mask[mask > 0] = 255\n\n        bdry = cv2.imread(edgefile, 0)\n\n        #plt.imshow(bdry)\n        #plt.show()\n\n        mask = Image.fromarray(mask)\n        bdry = Image.fromarray(bdry)\n\n        if self.inputRes is not None:\n            image = imresize(image, self.inputRes)\n            flow = imresize(flow, self.inputRes)\n            mask = imresize(mask, self.inputRes, interp='nearest')\n            bdry = imresize(bdry, self.inputRes, interp='nearest')\n\n        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry}\n\n        if self.augment_transform is not None:\n            sample = self.augment_transform(sample)\n\n        image, flow, mask, bdry = sample['image'], sample['flow'], sample['mask'], sample['bdry']\n\n        if self.transform is not None:\n            image = self.transform(image)\n            flow = self.transform(flow)\n\n        if self.target_transform is not None:\n            mask = self.target_transform(mask)\n            bdry = self.target_transform(bdry)\n\n        return image, flow, mask, bdry\n"
  },
  {
    "path": "dataloader/davis2017_youtubevos_ehem.py",
    "content": "from __future__ import division\n\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb\nimport numpy as np\nfrom PIL import Image\nimport os.path as osp\nfrom scipy.misc import imresize\n\nfrom torchvision import transforms\nfrom dataloader import custom_transforms as tr\nfrom .base import Sequence, Annotation\n\nfrom misc.config import cfg as cfg_davis\nfrom misc.config_youtubeVOS import cfg as cfg_youtube\nfrom misc.config import db_read_sequences as db_read_sequences_davis\nfrom misc.config_youtubeVOS import db_read_sequences_train as db_read_sequences_train_youtube\n\n\nclass DAVISLoader(data.Dataset):\n    '''\n    Dataset for DAVIS\n    '''\n\n    def __init__(self, args, split, inputRes, augment=False,\n                 transform=None, target_transform=None):\n        self._year = args.year\n        self._phase = split\n        self.transform = transform\n        self.target_transform = target_transform\n        self.inputRes = inputRes\n        self.augment = augment\n        self.augment_transform = None\n        self._single_object = False\n\n        assert args.year == \"2017\" or args.year == \"2016\"\n\n        if augment:\n            self.augment_transform = transforms.Compose([\n                tr.RandomHorizontalFlip(),\n                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),\n                                scales=(.75, 1.25))])\n\n        self.imagefiles = []\n        self.maskfiles = []\n        self.flowfiles = []\n        self.edgefiles = []\n        self.hedfiles = []\n\n        if split == 'train':\n            self.load_davis(args)\n            self.load_youtubevos(args)\n        else:\n            self.load_davis(args)\n\n    def __len__(self):\n        return len(self.imagefiles)\n\n    def __getitem__(self, index):\n        imagefile = self.imagefiles[index]\n        maskfile = self.maskfiles[index]\n        flowfile = self.flowfiles[index]\n        edgefile = 
self.edgefiles[index]\n        hedfile = self.hedfiles[index]\n\n        image = Image.open(imagefile).convert('RGB')\n        flow = Image.open(flowfile).convert('RGB')\n\n        mask = cv2.imread(maskfile, 0)\n        mask[mask > 0] = 255\n\n        bdry = cv2.imread(edgefile, 0)\n        hed = cv2.imread(hedfile, 0)\n\n        # enlarge the object mask\n        kernel = np.ones((11, 11), np.uint8)  # use a large kernel\n        dilated_mask = cv2.dilate(mask, kernel, iterations=1)\n        inverse_dilated_mask = (255.0 - dilated_mask) / 255.0\n        negative_pixels = hed * inverse_dilated_mask\n        kernel = np.ones((5, 5), np.uint8)  # use a small kernel\n        negative_pixels = cv2.dilate(negative_pixels, kernel, iterations=1)\n\n        mask = Image.fromarray(mask)\n        bdry = Image.fromarray(bdry)\n        negative_pixels = Image.fromarray(negative_pixels)\n\n        if self.inputRes is not None:\n            image = imresize(image, self.inputRes)\n            flow = imresize(flow, self.inputRes)\n            mask = imresize(mask, self.inputRes, interp='nearest')\n            bdry = imresize(bdry, self.inputRes, interp='nearest')\n            negative_pixels = imresize(negative_pixels, self.inputRes,\n                                       interp='nearest')\n\n        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry,\n                  'negative_pixels': negative_pixels}\n\n        if self.augment_transform is not None:\n            sample = self.augment_transform(sample)\n\n        image, flow, mask, bdry, negative_pixels =\\\n            sample['image'], sample['flow'],\\\n            sample['mask'], sample['bdry'], sample['negative_pixels']\n\n        if self.transform is not None:\n            image = self.transform(image)\n            flow = self.transform(flow)\n\n        if self.target_transform is not None:\n            mask = mask[:, :, np.newaxis]\n            bdry = bdry[:, :, np.newaxis]\n            negative_pixels 
= negative_pixels[:, :, np.newaxis]\n            mask = self.target_transform(mask)\n            bdry = self.target_transform(bdry)\n            negative_pixels = self.target_transform(negative_pixels)\n\n        return image, flow, mask, bdry, negative_pixels\n\n    def load_youtubevos(self, args):\n        self._db_sequences = db_read_sequences_train_youtube()\n\n        # Check lmdb existance. If not proceed with standard dataloader.\n        lmdb_env_seq_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_seq')\n        lmdb_env_annot_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_annot')\n\n        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):\n            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)\n            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)\n        else:\n            lmdb_env_seq = None\n            lmdb_env_annot = None\n            print('LMDB not found. This could affect the data loading time.'\n                  ' It is recommended to use LMDB.')\n\n        # Load sequences\n        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq)\n                          for s in self._db_sequences]\n\n        # Load sequences\n        videos = []\n        for seq, s in zip(self.sequences, self._db_sequences):\n            videos.append(s)\n\n        for _video in videos:\n            imagefile = sorted(glob.glob(os.path.join(\n                cfg_youtube.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))\n            maskfile = sorted(glob.glob(os.path.join(\n                cfg_youtube.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))\n            flowfile = sorted(glob.glob(os.path.join(\n                cfg_youtube.PATH.FLOW, _video, '*.png')))\n            edgefile = sorted(glob.glob(os.path.join(\n                cfg_youtube.PATH.ANNOTATIONS_TRAIN_EDGE, _video, '*.png')))\n            hedfile = sorted(glob.glob(os.path.join(\n                cfg_youtube.PATH.HED, _video, '*.jpg')))\n\n            
self.imagefiles.extend(imagefile[:-1:10])\n            self.maskfiles.extend(maskfile[:-1:10])\n            self.flowfiles.extend(flowfile[::10])\n            self.edgefiles.extend(edgefile[:-1:10])\n            self.hedfiles.extend(hedfile[:-1:10])\n\n        print('images: ', len(self.imagefiles))\n        print('masks: ', len(self.maskfiles))\n        print('hed: ', len(self.hedfiles))\n        print('flow: ', len(self.flowfiles))\n        print('edge: ', len(self.edgefiles))\n\n        assert(len(self.imagefiles) == len(self.maskfiles) ==\n               len(self.flowfiles) == len(self.edgefiles) ==\n               len(self.hedfiles))\n\n    def load_davis(self, args):\n        self._db_sequences = db_read_sequences_davis(args.year, self._phase)\n\n        # Check lmdb existance. If not proceed with standard dataloader.\n        lmdb_env_seq_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_seq')\n        lmdb_env_annot_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_annot')\n\n        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):\n            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)\n            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)\n        else:\n            lmdb_env_seq = None\n            lmdb_env_annot = None\n            print('LMDB not found. 
This could affect the data loading time.'\n                  ' It is recommended to use LMDB.')\n\n        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq)\n                          for s in self._db_sequences]\n        self._db_sequences = db_read_sequences_davis(args.year, self._phase)\n\n        # Load annotations\n        self.annotations = [Annotation(\n            self._phase, s.name, self._single_object, lmdb_env=lmdb_env_annot)\n            for s in self._db_sequences]\n        self._db_sequences = db_read_sequences_davis(args.year, self._phase)\n\n        # Load Videos\n        videos = []\n        for seq, s in zip(self.sequences, self._db_sequences):\n            if s['set'] == self._phase:\n                videos.append(s['name'])\n\n        for _video in videos:\n            imagefile = sorted(glob.glob(os.path.join(\n                cfg_davis.PATH.SEQUENCES, _video, '*.jpg')))\n            maskfile = sorted(glob.glob(os.path.join(\n                cfg_davis.PATH.ANNOTATIONS, _video, '*.png')))\n            flowfile = sorted(glob.glob(os.path.join(\n                cfg_davis.PATH.FLOW, _video, '*.png')))\n            edgefile = sorted(glob.glob(os.path.join(\n                cfg_davis.PATH.ANNOTATIONS_EDGE, _video, '*.png')))\n            hedfile = sorted(glob.glob(os.path.join(\n                cfg_davis.PATH.HED, _video, '*.jpg')))\n\n            self.imagefiles.extend(imagefile[:-1])\n            self.maskfiles.extend(maskfile[:-1])\n            self.flowfiles.extend(flowfile)\n            self.edgefiles.extend(edgefile[:-1])\n            self.hedfiles.extend(hedfile[:-1])\n\n        print('images: ', len(self.imagefiles))\n        print('masks: ', len(self.maskfiles))\n        print('hed: ', len(self.hedfiles))\n        print('flow: ', len(self.flowfiles))\n        print('edge: ', len(self.edgefiles))\n\n        assert(len(self.imagefiles) == len(self.maskfiles) ==\n               len(self.flowfiles) == len(self.edgefiles) 
==\n               len(self.hedfiles))"
  },
  {
    "path": "dataloader/hed_bsds.py",
    "content": "import os\nfrom PIL import Image\n\nfrom torch.utils import data\n\nclass HEDBSDSTrain(data.Dataset):\n    def __init__(self, root_dir, transform=None, target_transform=None):\n        self.transform = transform\n        self.target_transform = target_transform\n\n        train_pair = os.path.join(root_dir, 'train_pair.lst')\n        with open(train_pair) as f:\n            lines = f.readlines()\n\n        self.imagefiles = []\n        self.labelfiles = []\n        for line in lines:\n            splits = line.split()\n            self.imagefiles.append(os.path.join(root_dir, splits[0]))\n            self.labelfiles.append(os.path.join(root_dir, splits[1]))\n\n\n    def __len__(self):\n        return len(self.imagefiles)\n\n\n    def __getitem__(self, index):\n        imagefile = self.imagefiles[index]\n        labelfile = self.labelfiles[index]\n\n        image = Image.open(imagefile).convert('RGB')\n        label = Image.open(labelfile).convert('L')\n\n        if self.transform is not None:\n            image = self.transform(image)\n\n        if self.target_transform is not None:\n            label = self.target_transform(label)\n\n        return image, label\n\n\nclass HEDBSDSTest(data.Dataset):\n    def __init__(self, root_dir, transform=None):\n        self.transform = transform\n\n        train_pair = os.path.join(root_dir, 'test.lst')\n        with open(train_pair) as f:\n            lines = f.readlines()\n\n        self.imagefiles = []\n        for line in lines:\n            splits = line.split()\n            self.imagefiles.append(os.path.join(root_dir, splits[0]))\n\n    def __len__(self):\n        return len(self.imagefiles)\n\n    def __getitem__(self, index):\n        imagefile = self.imagefiles[index]\n\n        image = Image.open(imagefile).convert('RGB')\n\n        if self.transform is not None:\n            image = self.transform(image)\n\n        return image, imagefile"
  },
  {
    "path": "dataloader/youtubeVOS.py",
    "content": "#TODO\n\nfrom collections import namedtuple\n\nimport os\nimport cv2\nimport numpy as np\n\nfrom PIL import Image\nfrom .base_youtube import Sequence, SequenceClip, Annotation, AnnotationClip, BaseLoader, Segmentation, SequenceClip_simple, AnnotationClip_simple\nfrom misc.config_youtubeVOS import cfg,phase,db_read_sequences_train,db_read_sequences_val, db_read_sequences_test, db_read_sequences_trainval\nimport os.path as osp\nimport glob\nimport lmdb\n\nfrom scipy.misc import imresize\n\nfrom torch.utils import data\nfrom torchvision import transforms\nfrom dataloader import custom_transforms as tr\n\nclass YoutubeVOSLoader(data.Dataset):\n    def __init__(self,\n                 args,\n                 transform=None,\n                 target_transform=None,\n                 augment=False,\n                 split = 'train',\n                 inputRes = None):\n\n        self._phase = split\n        self._single_object = args.single_object\n        self._length_clip = args.length_clip\n        self.transform = transform\n        self.target_transform = target_transform\n        self.split = split\n        self.inputRes = inputRes\n        self.max_seq_len = args.gt_maxseqlen\n        self.dataset = args.dataset\n        self.flip = augment\n\n        if augment:\n            self.augment_transform = transforms.Compose([\n                tr.RandomHorizontalFlip(),\n                tr.ScaleNRotate(rots=(-args.rotation, args.rotation), scales=(.75, 1.25))])\n        else:\n            self.augment_transform = None\n\n        if self._phase == phase.TRAIN.value:\n            self._db_sequences = db_read_sequences_train()\n        elif self._phase == phase.VAL.value:\n            self._db_sequences = db_read_sequences_val()\n        elif self._phase == phase.TRAINVAL.value:\n            self._db_sequences = db_read_sequences_trainval()\n        else: #self._phase == 'test':\n            self._db_sequences = db_read_sequences_test()\n\n        # Check 
lmdb existance. If not proceed with standard dataloader.\n        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')\n        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')\n\n        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):\n            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)\n            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)\n        else:\n            lmdb_env_seq = None\n            lmdb_env_annot = None\n            print('LMDB not found. This could affect the data loading time. It is recommended to use LMDB.')\n\n        # Load sequences\n        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq) for s in self._db_sequences]\n\n        # Load annotations\n        self.annotations = [Annotation(self._phase,s,self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]\n\n        # Load sequences\n        self.videos = []\n        for seq, s in zip(self.sequences, self._db_sequences):\n            self.videos.append(s)\n\n        self.imagefiles = []\n        self.maskfiles = []\n\n        for _video in self.videos:\n            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))\n            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))\n\n            self.imagefiles.extend(imagefiles)\n            self.maskfiles.extend(maskfiles)\n        print('images: ', len(self.imagefiles))\n        print('masks: ', len(self.maskfiles))\n\n    def __len__(self):\n        return len(self.imagefiles)\n\n    def __getitem__(self, index):\n        imagefile = self.imagefiles[index]\n        maskfile = self.maskfiles[index]\n\n        image = Image.open(imagefile).convert('RGB')\n        mask = cv2.imread(maskfile, 0)\n        mask[mask > 0] = 255\n        mask = Image.fromarray(mask)\n\n        if self.inputRes is not None:\n            image = imresize(image, self.inputRes)\n            mask = imresize(mask, 
self.inputRes, interp='nearest')\n\n        sample = {'image': image, 'gt': mask}\n\n        if self.augment_transform is not None:\n            sample = self.augment_transform(sample)\n\n        image, mask = sample['image'], sample['gt']\n        if self.transform is not None:\n            image = self.transform(image)\n        if self.target_transform is not None:\n            mask = self.target_transform(mask)\n\n        return image, mask"
  },
  {
    "path": "dataset_lmdb_generator.py",
    "content": "# This script generates a data structure in the form of key-value storage. This is made in the huge amount of \n# calls to the function os.listdir inside base_youtube.py\n\nimport os\nimport lmdb\n\nfrom args import get_parser\n\n\nclass LMDBGenerator:\n    def __init__(self, ext='.jpg', gen_type='seq'):\n        self.ext = ext\n        self.gen_type = gen_type\n\n    def generate_lmdb_file(self, root_dir, frames_dir):\n        env = lmdb.open(os.path.join(root_dir, 'lmdb_' + self.gen_type))\n        root_in_dirs = os.listdir(frames_dir)\n\n        for d in root_in_dirs:\n            folder_dir = os.path.join(frames_dir, d)\n\n            _files_basename = sorted([f for f in os.listdir(folder_dir) if f.endswith(self.ext)])\n            files_str_vec = '|'.join(_files_basename)\n\n            print( \"Generating lmdb for: \" + folder_dir)\n            with env.begin(write=True) as txn:\n                txn.put(d.encode('ascii'), files_str_vec.encode())\n\n\nif __name__ == \"__main__\":\n    parser = get_parser()\n    args = parser.parse_args()\n    \n    if args.dataset == 'youtube':\n        from misc.config_youtubeVOS import cfg\n\n        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')\n        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TRAIN)\n        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TEST)\n    \n        frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')\n        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TRAIN)\n        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TEST)\n        \n    else:\n        from misc.config import cfg\n\n        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')\n        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES)\n    \n     
   frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')\n        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS)\n"
  },
  {
    "path": "measures/__init__.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\nfrom .jaccard     import db_eval_iou\nfrom .f_boundary  import db_eval_boundary\nfrom misc import log\n\ntry:\n  from .t_stability import db_eval_t_stab\nexcept:\n  log.warning(\"Temporal stability not available\")\n\nfrom .statistics import _statistics\n"
  },
  {
    "path": "measures/f_boundary.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\nimport numpy as np\n\n\"\"\" Utilities for computing, reading and saving benchmark evaluation.\"\"\"\n\ndef db_eval_boundary(foreground_mask,gt_mask,bound_th=0.008):\n\t\"\"\"\n\tCompute mean,recall and decay from per-frame evaluation.\n\tCalculates precision/recall for boundaries between foreground_mask and\n\tgt_mask using morphological operators to speed it up.\n\tArguments:\n\t\tforeground_mask (ndarray): binary segmentation image.\n\t\tgt_mask         (ndarray): binary annotated image.\n\tReturns:\n\t\tF (float): boundaries F-measure\n\t\tP (float): boundaries precision\n\t\tR (float): boundaries recall\n\t\"\"\"\n\tassert np.atleast_3d(foreground_mask).shape[2] == 1\n\n\tbound_pix = bound_th if bound_th >= 1 else \\\n\t\t\tnp.ceil(bound_th*np.linalg.norm(foreground_mask.shape))\n\n\t# Get the pixel boundaries of both masks\n\tfg_boundary = seg2bmap(foreground_mask);\n\tgt_boundary = seg2bmap(gt_mask);\n\n\tfrom skimage.morphology import binary_dilation,disk\n\n\tfg_dil = binary_dilation(fg_boundary,disk(bound_pix))\n\tgt_dil = binary_dilation(gt_boundary,disk(bound_pix))\n\n\t# Get the intersection\n\tgt_match = gt_boundary * fg_dil\n\tfg_match = fg_boundary * gt_dil\n\n\t# Area of the intersection\n\tn_fg     = np.sum(fg_boundary)\n\tn_gt     = np.sum(gt_boundary)\n\n\t#% Compute precision and recall\n\tif n_fg == 0 and  n_gt > 0:\n\t\tprecision = 1\n\t\trecall = 0\n\telif n_fg > 0 and n_gt == 0:\n\t\tprecision = 0\n\t\trecall = 1\n\telif n_fg == 
0  and n_gt == 0:\n\t\tprecision = 1\n\t\trecall = 1\n\telse:\n\t\tprecision = np.sum(fg_match)/float(n_fg)\n\t\trecall    = np.sum(gt_match)/float(n_gt)\n\n\t# Compute F measure\n\tif precision + recall == 0:\n\t\tF = 0\n\telse:\n\t\tF = 2*precision*recall/(precision+recall);\n\n\treturn F\n\ndef seg2bmap(seg,width=None,height=None):\n\t\"\"\"\n\tFrom a segmentation, compute a binary boundary map with 1 pixel wide\n\tboundaries.  The boundary pixels are offset by 1/2 pixel towards the\n\torigin from the actual segment boundary.\n\tArguments:\n\t\tseg     : Segments labeled from 1..k.\n\t\twidth\t  :\tWidth of desired bmap  <= seg.shape[1]\n\t\theight  :\tHeight of desired bmap <= seg.shape[0]\n\tReturns:\n\t\tbmap (ndarray):\tBinary boundary map.\n\t David Martin <dmartin@eecs.berkeley.edu>\n\t January 2003\n \"\"\"\n\n\tseg = seg.astype(np.bool)\n\tseg[seg>0] = 1\n\n\tassert np.atleast_3d(seg).shape[2] == 1\n\n\twidth  = seg.shape[1] if width  is None else width\n\theight = seg.shape[0] if height is None else height\n\n\th,w = seg.shape[:2]\n\n\tar1 = float(width) / float(height)\n\tar2 = float(w) / float(h)\n\n\tassert not (width>w | height>h | abs(ar1-ar2)>0.01),\\\n\t\t\t'Can''t convert %dx%d seg to %dx%d bmap.'%(w,h,width,height)\n\n\te  = np.zeros_like(seg)\n\ts  = np.zeros_like(seg)\n\tse = np.zeros_like(seg)\n\n\te[:,:-1]    = seg[:,1:]\n\ts[:-1,:]    = seg[1:,:]\n\tse[:-1,:-1] = seg[1:,1:]\n\n\tb        = seg^e | seg^s | seg^se\n\tb[-1,:]  = seg[-1,:]^e[-1,:]\n\tb[:,-1]  = seg[:,-1]^s[:,-1]\n\tb[-1,-1] = 0\n\n\tif w == width and h == height:\n\t\tbmap = b\n\telse:\n\t\tbmap = np.zeros((height,width))\n\t\tfor x in range(w):\n\t\t\tfor y in range(h):\n\t\t\t\tif b[y,x]:\n\t\t\t\t\tj = 1+floor((y-1)+height / h)\n\t\t\t\t\ti = 1+floor((x-1)+width  / h)\n\t\t\t\t\tbmap[j,i] = 1;\n\n\treturn bmap"
  },
  {
    "path": "measures/jaccard.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\n\"\"\" Compute Jaccard Index. \"\"\"\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n\ndef db_eval_iou_multi(annotations, segmentations):\n    iou = 0.0\n    batch_size = annotations.shape[0]\n\n    for i in range(batch_size):\n        annotation = annotations[i, 0, :, :]\n        segmentation = segmentations[i, 0, :, :]\n\n        iou += db_eval_iou(annotation, segmentation)\n\n    iou /= batch_size\n    return iou\n\n\ndef db_eval_iou(annotation,segmentation):\n\n    \"\"\" Compute region similarity as the Jaccard Index.\n    Arguments:\n        annotation   (ndarray): binary annotation   map.\n        segmentation (ndarray): binary segmentation map.\n    Return:\n        jaccard (float): region similarity\n \"\"\"\n\n    annotation = annotation > 0.5\n    segmentation = segmentation > 0.5\n\n    if np.isclose(np.sum(annotation), 0) and\\\n            np.isclose(np.sum(segmentation), 0):\n        return 1\n    else:\n        return np.sum((annotation & segmentation)) / \\\n                np.sum((annotation | segmentation), dtype=np.float32)"
  },
  {
    "path": "measures/statistics.py",
    "content": "import numpy as np\nimport warnings\n\ndef mean(X):\n  \"\"\"\n  Compute average ignoring NaN values.\n  \"\"\"\n\n  return np.nanmean(X)\n\ndef recall(X,threshold=0.5):\n  \"\"\"\n  Fraction of values of X scoring higher than 'threshold'\n  \"\"\"\n  return mean(np.array(X)>threshold)\n\ndef decay(X,n_bins=4):\n  \"\"\"\n  Performance loss over time.\n  \"\"\"\n\n  ids = np.round(np.linspace(1,len(X),n_bins+1)+1e-10)-1;\n  ids = ids.astype(np.uint8)\n\n  D_bins = [X[ids[i]:ids[i+1]+1] for i in range(0,4)]\n\n  with warnings.catch_warnings():\n    warnings.simplefilter(\"ignore\", category=RuntimeWarning)\n    D = np.nanmean(D_bins[0])-np.mean(D_bins[3])\n  return D\n\ndef std(X):\n  \"\"\"\n  Compute standard deviation.\n  \"\"\"\n  return np.std(X)\n\n_statistics = {\n      'decay' : decay,\n      'mean'  : mean,\n      'recall': recall,\n      'std'   : std\n      }\n\ndef get(name):\n  return _statistics[name]\n"
  },
  {
    "path": "measures/t_stability.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\nimport sys\nimport numpy as np\nimport scipy.spatial.distance as ssd\nfrom tstab import *\n\ndef get_bijective_pairs(pairs,costmat):\n\tbij_pairs = bij_pairs_one_dim(pairs, costmat,0)\n\tbij_pairs = bij_pairs_one_dim(bij_pairs, costmat.T,1)\n\treturn bij_pairs\n\ndef bij_pairs_one_dim(pairs, costmat, left_or_right):\n\n\tbij_pairs = []\n\tids1      = np.unique(pairs[:,left_or_right])\n\n\tfor ii in range(len(ids1)):\n\t\tcurr_pairs = pairs[pairs[:,left_or_right]==ids1[ii],:].astype(np.int)\n\t\tcurr_costs = costmat[curr_pairs[:,left_or_right], curr_pairs[:,1-left_or_right]]\n\t\tb = np.argmin(curr_costs)\n\t\tbij_pairs.append(curr_pairs[b])\n\n\treturn np.array(bij_pairs)\n\ndef hist_cost_2(BH1,BH2):\n\n\tnsamp1,nbins=BH1.shape\n\tnsamp2,nbins=BH2.shape\n\n\teps  = 2.2204e-16\n\tBH1n = BH1 / (np.sum(BH1,axis=1,keepdims=True)+eps)\n\tBH2n = BH2 / (np.sum(BH2,axis=1,keepdims=True)+eps)\n\n\ttmp1 = np.tile(np.transpose(np.atleast_3d(BH1n),[0,2,1]),(1,nsamp2,1))\n\ttmp2 = np.tile(np.transpose(np.atleast_3d(BH2n.T),[2,1,0]),(nsamp1,1,1))\n\tHC = 0.5*np.sum((tmp1-tmp2)**2/(tmp1+tmp2+eps),axis=2)\n\n\treturn HC\n\ndef sc_compute(Bsamp,Tsamp,mean_dist,nbins_theta,nbins_r,r_inner,r_outer,out_vec):\n\tin_vec = (out_vec==0).ravel()\n\tnsamp = Bsamp.shape[1]\n\tr_array=ssd.squareform(ssd.pdist(Bsamp.T)).T\n\n\ttheta_array_abs0=Bsamp[1,:].reshape(-1,1).dot(np.ones((1,nsamp))) - 
\\\n\t\t\tnp.ones((nsamp,1)).dot(Bsamp[1,:].reshape(1,-1))\n\n\ttheta_array_abs1=Bsamp[0,:].reshape(-1,1).dot(np.ones((1,nsamp))) - \\\n\t\t\tnp.ones((nsamp,1)).dot(Bsamp[0,:].reshape(1,-1))\n\n\ttheta_array_abs = np.arctan2(theta_array_abs0,theta_array_abs1).T\n\ttheta_array=theta_array_abs-Tsamp.T.dot(np.ones((1,nsamp)))\n\n\tif mean_dist is None:\n\t\tmean_dist = np.mean(r_array[in_vec].T[in_vec].T)\n\n\tr_array_n = r_array / mean_dist\n\n\tr_bin_edges=np.logspace(np.log10(r_inner),np.log10(r_outer),nbins_r)\n\tr_array_q=np.zeros((nsamp,nsamp))\n\n\tfor m in range(int(nbins_r)):\n\t\tr_array_q=r_array_q+(r_array_n<r_bin_edges[m])\n\n\tfz = r_array_q > 0\n\ttheta_array_2 = np.fmod(np.fmod(theta_array,2*np.pi)+2*np.pi,2*np.pi)\n\ttheta_array_q = 1+np.floor(theta_array_2/(2*np.pi/nbins_theta))\n\n\tnbins=nbins_theta*nbins_r\n\tBH=np.zeros((nsamp,nbins))\n\tcount = 0\n\tfor n in range(nsamp):\n\t\tfzn=fz[n]&in_vec\n\t\tSn = np.zeros((nbins_theta,nbins_r))\n\t\tcoords = np.hstack((theta_array_q[n,fzn].reshape(-1,1),\n\t\t\tr_array_q[n,fzn].astype(np.int).reshape(-1,1)))\n\n\t\t# SLOW...\n\t\t#for i,j in coords:\n\t\t\t#Sn[i-1,j-1] += 1\n\n\t\t# FASTER\n\t\tids = np.ravel_multi_index((coords.T-1).astype(np.int),Sn.shape)\n\t\tSn  = np.bincount(ids.ravel(),minlength = np.prod(Sn.shape)).reshape(Sn.shape)\n\n\n\t\tBH[n,:] = Sn.T[:].ravel()\n\n\treturn BH.astype(np.int),mean_dist\n\ndef db_eval_t_stab(fgmask,ground_truth,timing=True):\n\t\"\"\"\n\tCalculates the temporal stability index between two masks\n\tArguments:\n\t\t\t\t\tfgmask (ndarray):  Foreground Object mask at frame t\n\t\tground_truth (ndarray):  Foreground Object mask at frame t+1\n\tReturn:\n\t\t\t\t\t\t\t T (ndarray):  Temporal (in-)stability\n\t   raw_results (ndarray):  Supplemental values\n\t\"\"\"\n\n\tcont_th = 3\n\tcont_th_up = 3\n\n\t# Shape context parameters\n\tr_inner     = 1.0/8.0\n\tr_outer     = 2.0\n\tnbins_r     = 5.0\n\tnbins_theta = 12.0\n\n\tpoly1 = mask2poly(fgmask,cont_th)\n\tpoly2 = 
mask2poly(ground_truth,cont_th)\n\n\tif len(poly1.contour_coords) == 0 or \\\n\t\t\tlen(poly2.contour_coords) == 0:\n\t\treturn np.nan\n\n\tCs1 = get_longest_cont(poly1.contour_coords)\n\tCs2 = get_longest_cont(poly2.contour_coords)\n\n\tupCs1 = contour_upsample(Cs1,cont_th_up)\n\tupCs2 = contour_upsample(Cs2,cont_th_up)\n\n\tscs1,_=sc_compute(upCs1.T,np.zeros((1,upCs1.shape[0])),None,\n\t\t\tnbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs1.shape[0])))\n\n\tscs2,_=sc_compute(upCs2.T,np.zeros((1,upCs2.shape[0])),None,\n\t\t\tnbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs2.shape[0])))\n\n\t# Match with the 0-0 alignment\n\tcostmat              = hist_cost_2(scs1,scs2)\n\tpairs ,max_sx,max_sy = match_dijkstra(np.ascontiguousarray(costmat))\n\n\n\t# Shift costmat\n\tcostmat2 = np.roll(costmat ,-(max_sy+1),axis=1)\n\tcostmat2 = np.roll(costmat2,-(max_sx+1),axis=0)\n\n\t# Redo again with the correct alignment\n\tpairs,_,_ = match_dijkstra(costmat2)\n\n\t# Put the pairs back to the original place\n\tpairs[:,0] = np.mod(pairs[:,0]+max_sx+1, costmat.shape[0])\n\tpairs[:,1] = np.mod(pairs[:,1]+max_sy+1, costmat.shape[1])\n\n\tpairs = get_bijective_pairs(pairs,costmat)\n\n\tpairs_cost = costmat[pairs[:,0], pairs[:,1]]\n\tmin_cost   = np.average(pairs_cost)\n\n\treturn min_cost"
  },
  {
    "path": "misc/__init__.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\nfrom .timer  import Timer\nfrom .logger import logging as log\nfrom .config import phase,cfg,db_read_sequences\nfrom .visual import overlay\n"
  },
  {
    "path": "misc/config.py",
    "content": "#!/usr/bin/env python\nimport yaml\nimport os.path as osp\nimport numpy as np\nfrom easydict import EasyDict as edict\n\nfrom enum import Enum\n\n# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from FAST-RCNN (Ross Girshick)\n# ----------------------------------------------------------------------------\n\n\"\"\" Configuration file.\"\"\"\n\n\nclass phase(Enum):\n    TRAIN = 'train'\n    VAL = 'val'\n    TESTDEV = 'test-dev'\n    TRAINVAL = 'train-val'\n\n\n__C = edict()\n\n# Public access to configuration settings\ncfg = __C\n\n# Number of CPU cores used to parallelize evaluation.\n__C.N_JOBS = 32\n\n# Paths to dataset folders\n__C.PATH = edict()\n\n# Dataset resolution: (\"480p\",\"1080p\")\n__C.RESOLUTION=\"480p\"\n\n# Dataset year: (\"2016\",\"2017\")\n__C.YEAR = \"2017\"\n\n__C.PHASE = phase.VAL\n\n# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)\n__C.MULTIOBJECT = True\n\n# Root folder of project\n__C.PATH.ROOT = osp.abspath('.')\n\n# Data folder\n__C.PATH.DATA = osp.abspath('./data/DAVIS2017')\n\n# Path to input images\n__C.PATH.SEQUENCES = osp.join(__C.PATH.DATA, \"JPEGImages\", __C.RESOLUTION)\n\n# Path to annotations\n__C.PATH.ANNOTATIONS = osp.join(__C.PATH.DATA, \"Annotations\", __C.RESOLUTION)\n\n__C.PATH.ANNOTATIONS_EDGE = osp.join(__C.PATH.DATA, \"Annotations_edge\",\n                                     __C.RESOLUTION)\n\n__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'davis2017-flow/')\n\n__C.PATH.HED = osp.join(__C.PATH.DATA, 'davis2017-hed/')\n\n# Color palette\n__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))\n\n# Paths to files\n__C.FILES = edict()\n\n# 
Path to property file, holding information on evaluation sequences.\n__C.FILES.DB_INFO = osp.abspath(osp.join(__C.PATH.ROOT,\n                                         \"data/db_info.yaml\"))\n\n# Measures and Statistics\n__C.EVAL = edict()\n\n# Metrics: J: region similarity, F: contour accuracy, T: temporal stability\n__C.EVAL.METRICS = ['J', 'F']\n\n# Statistics computed for each of the metrics listed above\n__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']\n\n\ndef db_read_info():\n    \"\"\" Read dataset properties from file.\"\"\"\n    with open(cfg.FILES.DB_INFO, 'r') as f:\n        return edict(yaml.load(f))\n\n\ndef db_read_attributes():\n    \"\"\" Read list of sequences. \"\"\"\n    return db_read_info().attributes\n\n\ndef db_read_years():\n    \"\"\" Read list of sequences. \"\"\"\n    return db_read_info().years\n\n\ndef db_read_sequences(year=None,db_phase=None):\n    \"\"\" Read list of sequences. \"\"\"\n\n    sequences = db_read_info().sequences\n\n    if year is not None:\n        sequences = filter(\n            lambda s: int(s.year) <= int(year), sequences)\n\n    if db_phase is not None:\n        if db_phase == phase.TRAINVAL:\n            sequences = filter(lambda s: ((s.set == phase.VAL) or\n                                          (s.set == phase.TRAIN)), sequences)\n        else:\n            sequences = filter(lambda s: s.set == db_phase, sequences)\n    return sequences\n\n\n# Load all sequences\n__C.SEQUENCES = dict([(sequence.name, sequence)\n                      for sequence in db_read_sequences()])\n\n__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)\n"
  },
  {
    "path": "misc/config_youtubeVOS.py",
    "content": "#!/usr/bin/env python\nimport json\nimport os.path as osp\nfrom easydict import EasyDict as edict\nfrom enum import Enum\nimport numpy as np\n\n\"\"\" Configuration file.\"\"\"\n\n\nclass phase(Enum):\n    TRAIN = 'train'\n    VAL = 'val'\n    TESTDEV = 'test-dev'\n    TRAINVAL = 'trainval'\n\n\n__C = edict()\n\n# Public access to configuration settings\ncfg = __C\n\n# Number of CPU cores used to parallelize evaluation.\n__C.N_JOBS = 32\n\n# Paths to dataset folders\n__C.PATH = edict()\n\n__C.PHASE = phase.TRAIN\n\n# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)\n__C.MULTIOBJECT = True\n\n# Root folder of project\n__C.PATH.ROOT = osp.abspath('.')\n\n# Data folder\n__C.PATH.DATA = osp.abspath('./data/YouTubeVOS_2018')\n\n\n# Path to input images\n__C.PATH.SEQUENCES_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                    \"JPEGImages\")\n__C.PATH.SEQUENCES_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                  \"JPEGImages\")\n__C.PATH.SEQUENCES_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                       \"JPEGImages\")\n__C.PATH.SEQUENCES_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,\n                                   \"JPEGImages\")\n\n# Path to annotations\n__C.PATH.ANNOTATIONS_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                      \"Annotations\")\n__C.PATH.ANNOTATIONS_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                    \"Annotations\")\n__C.PATH.ANNOTATIONS_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                         \"Annotations\")\n__C.PATH.ANNOTATIONS_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,\n                                     \"Annotations\")\n\n__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-flow/')\n__C.PATH.HED = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-hed/')\n__C.PATH.ANNOTATIONS_TRAIN_EDGE = 
osp.join(__C.PATH.DATA, phase.TRAIN.value,\n                                           \"Annotations_edge\")\n\n__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))\n\n# Paths to files\n__C.FILES = edict()\n\n# Path to property file, holding information on evaluation sequences.\n__C.FILES.DB_INFO_TRAIN = osp.abspath(\n    osp.join(__C.PATH.DATA, phase.TRAIN.value, \"train-train-meta.json\"))\n__C.FILES.DB_INFO_VAL = osp.abspath(\n    osp.join(__C.PATH.DATA, phase.TRAIN.value, \"train-val-meta.json\"))\n__C.FILES.DB_INFO_TRAINVAL = osp.abspath(\n    osp.join(__C.PATH.DATA, phase.TRAIN.value, \"meta.json\"))\n__C.FILES.DB_INFO_TEST = osp.abspath(\n    osp.join(__C.PATH.DATA, phase.VAL.value, \"meta.json\"))\n\n# Measures and Statistics\n__C.EVAL = edict()\n\n# Metrics: J: region similarity, F: contour accuracy, T: temporal stability\n__C.EVAL.METRICS = ['J', 'F']\n\n# Statistics computed for each of the metrics listed above\n__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']\n\n\ndef db_read_sequences_train():\n    \"\"\" Read list of sequences. \"\"\"\n\n    json_data = open(__C.FILES.DB_INFO_TRAIN)\n    data = json.load(json_data)\n    sequences = data['videos'].keys()\n  \n    return sequences\n\n\ndef db_read_sequences_val():\n    \"\"\" Read list of sequences. \"\"\"\n\n    json_data = open(__C.FILES.DB_INFO_VAL)\n    data = json.load(json_data)\n    sequences = data['videos'].keys()\n  \n    return sequences\n\n\ndef db_read_sequences_trainval():\n    \"\"\" Read list of sequences. \"\"\"\n\n    json_data = open(__C.FILES.DB_INFO_TRAINVAL)\n    data = json.load(json_data)\n    sequences = data['videos'].keys()\n  \n    return sequences\n\n\ndef db_read_sequences_test():\n    \"\"\" Read list of sequences. 
\"\"\"\n\n    json_data = open(__C.FILES.DB_INFO_TEST)\n    data = json.load(json_data)\n    sequences = data['videos'].keys()\n\n    return sequences\n\n\n# Load all sequences\n__C.SEQUENCES_TRAIN = db_read_sequences_train()\n__C.SEQUENCES_VAL = db_read_sequences_val()\n__C.SEQUENCES_TRAINVAL = db_read_sequences_trainval()\n__C.SEQUENCES_TEST = db_read_sequences_test()\n\n__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)\n"
  },
  {
    "path": "misc/io_aux.py",
    "content": "from PIL import Image\nimport numpy as np\n\nfrom misc.config import cfg\n\ndef imread_indexed(filename):\n  \"\"\" Load image given filename.\"\"\"\n\n  im = Image.open(filename)\n\n  annotation = np.atleast_3d(im)[...,0]\n  return annotation,np.array(im.getpalette()).reshape((-1,3))\n\ndef imwrite_indexed(filename,array,color_palette=cfg.palette):\n  \"\"\" Save indexed png.\"\"\"\n\n  if np.atleast_3d(array).shape[2] != 1:\n    raise Exception(\"Saving indexed PNGs requires 2D array.\")\n\n  im = Image.fromarray(array)\n  im.putpalette(color_palette.ravel())\n  im.save(filename, format='PNG')"
  },
  {
    "path": "misc/logger.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from DAVIS 2016 (Federico Perazzi)\n# ----------------------------------------------------------------------------\n\nimport logging\n\nFORMAT = '[%(levelname)s]%(asctime)-15s %(message)s'\nlogging.basicConfig(\n  level=logging.INFO,format=FORMAT,datefmt='[%d-%m-%Y %H:%M:%S]')\n\n"
  },
  {
    "path": "misc/parallel.py",
    "content": "try:\n    from joblib import Parallel, delayed\nexcept Exception as e:\n    print(e) #TODO\n    # if joblib does not exist just run it in a single thread\n    delayed = lambda x: x\n    def Parallel( *args, **kwargs ):\n        return list\n\n# Allow pickling member functions\ndef _pickle_method(method):\n    func_name = method.__name__\n    obj = method.__self__\n    return _unpickle_method, (func_name, obj)\n\ndef _unpickle_method(func_name, obj):\n    try:\n        return obj.__getattribute__(func_name)\n    except AttributeError:\n        return None\n\n#The copyreg module offers a way to define functions used while pickling specific objects\n# Requires to install future module\nimport types\ntry:\n    import copyreg\nexcept:\n    import six.moves.copyreg as copyreg\n\ncopyreg.pickle(types.MethodType, _pickle_method, _unpickle_method)"
  },
  {
    "path": "misc/timer.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# Adapted from FAST-RCNN (Ross Girshick)\n# ----------------------------------------------------------------------------\n\n\"\"\"\n  A simple wrapper to the built-in python timer.\n\"\"\"\n\nimport time\n\nclass Timer(object):\n  \"\"\"docstring for Timer\"\"\"\n  def __init__(self):\n    super(Timer, self).__init__()\n    self._start = None\n\n  def tic(self):\n    self._start = time.time()\n    return self\n\n  def toc(self):\n    assert self._start != None,\\\n        'Timer uninitialized. Call \"toc()\" first.'\n    return time.time() - self._start\n"
  },
  {
    "path": "misc/visual.py",
    "content": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Object Segmentation\n#-----------------------------------------------------------------------------\n# Copyright (c) 2017 Federico Perazzi\n# Licensed under the BSD License [see LICENSE for details]\n# Written by Federico Perazzi (federico@disneyresearch.com)\n# ----------------------------------------------------------------------------\n\nimport skimage\nimport numpy as np\n\n\ndef overlay(image, mask, colors=[255, 0, 0], cscale=2, alpha=0.4):\n    \"\"\" Overlay segmentation on top of RGB image. \"\"\"\n\n    colors = np.atleast_2d(colors) * cscale\n\n    im_overlay = image.copy()\n    object_ids = np.unique(mask)\n\n    for object_id in object_ids[1:]:\n        # Overlay color on  binary mask\n\n        foreground = image*alpha + np.ones(image.shape)*(1-alpha) * np.array(colors[object_id])\n        binary_mask = mask == object_id\n\n        # Compose image\n        im_overlay[binary_mask] = foreground[binary_mask]\n\n        countours = skimage.morphology.binary.binary_dilation(binary_mask) - binary_mask\n        im_overlay[countours, :] = 0\n\n    return im_overlay.astype(image.dtype)\n"
  },
  {
    "path": "modules/MATNet.py",
    "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torchvision import models\n\n\nclass Encoder(nn.Module):\n    def __init__(self):\n        super(Encoder, self).__init__()\n\n        resnet_im = models.resnet101(pretrained=True)\n        self.conv1_1 = resnet_im.conv1\n        self.bn1_1 = resnet_im.bn1\n        self.relu_1 = resnet_im.relu\n        self.maxpool_1 = resnet_im.maxpool\n\n        self.res2_1 = resnet_im.layer1\n        self.res3_1 = resnet_im.layer2\n        self.res4_1 = resnet_im.layer3\n        self.res5_1 = resnet_im.layer4\n\n        resnet_fl = models.resnet101(pretrained=True)\n        self.conv1_2 = resnet_fl.conv1\n        self.bn1_2 = resnet_fl.bn1\n        self.relu_2 = resnet_fl.relu\n        self.maxpool_2 = resnet_fl.maxpool\n\n        self.res2_2 = resnet_fl.layer1\n        self.res3_2 = resnet_fl.layer2\n        self.res4_2 = resnet_fl.layer3\n        self.res5_2 = resnet_fl.layer4\n\n        self.gated_res2 = Gated(256*2)\n        self.gated_res3 = Gated(512*2)\n        self.gated_res4 = Gated(1024*2)\n        self.gated_res5 = Gated(2048*2)\n\n        self.coa_res3 = CoAttention(channel=512)\n        self.coa_res4 = CoAttention(channel=1024)\n        self.coa_res5 = CoAttention(channel=2048)\n\n    def forward_res2(self, f1, f2):\n        x1 = self.conv1_1(f1)\n        x1 = self.bn1_1(x1)\n        x1 = self.relu_1(x1)\n        x1 = self.maxpool_1(x1)\n        r2_1 = self.res2_1(x1)\n\n        x2 = self.conv1_2(f2)\n        x2 = self.bn1_2(x2)\n        x2 = self.relu_2(x2)\n        x2 = self.maxpool_2(x2)\n        r2_2 = self.res2_2(x2)\n\n        return r2_1, r2_2\n\n    def forward(self, f1, f2):\n        r2_1, r2_2 = self.forward_res2(f1, f2)\n        r2 = torch.cat([r2_1, r2_2], dim=1)\n\n        # res3\n        r3_1 = self.res3_1(r2_1)\n        r3_2 = self.res3_2(r2_2)\n\n        Za, Zb, Qa, Qb = self.coa_res3(r3_1, r3_2)\n        r3_1 = F.relu(Zb + r3_1)\n        r3_2 = F.relu(Qb + 
r3_2)\n        r3 = torch.cat([r3_1, r3_2], dim=1)\n\n        # res4\n        r4_1 = self.res4_1(r3_1)\n        r4_2 = self.res4_2(r3_2)\n\n        Za, Zb, Qa, Qb = self.coa_res4(r4_1, r4_2)\n        r4_1 = F.relu(Zb + r4_1)\n        r4_2 = F.relu(Qb + r4_2)\n        r4 = torch.cat([r4_1, r4_2], dim=1)\n\n        # res5\n        r5_1 = self.res5_1(r4_1)\n        r5_2 = self.res5_2(r4_2)\n\n        Za, Zb, Qa, Qb = self.coa_res5(r5_1, r5_2)\n        r5_1 = F.relu(Zb + r5_1)\n        r5_2 = F.relu(Qb + r5_2)\n        r5 = torch.cat([r5_1, r5_2], dim=1)\n\n        r5_gated = self.gated_res5(r5)\n        r4_gated = self.gated_res4(r4)\n        r3_gated = self.gated_res3(r3)\n        r2_gated = self.gated_res2(r2)\n\n        return r5_gated, r4_gated, r3_gated, r2_gated\n\n\nclass CoAttention(nn.Module):\n    def __init__(self, channel):\n        super(CoAttention, self).__init__()\n\n        d = channel // 16\n        self.proja = nn.Conv2d(channel, d, kernel_size=1)\n        self.projb = nn.Conv2d(channel, d, kernel_size=1)\n\n        self.bottolneck1 = nn.Sequential(\n                nn.Conv2d(channel, channel, kernel_size=1),\n                nn.BatchNorm2d(channel),\n                nn.ReLU(inplace=True),\n                )\n\n        self.bottolneck2 = nn.Sequential(\n                nn.Conv2d(channel, channel, kernel_size=1),\n                nn.BatchNorm2d(channel),\n                nn.ReLU(inplace=True),\n                )\n\n        self.proj1 = nn.Conv2d(channel, 1, kernel_size=1)\n        self.proj2 = nn.Conv2d(channel, 1, kernel_size=1)\n\n        self.bna = nn.BatchNorm2d(channel)\n        self.bnb = nn.BatchNorm2d(channel)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, Qa, Qb):\n        # cascade 1\n        Qa_1, Qb_1 = self.forward_sa(Qa, Qb)\n        _, Zb = self.forward_co(Qa_1, Qb_1)\n\n        Pa = F.relu(Zb + Qa)\n        Pb = F.relu(Qb_1 + Qb)\n\n        # cascade 2\n        Qa_2, Qb_2 = self.forward_sa(Pa, Pb)\n        _, Zb = 
self.forward_co(Qa_2, Qb_2)\n\n        Pa = F.relu(Zb + Pa)\n        Pb = F.relu(Qb_2 + Pb)\n\n        # cascade 3\n        Qa_3, Qb_3 = self.forward_sa(Pa, Pb)\n        Za, Zb = self.forward_co(Qa_3, Qb_3)\n\n        Pa = F.relu(Zb + Pa)\n        Pb = F.relu(Qb_3 + Pb)\n\n        # cascade 4\n        Qa_4, Qb_4 = self.forward_sa(Pa, Pb)\n        Za, Zb = self.forward_co(Qa_4, Qb_4)\n\n        Pa = F.relu(Zb + Pa)\n        Pb = F.relu(Qb_4 + Pb)\n\n        # cascade 5\n        Qa_5, Qb_5 = self.forward_sa(Pa, Pb)\n        Za, Zb = self.forward_co(Qa_5, Qb_5)\n\n        return Za, Zb, Qa_5, Qb_5\n\n    def forward_sa(self, Qa, Qb):\n        Aa = self.proj1(Qa)\n        Ab = self.proj2(Qb)\n\n        n, c, h, w = Aa.shape\n        Aa = Aa.view(-1, h*w)\n        Ab = Ab.view(-1, h*w)\n\n        Aa = F.softmax(Aa)\n        Ab = F.softmax(Ab)\n\n        Aa = Aa.view(n, c, h, w)\n        Ab = Ab.view(n, c, h, w)\n\n        Qa_attened = Aa * Qa\n        Qb_attened = Ab * Qb\n\n        return Qa_attened, Qb_attened\n\n    def forward_co(self, Qa, Qb):\n        Qa_low = self.proja(Qa)\n        Qb_low = self.projb(Qb)\n\n        N, C, H, W = Qa_low.shape\n        Qa_low = Qa_low.view(N, C, H * W)\n        Qb_low = Qb_low.view(N, C, H * W)\n        Qb_low = torch.transpose(Qb_low, 1, 2)\n\n        L = torch.bmm(Qb_low, Qa_low)\n\n        Aa = F.tanh(L)\n        Ab = torch.transpose(Aa, 1, 2)\n\n        N, C, H, W = Qa.shape\n\n        Qa_ = Qa.view(N, C, H * W)\n        Qb_ = Qb.view(N, C, H * W)\n\n        Za = torch.bmm(Qb_, Aa)\n        Zb = torch.bmm(Qa_, Ab)\n        Za = Za.view(N, C, H, W)\n        Zb = Zb.view(N, C, H, W)\n\n        Za = F.normalize(Za)\n        Zb = F.normalize(Zb)\n\n        return Za, Zb\n\n\nclass Gated(nn.Module):\n    def __init__(self, channel, reduction=16):\n        super(Gated, self).__init__()\n\n        self.avg_pool = nn.AdaptiveAvgPool2d(1)\n\n        self.excitation_1 = nn.Sequential(\n            nn.Linear(channel, channel // 
reduction),\n            nn.ReLU(inplace=True))\n\n        self.excitation_2 = nn.Sequential(\n            nn.Linear(channel // reduction, channel),\n            nn.Sigmoid()\n        )\n\n        self.global_attention = nn.Sequential(\n            nn.Linear(channel // reduction, 1),\n            nn.Sigmoid()\n        )\n\n        kernel_size = 7\n        self.spatial = BasicConv(1, 1, kernel_size, stride=1,\n                                 padding=(kernel_size-1) // 2, relu=False)\n\n    def forward(self, U):\n        # se layer\n        b, c, h, w = U.shape\n        S = self.avg_pool(U).view(b, c)\n        E_1 = self.excitation_1(S)\n\n        E_local = self.excitation_2(E_1).view(b, c, 1, 1)\n        U_se = E_local * U\n\n        # spatial layer\n        U_se_max = torch.max(U_se, 1)[0].unsqueeze(1)\n        SP_Att = self.spatial(U_se_max)\n        U_se_sp = SP_Att * U_se\n\n        # global layer\n        E_global = self.global_attention(E_1).view(b, 1, 1, 1)\n        V = E_global * U_se_sp\n\n        # residual layer\n        O = U + V\n\n        return O\n\nclass BasicConv(nn.Module):\n    def __init__(self, in_planes, out_planes, kernel_size, stride=1,\n                 padding=0, dilation=1, groups=1, relu=True, bn=True,\n                 bias=False):\n        super(BasicConv, self).__init__()\n        self.out_channels = out_planes\n        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,\n                              stride=stride, padding=padding,\n                              dilation=dilation, groups=groups, bias=bias)\n        self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01,\n                                 affine=True) if bn else None\n        self.relu = nn.ReLU() if relu else None\n\n    def forward(self, x):\n        x = self.conv(x)\n        if self.bn is not None:\n            x = self.bn(x)\n        if self.relu is not None:\n            x = self.relu(x)\n        return x\n\n\nclass 
BoundaryModule(nn.Module):\n    def __init__(self, inchannel):\n        super(BoundaryModule, self).__init__()\n\n        self.bn1 = nn.BatchNorm2d(inchannel)\n        self.conv1 = nn.Conv2d(inchannel, 64, kernel_size=3, stride=1,\n                               padding=1)\n        self.relu = nn.ReLU()\n\n        self.bn2 = nn.BatchNorm2d(64)\n        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)\n\n        self.conv3 = nn.Conv2d(64, 1, kernel_size=1, stride=1, padding=0)\n        self.sigmoid = nn.Sigmoid()\n\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                m.weight.data.normal_(0, 0.001)\n                if m.bias is not None:\n                    m.bias.data.zero_()\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n            elif isinstance(m, nn.Linear):\n                m.weight.data.normal_(0, 0.01)\n                m.bias.data.zero_()\n\n    def forward(self, x):\n        x = self.bn1(x)\n        x = self.conv1(x)\n        x = self.relu(x)\n        x = self.bn2(x)\n        x = self.conv2(x)\n        x = self.relu(x)\n        x = self.conv3(x)\n\n        return x\n\n\nclass Decoder(nn.Module):\n    def __init__(self):\n        super(Decoder, self).__init__()\n        mdim = 256\n        self.GC = GC(4096+1, mdim)\n        self.convG1 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)\n        self.convG2 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)\n        self.RF4 = Refine(2048+1, mdim)\n        self.RF3 = Refine(1024+1, mdim)\n        self.RF2 = Refine(512+1, mdim)\n\n        self.pred5 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)\n        self.pred4 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)\n        self.pred3 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)\n        
self.pred2 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)\n\n        self.concat = nn.Conv2d(4, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)\n\n        self.bdry5 = BoundaryModule(4096)\n        self.bdry4 = BoundaryModule(2048)\n        self.bdry3 = BoundaryModule(1024)\n        self.bdry2 = BoundaryModule(512)\n\n        self._initialize_weights()\n\n    def _initialize_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                m.weight.data.normal_(0, 0.001)\n                if m.bias is not None:\n                    m.bias.data.zero_()\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n            elif isinstance(m, nn.Linear):\n                m.weight.data.normal_(0, 0.01)\n                m.bias.data.zero_()\n\n    def forward(self, r5, r4, r3, r2):\n        p5 = self.bdry5(r5)\n        p4 = self.bdry4(r4)\n        p3 = self.bdry3(r3)\n        p2 = self.bdry2(r2)\n\n        p2_up = F.interpolate(p2, size=(473, 473), mode='bilinear')\n        p3_up = F.interpolate(p3, size=(473, 473), mode='bilinear')\n        p4_up = F.interpolate(p4, size=(473, 473), mode='bilinear')\n        p5_up = F.interpolate(p5, size=(473, 473), mode='bilinear')\n\n        concat = torch.cat([p2_up, p3_up, p4_up, p5_up], dim=1)\n        p = self.concat(concat)\n\n        p2_up = torch.sigmoid(p2_up)\n        p3_up = torch.sigmoid(p3_up)\n        p4_up = torch.sigmoid(p4_up)\n        p5_up = torch.sigmoid(p5_up)\n        p = torch.sigmoid(p)\n\n        r5 = torch.cat((r5, p5), dim=1)\n        r4 = torch.cat((r4, p4), dim=1)\n        r3 = torch.cat((r3, p3), dim=1)\n        r2 = torch.cat((r2, p2), dim=1)\n\n        m = self.forward_mask(r5, r4, r3, r2)\n\n        return m, p, p2_up, p3_up, p4_up, p5_up\n\n    def forward_mask(self, x, r4, r3, r2):\n        x = self.GC(x)\n        r = self.convG1(F.relu(x))\n        r = 
self.convG2(F.relu(r))\n        m5 = x + r\n        m4 = self.RF4(r4, m5)\n        m3 = self.RF3(r3, m4)\n        m2 = self.RF2(r2, m3)\n\n        p2 = self.pred2(F.relu(m2))\n        p2_up = F.interpolate(p2, size=(473, 473), mode='bilinear')\n        p2_s = torch.sigmoid(p2_up)\n\n        return p2_s\n\n\nclass GC(nn.Module):\n    def __init__(self, inplanes, planes, kh=7, kw=7):\n        super(GC, self).__init__()\n        self.conv_l1 = nn.Conv2d(inplanes, 256, kernel_size=(kh, 1),\n                                 padding=(int(kh/2), 0))\n        self.conv_l2 = nn.Conv2d(256, planes, kernel_size=(1, kw),\n                                 padding=(0, int(kw/2)))\n        self.conv_r1 = nn.Conv2d(inplanes, 256, kernel_size=(1, kw),\n                                 padding=(0, int(kw/2)))\n        self.conv_r2 = nn.Conv2d(256, planes, kernel_size=(kh, 1),\n                                 padding=(int(kh/2), 0))\n\n    def forward(self, x):\n        x_l = self.conv_l2(self.conv_l1(x))\n        x_r = self.conv_r2(self.conv_r1(x))\n        x = x_l + x_r\n        return x\n\n\nclass AtrousBlock(nn.Module):\n    def __init__(self, inplanes, planes, rate, stride=1):\n        super(AtrousBlock, self).__init__()\n\n        self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,\n                              dilation=rate, padding=rate)\n\n    def forward(self, x):\n        return self.conv(x)\n\n\nclass PyramidDilationConv(nn.Module):\n    def __init__(self, inplanes, planes):\n        super(PyramidDilationConv, self).__init__()\n\n        rate = [3, 5, 7]\n\n        self.block0 = nn.Conv2d(inplanes, planes, kernel_size=1)\n        self.block1 = AtrousBlock(inplanes, planes, rate[0])\n        self.block2 = AtrousBlock(inplanes, planes, rate[1])\n        self.block3 = AtrousBlock(inplanes, planes, rate[2])\n        self.bn = nn.BatchNorm2d(planes*4)\n\n    def forward(self, x):\n        x0 = self.block0(x)\n        x1 = self.block1(x)\n        x2 = 
self.block2(x)\n        x3 = self.block3(x)\n\n        xx = torch.cat([x0, x1, x2, x3], dim=1)\n        xx = self.bn(xx)\n        return xx\n\n\nclass Refine(nn.Module):\n    def __init__(self, inplanes, planes, scale_factor=2):\n        super(Refine, self).__init__()\n        self.convFS1 = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1)\n        self.convFS2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)\n        self.convFS3 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)\n        self.convMM1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)\n        self.convMM2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)\n        self.scale_factor = scale_factor\n\n        outplanes = int(planes / 4)\n        self.pdc = PyramidDilationConv(inplanes, outplanes)\n\n    def forward(self, f, pm):\n        s = self.pdc(f)\n        sr = self.convFS2(F.relu(s))\n        sr = self.convFS3(F.relu(sr))\n        s = s + sr\n\n        m = s + F.interpolate(pm, size=s.shape[2:4], mode='bilinear')\n\n        mr = self.convMM1(F.relu(m))\n        mr = self.convMM2(F.relu(mr))\n        m = m + mr\n        return m\n"
  },
  {
    "path": "modules/cbam.py",
    "content": "import torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass BasicConv(nn.Module):\n    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):\n        super(BasicConv, self).__init__()\n        self.out_channels = out_planes\n        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)\n        self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01, affine=True) if bn else None\n        self.relu = nn.ReLU() if relu else None\n\n    def forward(self, x):\n        x = self.conv(x)\n        if self.bn is not None:\n            x = self.bn(x)\n        if self.relu is not None:\n            x = self.relu(x)\n        return x\n\nclass Flatten(nn.Module):\n    def forward(self, x):\n        return x.view(x.size(0), -1)\n\nclass ChannelGate(nn.Module):\n    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):\n        super(ChannelGate, self).__init__()\n        self.gate_channels = gate_channels\n        self.mlp = nn.Sequential(\n            Flatten(),\n            nn.Linear(gate_channels, gate_channels // reduction_ratio),\n            nn.ReLU(),\n            nn.Linear(gate_channels // reduction_ratio, gate_channels)\n            )\n        self.pool_types = pool_types\n    def forward(self, x):\n        channel_att_sum = None\n        for pool_type in self.pool_types:\n            if pool_type=='avg':\n                avg_pool = F.avg_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))\n                channel_att_raw = self.mlp( avg_pool )\n            elif pool_type=='max':\n                max_pool = F.max_pool2d( x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))\n                channel_att_raw = self.mlp( max_pool )\n            elif pool_type=='lp':\n                lp_pool = F.lp_pool2d( x, 2, 
(x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))\n                channel_att_raw = self.mlp( lp_pool )\n            elif pool_type=='lse':\n                # LSE pool only\n                lse_pool = logsumexp_2d(x)\n                channel_att_raw = self.mlp( lse_pool )\n\n            if channel_att_sum is None:\n                channel_att_sum = channel_att_raw\n            else:\n                channel_att_sum = channel_att_sum + channel_att_raw\n\n        scale = F.sigmoid( channel_att_sum ).unsqueeze(2).unsqueeze(3).expand_as(x)\n        return x * scale\n\ndef logsumexp_2d(tensor):\n    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)\n    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)\n    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()\n    return outputs\n\nclass ChannelPool(nn.Module):\n    def forward(self, x):\n        return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )\n\nclass SpatialGate(nn.Module):\n    def __init__(self):\n        super(SpatialGate, self).__init__()\n        kernel_size = 7\n        self.compress = ChannelPool()\n        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False)\n    def forward(self, x):\n        x_compress = self.compress(x)\n        x_out = self.spatial(x_compress)\n        scale = F.sigmoid(x_out) # broadcasting\n        return x * scale\n\nclass CBAM(nn.Module):\n    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):\n        super(CBAM, self).__init__()\n        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)\n        self.no_spatial=no_spatial\n        if not no_spatial:\n            self.SpatialGate = SpatialGate()\n    def forward(self, x):\n        x_out = self.ChannelGate(x)\n        if not self.no_spatial:\n            x_out = self.SpatialGate(x_out)\n        return x_out\n"
  },
  {
    "path": "requirements.txt",
    "content": "easydict==1.9\nlmdb==0.94\ntorchvision==0.2.2\nopencv_contrib_python==4.0.0.21\nscipy==1.2.1\nsix==1.12.0\ntorch==1.0.1.post2\npydensecrf==1.0rc2\ntqdm==4.31.1\nscikit_image==0.15.0\njoblib==0.13.2\nnumpy==1.16.2\nmatplotlib==3.0.3\nPillow==8.1.1\nPyYAML==5.1.2\nskimage==0.0\n"
  },
  {
    "path": "test_MATNet.py",
    "content": "import torch\nfrom torchvision import transforms\n\nimport os\nimport glob\nfrom tqdm import tqdm\nfrom PIL import Image\nfrom scipy.misc import imresize\n\nfrom modules.MATNet import Encoder, Decoder\nfrom utils.utils import check_parallel\nfrom utils.utils import load_checkpoint_epoch\n\n\ndef flip(x, dim):\n    if x.is_cuda:\n        return torch.index_select(x, dim, torch.arange(x.size(dim) - 1, -1, -1).long().cuda(0))\n    else:\n        return torch.index_select(x, dim, torch.arange(x.size(dim) - 1, -1, -1).long())\n\n\ninputRes = (473, 473)\nuse_flip = True\n\nto_tensor = transforms.ToTensor()\nnormalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n                                 std=[0.229, 0.224, 0.225])\nimage_transforms = transforms.Compose([to_tensor, normalize])\n\nmodel_name = 'MATNet' # specify the model name\nepoch = 0 # specify the epoch number\ndavis_result_dir = './output/davis16'\n\nencoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args =\\\n    load_checkpoint_epoch(model_name, epoch, True, False)\nencoder = Encoder()\ndecoder = Decoder()\nencoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)\nencoder.load_state_dict(encoder_dict)\ndecoder.load_state_dict(decoder_dict)\n\nencoder.cuda()\ndecoder.cuda()\n\nencoder.train(False)\ndecoder.train(False)\n\nval_set = 'data/DAVIS2017/ImageSets/2016/val.txt'\nwith open(val_set) as f:\n    seqs = f.readlines()\n    seqs = [seq.strip() for seq in seqs]\n\nfor video in tqdm(seqs):\n    davis_root_dir = 'data/DAVIS2017/JPEGImages/480p'\n    davis_flow_dir = 'data/DAVIS2017/davis2017-flow'\n\n    image_dir = os.path.join(davis_root_dir, video)\n    flow_dir = os.path.join(davis_flow_dir, video)\n\n    imagefiles = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))[:-1]\n    flowfiles = sorted(glob.glob(os.path.join(flow_dir, '*.png')))\n\n    with torch.no_grad():\n        for imagefile, flowfile in zip(imagefiles, flowfiles):\n            image = 
Image.open(imagefile).convert('RGB')\n            flow = Image.open(flowfile).convert('RGB')\n\n            width, height = image.size\n\n            image = imresize(image, inputRes)\n            flow = imresize(flow, inputRes)\n\n            image = image_transforms(image)\n            flow = image_transforms(flow)\n\n            image = image.unsqueeze(0)\n            flow = flow.unsqueeze(0)\n\n            image, flow = image.cuda(), flow.cuda()\n\n            r5, r4, r3, r2 = encoder(image, flow)\n            mask_pred, bdry_pred, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)\n\n            if use_flip:\n                image_flip = flip(image, 3)\n                flow_flip = flip(flow, 3)\n                r5, r4, r3, r2 = encoder(image_flip, flow_flip)\n                mask_pred_flip, bdry_pred_flip, p2, p3, p4, p5 =\\\n                    decoder(r5, r4, r3, r2)\n\n                mask_pred_flip = flip(mask_pred_flip, 3)\n                bdry_pred_flip = flip(bdry_pred_flip, 3)\n\n                mask_pred = (mask_pred + mask_pred_flip) / 2.0\n                bdry_pred = (bdry_pred + bdry_pred_flip) / 2.0\n\n            mask_pred = mask_pred[0, 0, :, :]\n            mask_pred = Image.fromarray(mask_pred.cpu().detach().numpy() * 255).convert('L')\n\n            save_folder = '{}/{}_epoch{}/{}'.format(davis_result_dir,\n                                                    model_name, epoch, video)\n            if not os.path.exists(save_folder):\n                os.makedirs(save_folder)\n\n            save_file = os.path.join(save_folder,\n                                     os.path.basename(imagefile)[:-4] + '.png')\n            mask_pred = mask_pred.resize((width, height))\n            mask_pred.save(save_file)\n\n\n"
  },
  {
    "path": "train_MATNet.py",
    "content": "import torch\nfrom torch.utils import data\nfrom torchvision import transforms\n\nimport os\nimport sys\nimport time\nimport random\nimport numpy as np\n\nfrom modules.MATNet import Encoder, Decoder\nfrom args import get_parser\nfrom utils.utils import get_optimizer\nfrom utils.utils import make_dir, check_parallel\nfrom dataloader.dataset_utils import get_dataset_davis_youtube_ehem\nfrom utils.utils import save_checkpoint_epoch, load_checkpoint_epoch\nfrom utils.objectives import WeightedBCE2d\nfrom measures.jaccard import db_eval_iou_multi\n\n\ndef init_dataloaders(args):\n    loaders = {}\n\n    # init dataloaders for training and validation\n    for split in ['train', 'val']:\n        batch_size = args.batch_size\n        to_tensor = transforms.ToTensor()\n        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],\n                                         std=[0.229, 0.224, 0.225])\n        image_transforms = transforms.Compose([to_tensor, normalize])\n        target_transforms = transforms.Compose([to_tensor])\n\n        dataset = get_dataset_davis_youtube_ehem(\n            args, split=split, image_transforms=image_transforms,\n            target_transforms=target_transforms,\n            augment=args.augment and split == 'train',\n            inputRes=(473, 473))\n\n        shuffle = True if split == 'train' else False\n        loaders[split] = data.DataLoader(dataset,\n                                         batch_size=batch_size,\n                                         shuffle=shuffle,\n                                         num_workers=args.num_workers,\n                                         drop_last=True)\n\n    return loaders\n\n\ndef trainIters(args):\n    print(args)\n\n    model_dir = os.path.join('ckpt/', args.model_name)\n    make_dir(model_dir)\n\n    epoch_resume = 0\n    if args.resume:\n        encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = \\\n            
load_checkpoint_epoch(args.model_name, args.epoch_resume,\n                                  args.use_gpu)\n\n        epoch_resume = args.epoch_resume\n\n        encoder = Encoder()\n        decoder = Decoder()\n\n        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)\n        encoder.load_state_dict(encoder_dict)\n        decoder.load_state_dict(decoder_dict)\n    else:\n        encoder = Encoder()\n        decoder = Decoder()\n\n    criterion = WeightedBCE2d()\n\n    if args.use_gpu:\n        encoder.cuda()\n        decoder.cuda()\n        criterion.cuda()\n\n    encoder_params = list(encoder.parameters())\n    decoder_params = list(decoder.parameters())\n    dec_opt = get_optimizer(args.optim, args.lr, decoder_params,\n                            args.weight_decay)\n    enc_opt = get_optimizer(args.optim_cnn, args.lr_cnn, encoder_params,\n                            args.weight_decay_cnn)\n\n    loaders = init_dataloaders(args)\n\n    best_iou = 0\n\n    start = time.time()\n    for e in range(epoch_resume, args.max_epoch):\n        print(\"Epoch\", e)\n        epoch_losses = {'train': {'total': [], 'iou': [],\n                                  'mask_loss': [], 'bdry_loss': []},\n                        'val': {'total': [], 'iou': [],\n                                'mask_loss': [], 'bdry_loss': []}}\n\n        for split in ['train', 'val']:\n            if split == 'train':\n                encoder.train(True)\n                decoder.train(True)\n            else:\n                encoder.train(False)\n                decoder.train(False)\n\n            for batch_idx, (image, flow, mask, bdry, negative_pixels) in\\\n                    enumerate(loaders[split]):\n                image, flow, mask, bdry, negative_pixels = \\\n                    image.cuda(), flow.cuda(), mask.cuda(), bdry.cuda(),\\\n                    negative_pixels.cuda()\n\n                if split == 'train':\n                    r5, r4, r3, r2 = encoder(image, 
flow)\n                    mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)\n\n                    mask_loss = criterion(mask_pred, mask, negative_pixels)\n                    bdry_loss = criterion(p1, bdry, negative_pixels) + \\\n                                criterion(p2, bdry, negative_pixels) + \\\n                                criterion(p3, bdry, negative_pixels) + \\\n                                criterion(p4, bdry, negative_pixels) + \\\n                                criterion(p5, bdry, negative_pixels)\n                    loss = mask_loss + 0.2 * bdry_loss\n\n                    iou = db_eval_iou_multi(mask.cpu().detach().numpy(),\n                                            mask_pred.cpu().detach().numpy())\n\n                    dec_opt.zero_grad()\n                    enc_opt.zero_grad()\n                    loss.backward()\n                    enc_opt.step()\n                    dec_opt.step()\n                else:\n                    with torch.no_grad():\n                        r5, r4, r3, r2 = encoder(image, flow)\n                        mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)\n\n                        mask_loss = criterion(mask_pred, mask, negative_pixels)\n                        bdry_loss = criterion(p1, bdry, negative_pixels) + \\\n                                    criterion(p2, bdry, negative_pixels) + \\\n                                    criterion(p3, bdry, negative_pixels) + \\\n                                    criterion(p4, bdry, negative_pixels) + \\\n                                    criterion(p5, bdry, negative_pixels)\n                        loss = mask_loss + 0.2 * bdry_loss\n\n                    iou = db_eval_iou_multi(mask.cpu().detach().numpy(),\n                                            mask_pred.cpu().detach().numpy())\n\n                epoch_losses[split]['total'].append(loss.data.item())\n                epoch_losses[split]['mask_loss'].append(mask_loss.data.item())\n            
    epoch_losses[split]['bdry_loss'].append(bdry_loss.data.item())\n                epoch_losses[split]['iou'].append(iou)\n\n                if (batch_idx + 1) % args.print_every == 0:\n                    mt = np.mean(epoch_losses[split]['total'])\n                    mmask = np.mean(epoch_losses[split]['mask_loss'])\n                    mbdry = np.mean(epoch_losses[split]['bdry_loss'])\n                    miou = np.mean(epoch_losses[split]['iou'])\n\n                    te = time.time() - start\n                    print('Epoch: [{}/{}][{}/{}]\\tTime {:.3f}s\\tLoss: {:.4f}'\n                          '\\tMask Loss: {:.4f}\\tBdry Loss: {:.4f}'\n                          '\\tIOU: {:.4f}'.format(e, args.max_epoch, batch_idx,\n                                                 len(loaders[split]), te, mt,\n                                                 mmask, mbdry, miou))\n\n                    start = time.time()\n\n        miou = np.mean(epoch_losses['val']['iou'])\n        if miou > best_iou:\n            best_iou = miou\n            save_checkpoint_epoch(args, encoder, decoder,\n                                  enc_opt, dec_opt, e, False)\n\n\nif __name__ == '__main__':\n    parser = get_parser()\n    args = parser.parse_args()\n    torch.manual_seed(args.seed)\n    random.seed(args.seed)\n\n    args.model_name = 'MATNet'\n    args.batch_size = 2\n    args.max_epoch = 25\n    args.year = '2016'\n\n    gpu_id = args.gpu_id\n    print('gpu_id: ', gpu_id)\n    print('use_gpu: ', args.use_gpu)\n    if args.use_gpu:\n        torch.cuda.set_device(device=gpu_id)\n        torch.cuda.manual_seed(args.seed)\n    trainIters(args)\n"
  },
  {
    "path": "utils/__init__.py",
    "content": ""
  },
  {
    "path": "utils/objectives.py",
    "content": "import torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\n\r\n\r\nclass WeightedBCE2d(nn.Module):\r\n    def __init__(self):\r\n        super(WeightedBCE2d, self).__init__()\r\n\r\n    def forward(self, input, target, negative_pixels):\r\n        log_p = input.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)\r\n        target_t = target.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)\r\n        negative_pixels_t = negative_pixels.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)\r\n\r\n        pos_index = (target_t > 0)\r\n        hard_negative_index = (negative_pixels_t > 0)\r\n        easy_negative_index = (negative_pixels_t == 0)\r\n\r\n        pos_index = pos_index.data.cpu().numpy().astype(bool)\r\n        easy_negative_index = easy_negative_index.data.cpu().numpy().astype(bool)\r\n        hard_negative_index = hard_negative_index.data.cpu().numpy().astype(bool)\r\n        hard_negative_index_ = hard_negative_index.nonzero()\r\n\r\n        weight = torch.Tensor(log_p.size()).fill_(0)\r\n        weight = weight.numpy()\r\n\r\n        weight[pos_index] = 1.0\r\n        weight[easy_negative_index] = 1.0\r\n        weight[hard_negative_index] = 1.0 + negative_pixels_t[hard_negative_index_].cpu().numpy()\r\n\r\n        weight = torch.from_numpy(weight)\r\n        weight = weight.cuda()\r\n        loss = F.binary_cross_entropy(log_p, target_t, weight, size_average=True)\r\n\r\n        return loss\r\n"
  },
  {
    "path": "utils/utils.py",
    "content": "import torch\r\nimport os\r\nimport pickle\r\nfrom collections import OrderedDict\r\n\r\n\r\ndef make_dir(dir):\r\n    if not os.path.exists(dir):\r\n        os.makedirs(dir)\r\n\r\n\r\ndef check_parallel(encoder_dict, decoder_dict):\r\n    trained_parallel = False\r\n    for k, v in encoder_dict.items():\r\n        if k[:7] == \"module.\":\r\n            trained_parallel = True\r\n        break\r\n    if trained_parallel:\r\n        # create new OrderedDict that does not contain \"module.\"\r\n        new_encoder_state_dict = OrderedDict()\r\n        new_decoder_state_dict = OrderedDict()\r\n        for k, v in encoder_dict.items():\r\n            name = k[7:]  # remove \"module.\"\r\n            new_encoder_state_dict[name] = v\r\n        for k, v in decoder_dict.items():\r\n            name = k[7:]  # remove \"module.\"\r\n            new_decoder_state_dict[name] = v\r\n        encoder_dict = new_encoder_state_dict\r\n        decoder_dict = new_decoder_state_dict\r\n\r\n    return encoder_dict, decoder_dict\r\n\r\n\r\ndef get_base_params(args, model):\r\n    b = []\r\n    b.append(model.conv1)\r\n    b.append(model.bn1)\r\n    b.append(model.res2)\r\n    b.append(model.res3)\r\n    b.append(model.res4)\r\n    b.append(model.res5)\r\n\r\n    for i in range(len(b)):\r\n        for j in b[i].modules():\r\n            jj = 0\r\n            for k in j.parameters():\r\n                jj+=1\r\n                if k.requires_grad:\r\n                    yield k\r\n\r\n\r\ndef get_skip_params(model):\r\n    b = []\r\n    b.append(model.sk2.parameters())\r\n    b.append(model.sk3.parameters())\r\n    b.append(model.sk4.parameters())\r\n    b.append(model.sk5.parameters())\r\n    b.append(model.bn2.parameters())\r\n    b.append(model.bn3.parameters())\r\n    b.append(model.bn4.parameters())\r\n    b.append(model.bn5.parameters())\r\n\r\n    for j in range(len(b)):\r\n        for i in b[j]:\r\n            yield i\r\n\r\n\r\ndef merge_params(params):\r\n    
for j in range(len(params)):\r\n        for i in params[j]:\r\n            yield i\r\n\r\n\r\ndef get_optimizer(optim_name, lr, parameters, weight_decay=0, momentum=0.9):\r\n    if optim_name == 'sgd':\r\n        opt = torch.optim.SGD(filter(lambda p: p.requires_grad, parameters),\r\n                              lr=lr, weight_decay=weight_decay,\r\n                              momentum=momentum)\r\n    elif optim_name == 'adam':\r\n        opt = torch.optim.Adam(filter(lambda p: p.requires_grad, parameters),\r\n                               lr=lr, weight_decay=weight_decay)\r\n    return opt\r\n\r\n\r\ndef save_checkpoint_epoch(args, encoder, decoder, enc_opt, dec_opt, epoch, best=False):\r\n    torch.save(encoder.state_dict(), os.path.join('ckpt', args.model_name, 'encoder_{}.pt'.format(epoch)))\r\n    torch.save(decoder.state_dict(), os.path.join('ckpt', args.model_name, 'decoder_{}.pt'.format(epoch)))\r\n    torch.save(enc_opt.state_dict(), os.path.join('ckpt', args.model_name, 'enc_opt_{}.pt'.format(epoch)))\r\n    torch.save(dec_opt.state_dict(), os.path.join('ckpt', args.model_name, 'dec_opt_{}.pt'.format(epoch)))\r\n\r\n    if best:\r\n        torch.save(encoder.state_dict(), os.path.join('ckpt', args.model_name, 'encoder.pt'))\r\n        torch.save(decoder.state_dict(), os.path.join('ckpt', args.model_name, 'decoder.pt'))\r\n        torch.save(enc_opt.state_dict(), os.path.join('ckpt', args.model_name, 'enc_opt.pt'))\r\n        torch.save(dec_opt.state_dict(), os.path.join('ckpt', args.model_name, 'dec_opt.pt'))\r\n\r\n    # save parameters for future use\r\n    pickle.dump(args, open(os.path.join('ckpt', args.model_name, 'args.pkl'), 'wb'))\r\n\r\n\r\ndef load_checkpoint_epoch(model_name, epoch, use_gpu=True, load_opt=True):\r\n    if use_gpu:\r\n        encoder_dict = torch.load(os.path.join('ckpt', model_name, 'encoder_{}.pt'.format(epoch)))\r\n        decoder_dict = torch.load(os.path.join('ckpt', model_name, 'decoder_{}.pt'.format(epoch)))\r\n       
 if load_opt:\r\n            enc_opt_dict = torch.load(os.path.join('ckpt', model_name, 'enc_opt_{}.pt'.format(epoch)))\r\n            dec_opt_dict = torch.load(os.path.join('ckpt', model_name, 'dec_opt_{}.pt'.format(epoch)))\r\n    else:\r\n        encoder_dict = torch.load(os.path.join('ckpt', model_name, 'encoder_{}.pt'.format(epoch)), map_location=lambda storage, location: storage)\r\n        decoder_dict = torch.load(os.path.join('ckpt', model_name, 'decoder_{}.pt'.format(epoch)), map_location=lambda storage, location: storage)\r\n        enc_opt_dict = torch.load(os.path.join('ckpt', model_name, 'enc_opt_{}.pt'.format(epoch)), map_location=lambda storage, location: storage)\r\n        dec_opt_dict = torch.load(os.path.join('ckpt', model_name, 'dec_opt_{}.pt'.format(epoch)), map_location=lambda storage, location: storage)\r\n    # save parameters for future use\r\n    if load_opt:\r\n        args = pickle.load(open(os.path.join('ckpt', model_name, 'args.pkl'), 'rb'))\r\n\r\n        return encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, args\r\n    else:\r\n        return encoder_dict, decoder_dict, None, None, None\r\n"
  }
]