Repository: tfzhou/MATNet
Branch: master
Commit: c8b95e527c48
Files: 52
Total size: 167.4 KB
Directory structure:
gitextract_05390pb8/
├── .gitignore
├── .idea/
│ ├── aaai20-zero-ShotVOS-github.iml
│ ├── inspectionProfiles/
│ │ └── Project_Default.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── 3rdparty/
│ ├── run_davis.py
│ ├── run_davis2017.m
│ ├── run_davis_flow.py
│ ├── run_youtube.m
│ ├── run_youtube.py
│ ├── run_youtube_flow.py
│ └── seg2edge.m
├── README.md
├── apply_densecrf_davis.py
├── args.py
├── data/
│ ├── db_info.yaml
│ ├── palette.txt
│ ├── run_davis2017.m
│ ├── run_youtube.m
│ └── seg2edge.m
├── dataloader/
│ ├── base.py
│ ├── base_youtube.py
│ ├── custom_transforms.py
│ ├── dataset_utils.py
│ ├── davis2017.py
│ ├── davis2017_youtubevos_ehem.py
│ ├── hed_bsds.py
│ └── youtubeVOS.py
├── dataset_lmdb_generator.py
├── measures/
│ ├── __init__.py
│ ├── f_boundary.py
│ ├── jaccard.py
│ ├── statistics.py
│ └── t_stability.py
├── misc/
│ ├── __init__.py
│ ├── config.py
│ ├── config_youtubeVOS.py
│ ├── io_aux.py
│ ├── logger.py
│ ├── parallel.py
│ ├── timer.py
│ └── visual.py
├── modules/
│ ├── MATNet.py
│ └── cbam.py
├── requirements.txt
├── test_MATNet.py
├── train_MATNet.py
└── utils/
├── __init__.py
├── objectives.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
ckpt/
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: .idea/aaai20-zero-ShotVOS-github.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (pytorch-1.0.0)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
================================================
FILE: .idea/inspectionProfiles/Project_Default.xml
================================================
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="PyYAML" />
<item index="1" class="java.lang.String" itemvalue="skimage" />
<item index="2" class="java.lang.String" itemvalue="Pillow" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>
================================================
FILE: .idea/misc.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (pytorch-1.0.0)" project-jdk-type="Python SDK" />
</project>
================================================
FILE: .idea/modules.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" filepath="$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" />
</modules>
</component>
</project>
================================================
FILE: .idea/vcs.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/3rdparty/pytorch-hed" vcs="Git" />
<mapping directory="$PROJECT_DIR$/3rdparty/pytorch-pwc" vcs="Git" />
</component>
</project>
================================================
FILE: .idea/workspace.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="9b6416b8-b073-4819-80b8-0da9065c5674" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/.idea/aaai20-zero-ShotVOS-github.iml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/inspectionProfiles/Project_Default.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/modules.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/.idea/vcs.xml" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/base.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/base_youtube.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/custom_transforms.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/dataset_utils.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/davis2017.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/hed_bsds.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/dataloader/youtubeVOS.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/3rdparty/pytorch-hed/run.py" beforeDir="false" afterPath="$PROJECT_DIR$/3rdparty/pytorch-hed/run.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py" beforeDir="false" afterPath="$PROJECT_DIR$/3rdparty/pytorch-pwc/run.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="126">
<caret line="7" column="44" selection-start-line="7" selection-start-column="44" selection-end-line="7" selection-end-column="44" />
</first_editor>
<second_editor />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/test_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="486">
<caret line="99" column="32" selection-start-line="99" selection-start-column="32" selection-end-line="99" selection-end-column="32" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/dataset_lmdb_generator.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
</file>
</leaf>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/train_MANet_Residual_Cascade_SA_Interleave.py" />
<option value="$PROJECT_DIR$/apply_densecrf_davis.py" />
<option value="$PROJECT_DIR$/utils/objectives.py" />
<option value="$PROJECT_DIR$/utils/utils.py" />
<option value="$PROJECT_DIR$/misc/visual.py" />
<option value="$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py" />
<option value="$PROJECT_DIR$/dataloader/dataset_utils.py" />
<option value="$PROJECT_DIR$/args.py" />
<option value="$PROJECT_DIR$/dataloader/base.py" />
<option value="$PROJECT_DIR$/misc/config_youtubeVOS.py" />
<option value="$PROJECT_DIR$/misc/config.py" />
<option value="$PROJECT_DIR$/modules/MATNet.py" />
<option value="$PROJECT_DIR$/measures/jaccard.py" />
<option value="$PROJECT_DIR$/train_MATNet.py" />
<option value="$PROJECT_DIR$/test_MATNet.py" />
<option value="$PROJECT_DIR$/README.md" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="65" />
<option name="y" value="325" />
<option name="width" value="1853" />
<option name="height" value="1055" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="aaai20-zero-ShotVOS-github" type="b2602c69:ProjectViewProjectNode" />
<item name="aaai20-zero-ShotVOS-github" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="aaai20-zero-ShotVOS-github" type="b2602c69:ProjectViewProjectNode" />
<item name="aaai20-zero-ShotVOS-github" type="462c0819:PsiDirectoryNode" />
<item name="utils" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="WebServerToolWindowFactoryState" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.apply_densecrf_davis">
<configuration name="apply_densecrf_davis" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="aaai20-zero-ShotVOS-github" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/apply_densecrf_davis.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="test_MATNet" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="aaai20-zero-ShotVOS-github" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/test_MATNet.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.apply_densecrf_davis" />
<item itemvalue="Python.test_MATNet" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="9b6416b8-b073-4819-80b8-0da9065c5674" name="Default Changelist" comment="" />
<created>1574574216202</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1574574216202</updated>
<workItem from="1574574222318" duration="8114000" />
<workItem from="1574586098553" duration="647000" />
<workItem from="1574600105252" duration="1530000" />
<workItem from="1574603699909" duration="23031000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="33322000" />
</component>
<component name="ToolWindowManager">
<frame x="65" y="-4" width="1855" height="1084" extended-state="6" />
<editor active="true" />
<layout>
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.18815717" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.32959184" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Version Control" order="8" />
<window_info anchor="bottom" id="Database Changes" order="9" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" visible="true" weight="0.22040816" />
<window_info anchor="bottom" id="Terminal" order="11" />
<window_info anchor="bottom" id="Python Console" order="12" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info anchor="right" id="Remote Host" order="3" />
<window_info anchor="right" id="SciView" order="4" />
<window_info anchor="right" id="Database" order="5" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/aaai20_zero_ShotVOS_github$test_MATNet.coverage" NAME="test_MATNet Coverage Results" MODIFIED="1574582203491" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/aaai20_zero_ShotVOS_github$apply_densecrf_davis.coverage" NAME="apply_densecrf_davis Coverage Results" MODIFIED="1574582766918" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA.py" />
<entry file="file://$PROJECT_DIR$/modules/MANet.py" />
<entry file="file://$PROJECT_DIR$/modules/MANet_Residual_Cascade_SA_Interleave_1layers.py" />
<entry file="file://$PROJECT_DIR$/modules/seed.py" />
<entry file="file://$PROJECT_DIR$/utils/hungarian.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="288">
<caret line="16" column="23" lean-forward="true" selection-start-line="16" selection-start-column="23" selection-end-line="16" selection-end-column="23" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/parallel.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/statistics.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#18#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017_ehem.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/dataset_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="9" column="4" selection-start-line="9" selection-start-column="4" selection-end-line="9" selection-end-column="4" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-342">
<caret line="20" column="21" selection-start-line="20" selection-start-column="21" selection-end-line="20" selection-end-column="21" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/run_davis2017.m">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/run_youtube.m">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/hed_bsds.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/youtubeVOS.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/base_youtube.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<folding>
<element signature="e#0#16#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/base.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="414">
<caret line="23" column="30" selection-start-line="23" selection-start-column="30" selection-end-line="23" selection-end-column="30" />
<folding>
<element signature="e#451#467#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/io_aux.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="11" column="41" selection-start-line="11" selection-start-column="41" selection-end-line="11" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/config.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="280">
<caret line="53" column="30" selection-start-line="53" selection-start-column="30" selection-end-line="53" selection-end-column="30" />
<folding>
<element signature="e#22#33#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/config_youtubeVOS.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="820">
<caret line="65" column="60" selection-start-line="65" selection-start-column="60" selection-end-line="65" selection-end-column="60" />
<folding>
<element signature="e#22#33#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/logger.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/modules/MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="756">
<caret line="261" column="47" lean-forward="true" selection-start-line="261" selection-start-column="47" selection-end-line="261" selection-end-column="47" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/utils/__init__.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/modules/cbam.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="11" column="21" selection-start-line="11" selection-start-column="21" selection-end-line="11" selection-end-column="21" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/utils/objectives.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="9" column="25" selection-start-line="9" selection-start-column="25" selection-end-line="9" selection-end-column="25" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/visual.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="12" column="41" selection-start-line="12" selection-start-column="41" selection-end-line="12" selection-end-column="41" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/misc/timer.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/t_stability.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/measures/f_boundary.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/utils/utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda2/envs/pytorch-1.0.0/lib/python3.6/site-packages/torch/utils/data/dataloader.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2044">
<caret line="767" column="8" selection-start-line="767" selection-start-column="8" selection-end-line="767" selection-end-column="8" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/train_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="591">
<caret line="145" column="20" selection-start-line="145" selection-start-column="20" selection-end-line="145" selection-end-column="20" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataloader/davis2017_youtubevos_ehem.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-116">
<caret line="193" column="56" selection-start-line="193" selection-start-column="56" selection-end-line="193" selection-end-column="56" />
<folding>
<element signature="e#0#31#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/measures/jaccard.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="564">
<caret line="42" selection-start-line="42" selection-end-line="42" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/apply_densecrf_davis.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="8" column="29" lean-forward="true" selection-start-line="8" selection-start-column="29" selection-end-line="8" selection-end-column="29" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/args.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="8" column="54" selection-start-line="8" selection-start-column="54" selection-end-line="8" selection-end-column="54" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/test_MATNet.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="486">
<caret line="99" column="32" selection-start-line="99" selection-start-column="32" selection-end-line="99" selection-end-column="32" />
<folding>
<element signature="e#0#12#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/dataset_lmdb_generator.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
<state split_layout="SPLIT">
<first_editor relative-caret-position="126">
<caret line="7" column="44" selection-start-line="7" selection-start-column="44" selection-end-line="7" selection-end-column="44" />
</first_editor>
<second_editor />
</state>
</provider>
</entry>
</component>
</project>
================================================
FILE: 3rdparty/run_davis.py
================================================
import os
import glob
import torch
import numpy
import PIL
from run import estimate
def main():
    """Run HED edge estimation over every DAVIS2017 frame and save edge maps.

    For each video directory under ``davis_folder``, each JPEG frame is
    converted to a BGR, CHW float tensor in [0, 1], passed through
    ``estimate``, and the first output channel is written as an 8-bit image
    with the original frame's basename under ``save_dir/<video>/``.
    """
    davis_folder = '/media/iiai/data/VOS/DAVIS2017/JPEGImages/480p'
    save_dir = '/media/iiai/data/VOS/DAVIS2017/davis2017-hed'

    videos = os.listdir(davis_folder)
    print(videos)

    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        # exist_ok removes the check-then-create race of the old
        # os.path.exists()/os.makedirs() pair.
        os.makedirs(save_dir_video, exist_ok=True)

        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for imagefile in imagefiles:
            # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1] (network input format).
            tensorInput = torch.FloatTensor(
                numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
                .transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))

            tensorOutput = estimate(tensorInput)

            save_file = os.path.join(save_dir_video, os.path.basename(imagefile))
            PIL.Image.fromarray(
                (tensorOutput.clamp(0.0, 1.0).numpy()
                 .transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)
            ).save(save_file)


# Guard the entry point so importing this module does not start a full dataset
# run; the sibling run_*_flow.py scripts already follow this convention.
if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_davis2017.m
================================================
% Convert DAVIS2017 annotation masks into binary edge maps.
% Every PNG under src_dir/<seq>/ is passed through seg2edge (radius 2,
% no ignore labels, 'regular' mode) and written to save_dir/<seq>/.
clc

save_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations_edge/480p';
src_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations/480p';

listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);  % drop the '.' and '..' entries

for si = 1 : numel(seq_names)
    seq_dir = fullfile(src_dir, seq_names{si});
    out_dir = fullfile(save_dir, seq_names{si});

    frames = dir(fullfile(seq_dir, '*.png'));
    frame_names = {frames.name};

    for fi = 1 : numel(frame_names)
        mask = imread(fullfile(seq_dir, frame_names{fi}));
        edge_map = seg2edge(mask, 2, [], 'regular');
        if ~exist(out_dir, 'dir')
            mkdir(out_dir)
        end
        imwrite(edge_map, fullfile(out_dir, frame_names{fi}))
    end
end
================================================
FILE: 3rdparty/run_davis_flow.py
================================================
import torch
import glob
import getopt
import math
import numpy
import os
import PIL
import PIL.Image
import sys
from run import estimate
import flow_vis, cv2
def main():
    """Estimate optical flow for every consecutive DAVIS frame pair.

    Frames within each video directory are paired as (t, t+1) and handed to
    `run`, which writes a flow visualisation named '<t>_<t+1>.png' into
    the per-video output directory.
    """
    davis_folder = '/home/tianfei/dataset/DAVIS2017/JPEGImages/480p'
    save_dir = './davis2017'

    videos = os.listdir(davis_folder)
    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        out_dir = os.path.join(save_dir, video)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        frames = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        # Pair each frame with its successor instead of indexing by position.
        for first, second in zip(frames, frames[1:]):
            stem_a = os.path.basename(first)[:-4]
            stem_b = os.path.basename(second)[:-4]
            target = os.path.join(out_dir, stem_a + '_' + stem_b + '.png')
            run(first, second, target)
def run(imagefile1, imagefile2, save_file):
    """Estimate optical flow between two frames and save a colour rendering.

    Both frames are loaded as BGR, CHW float tensors in [0, 1], the predicted
    flow is mapped onto the standard flow colour wheel, and the result is
    written with OpenCV.
    """
    def to_tensor(path):
        # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1]
        bgr = numpy.array(PIL.Image.open(path))[:, :, ::-1]
        chw = bgr.transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0)
        return torch.FloatTensor(chw)

    flow = estimate(to_tensor(imagefile1), to_tensor(imagefile2))
    flow_color = flow_vis.flow_to_color(flow.numpy().transpose(1, 2, 0),
                                        convert_to_bgr=True)
    cv2.imwrite(save_file, flow_color)


if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_youtube.m
================================================
% Convert YouTube-VOS training annotation masks into binary edge maps.
% Every PNG under src_dir/<seq>/ is passed through seg2edge (radius 2,
% no ignore labels, 'regular' mode) and written to save_dir/<seq>/.
clc

save_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations_edge';
src_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations';

listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);  % drop the '.' and '..' entries

for si = 1 : numel(seq_names)
    disp(seq_names{si})

    seq_dir = fullfile(src_dir, seq_names{si});
    out_dir = fullfile(save_dir, seq_names{si});

    frames = dir(fullfile(seq_dir, '*.png'));
    frame_names = {frames.name};

    for fi = 1 : numel(frame_names)
        mask = imread(fullfile(seq_dir, frame_names{fi}));
        edge_map = seg2edge(mask, 2, [], 'regular');
        if ~exist(out_dir, 'dir')
            mkdir(out_dir)
        end
        imwrite(edge_map, fullfile(out_dir, frame_names{fi}))
    end
end
================================================
FILE: 3rdparty/run_youtube.py
================================================
import os
import glob
import torch
import numpy
import PIL
from run import estimate
def main():
    """Run HED edge estimation over every YouTube-VOS training frame.

    For each video directory under ``davis_folder``, each JPEG frame is
    converted to a BGR, CHW float tensor in [0, 1], passed through
    ``estimate``, and the first output channel is written as an 8-bit image
    with the original frame's basename under ``save_dir/<video>/``.
    """
    davis_folder = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/JPEGImages'
    save_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018-hed'

    videos = os.listdir(davis_folder)
    print(videos)

    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        # exist_ok removes the check-then-create race of the old
        # os.path.exists()/os.makedirs() pair.
        os.makedirs(save_dir_video, exist_ok=True)

        imagefiles = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for imagefile in imagefiles:
            # RGB -> BGR, HWC -> CHW, [0, 255] -> [0, 1] (network input format).
            tensorInput = torch.FloatTensor(
                numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
                .transpose(2, 0, 1).astype(numpy.float32) * (1.0 / 255.0))

            tensorOutput = estimate(tensorInput)

            save_file = os.path.join(save_dir_video, os.path.basename(imagefile))
            PIL.Image.fromarray(
                (tensorOutput.clamp(0.0, 1.0).numpy()
                 .transpose(1, 2, 0)[:, :, 0] * 255.0).astype(numpy.uint8)
            ).save(save_file)


# Guard the entry point so importing this module does not start a full dataset
# run; the sibling run_*_flow.py scripts already follow this convention.
if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/run_youtube_flow.py
================================================
import torch
import glob
import getopt
import math
import numpy
import os
import PIL
import PIL.Image
import sys
import flow_vis
from run import estimate
import cv2


def _load_frame(imagefile):
    """Read an image file as a CHW float32 tensor, BGR channel order, in [0, 1]."""
    bgr = numpy.array(PIL.Image.open(imagefile))[:, :, ::-1]
    chw = bgr.transpose(2, 0, 1).astype(numpy.float32)
    return torch.FloatTensor(chw * (1.0 / 255.0))


def run(imagefile1, imagefile2, save_file):
    """Estimate optical flow between two frames and save a color rendering.

    The flow field from ``estimate`` is rendered with
    ``flow_vis.flow_to_color`` and written to ``save_file`` via OpenCV.
    (A raw .flo writer previously lived here; only the visualization is kept.)
    """
    tensorOutput = estimate(_load_frame(imagefile1), _load_frame(imagefile2))
    flow_color = flow_vis.flow_to_color(tensorOutput.numpy().transpose(1, 2, 0),
                                        convert_to_bgr=True)
    cv2.imwrite(save_file, flow_color)


def main():
    """Compute flow for each consecutive frame pair of every YouTube-VOS video."""
    davis_folder = '/home/tianfei/dataset/YouTubeVOS/train/JPEGImages/'
    save_dir = './youtubevos'

    videos = os.listdir(davis_folder)
    for idx, video in enumerate(videos):
        print('process {}[{}/{}]'.format(video, idx, len(videos)))

        save_dir_video = os.path.join(save_dir, video)
        if not os.path.exists(save_dir_video):
            os.makedirs(save_dir_video)

        frames = sorted(glob.glob(os.path.join(davis_folder, video, '*.jpg')))
        for first, second in zip(frames[:-1], frames[1:]):
            save_name = '{}_{}.png'.format(os.path.basename(first)[:-4],
                                           os.path.basename(second)[:-4])
            run(first, second, os.path.join(save_dir_video, save_name))


if __name__ == '__main__':
    main()
================================================
FILE: 3rdparty/seg2edge.m
================================================
% This function takes an input segment and produces binary boundaries.
% Multi-channel input segments are supported by the function.
%
% Inputs:
%   seg         - height x width x chn label image; a pixel is an edge when
%                 any channel of its label differs from a nearby pixel's.
%   radius      - radius (pixels) of the circular neighborhood probed for
%                 label transitions; larger values thicken the boundaries.
%   labelIgnore - K x chn matrix of label rows whose transitions are not
%                 marked as edges; pass [] to ignore nothing.
%   edge_type   - 'regular': any label change counts;
%                 'inner'  : non-zero pixels adjacent to all-zero pixels;
%                 'outer'  : all-zero pixels adjacent to non-zero pixels.
%
% Output:
%   idxEdge     - height x width logical map, true at boundary pixels.
function [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)
% Get dimensions
[height, width, chn] = size(seg);
% The ignore list must provide one column per label channel.
if(~isempty(labelIgnore))
    if(chn~=size(labelIgnore, 2))
        error('Channel dimension not matching ignored label dimension!')
    end
end
% Set the considered neighborhood.
radius_search = max(ceil(radius), 1);
% (X, Y): coordinates of every pixel; (x, y): candidate neighbor offsets.
[X, Y] = meshgrid(1:width, 1:height);
[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);
% Columnize everything
X = X(:); Y = Y(:);
x = x(:); y = y(:);
% Flatten labels to one row per pixel so multi-channel labels compare row-wise.
if(chn == 1)
    seg = seg(:);
else
    seg = reshape(seg, [height*width chn]);
end
% Build circular neighborhood
idxNeigh = sqrt(x.^2 + y.^2) <= radius;
x = x(idxNeigh); y = y(idxNeigh);
numPxlImg = length(X);
numPxlNeigh = length(x);
% Compute Gaussian weight
% NOTE(review): despite the comment above, no Gaussian weight is computed;
% the loop below simply marks label transitions for each neighbor offset.
idxEdge = false(numPxlImg, 1);
for i = 1:numPxlNeigh
    % Shift every pixel by the i-th neighbor offset.
    XNeigh = X+x(i);
    YNeigh = Y+y(i);
    % Keep only shifted positions that stay inside the image.
    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );
    XCenter = X(idxValid);
    YCenter = Y(idxValid);
    XNeigh = XNeigh(idxValid);
    YNeigh = YNeigh(idxValid);
    % Labels at each center pixel and at its shifted neighbor.
    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);
    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);
    if(strcmp(edge_type, 'regular'))
        % Any change in any channel is an edge.
        idxDiff = find(any(LCenter~=LNeigh, 2));
    elseif(strcmp(edge_type, 'inner'))
        % Object-side edge: non-zero center next to an all-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );
    elseif(strcmp(edge_type, 'outer'))
        % Background-side edge: all-zero center next to a non-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );
    else
        error('Wrong edge type input!');
    end
    % Drop transitions that involve an ignored label on either side.
    LCenterEdge = LCenter(idxDiff, :);
    LNeighEdge = LNeigh(idxDiff, :);
    idxIgnore2 = false(length(idxDiff), 1);
    for j = 1:size(labelIgnore, 1)
        idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );
    end
    idxDiffGT = idxDiff(~idxIgnore2);
    idxEdge(idxValid(idxDiffGT)) = true;
end
% Back to image shape.
idxEdge = reshape(idxEdge, [height, width]);
================================================
FILE: README.md
================================================
## Motion-Attentive Transition for Zero-Shot Video Object Segmentation
[](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on?p=motion-attentive-transition-for-zero-shot)
[](https://paperswithcode.com/sota/unsupervised-video-object-segmentation-on-4?p=matnet-motion-attentive-transition-network)
> UPDATES:<br>
> - [2021/04/17] Our MATNet achieves state-of-the-art results (__64.2__ in terms of _Mean J_) on the [MoCA](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing) dataset in "Self-supervised Video Object Segmentation by Motion Grouping" by Charig Yang, Hala Lamdouar, Erika Lu, Andrew Zisserman, Weidi Xie. Thanks [Charig Yang](https://charigyang.github.io/) for providing the segmentation results [Google Drive](https://drive.google.com/drive/folders/1x-owzr9Voz65NQghrN_H1LEYDaaQP5n1?usp=sharing).
> - [2020/06/15] Update results for DAVIS-17 test-dev set!
> - [2020/03/04] Update results for DAVIS-17 validation set!
> - [2019/11/17] Codes released!
This is a PyTorch implementation of our MATNet for unsupervised video object segmentation.
**Motion-Attentive Transition for Zero-Shot Video Object Segmentation.** [[Arxiv](https://arxiv.org/abs/2003.04253)] [[TIP](https://ieeexplore.ieee.org/document/9165947)]
## Prerequisites
The training and testing experiments are conducted using PyTorch 1.0.1 with a single GeForce RTX 2080Ti GPU with 11GB Memory.
- [PyTorch 1.0.1](https://github.com/pytorch/pytorch)
Other minor Python modules can be installed by running
```bash
pip install -r requirements.txt
```
## Train
### Clone
```git clone --recursive https://github.com/tfzhou/MATNet.git```
### Download Datasets
In the paper, we use the following two publicly available datasets for training. Here are some steps to prepare the data:
- [DAVIS-17](https://davischallenge.org/davis2017/code.html): we use all the data in the train subset of DAVIS-16.
However, please download DAVIS-17 to fit the code. It will automatically choose the subset of DAVIS-16 for training.
- [YoutubeVOS-2018](https://youtube-vos.org/dataset/): we sample the training data every 10 frames in YoutubeVOS-2018. We use the dataset version with 6fps rather than 30fps.
- Create soft links:
```cd data; ln -s your/davis17/path DAVIS2017; ln -s your/youtubevos/path YouTubeVOS_2018;```
### Prepare Edge Annotations
I have provided some Matlab scripts to generate edge annotations from masks. Please run ```data/run_davis2017.m```
and ```data/run_youtube.m```.
### Prepare HED Results
I have provided the pytorch codes to generate HED results for the two datasets (see ```3rdparty/pytorch-hed```).
Please run ```run_davis.py``` and ```run_youtube.py```.
The codes are borrowed from https://github.com/sniklaus/pytorch-hed.
### Prepare Optical Flow
I have provided the pytorch codes to generate optical flow results for the two datasets (see ```3rdparty/pytorch-pwc```).
Please run ```run_davis_flow.py``` and ```run_youtube_flow.py```.
The codes are borrowed from https://github.com/sniklaus/pytorch-pwc.
Please follow the [setup](https://github.com/sniklaus/pytorch-pwc#setup) section to install ```cupy```.
`warning: Total size of optical flow results of Youtube-VOS is more than 30GB.`
### Train
Once all data is prepared, please run ```python train_MATNet.py``` for training.
## Test
1. Run ```python test_MATNet.py``` to obtain the saliency results on DAVIS-16 val set.
2. Run ```python apply_densecrf_davis.py``` for binary segmentation results.
## Segmentation Results
1. The segmentation results on DAVIS-16 and Youtube-objects can be downloaded from [Google Drive](https://drive.google.com/file/d/1d23TGBtrr11g8KFAStwewTyxLq2nX4PT/view?usp=sharing).
2. The segmentation results on DAVIS-17 __val__ can be downloaded from [Google Drive](https://drive.google.com/open?id=1GTqjWc7tktw92tBNKln2eFmb9WzdcVrz). We achieved __58.6__ in terms of _Mean J&F_.
3. The segmentation results on DAVIS-17 __test-dev__ can be downloaded from [Google Drive](https://drive.google.com/file/d/1Ood-rr0d4YRFSrGGh6yVpYvOvE_h0tVK/view?usp=sharing). We achieved __59.8__ in terms of _Mean J&F_. The method also achieved the second place in DAVIS-20 unsupervised object segmentation challenge. Please refer to [paper](https://davischallenge.org/challenge2020/papers/DAVIS-Unsupervised-Challenge-2nd-Team.pdf) for more details of our challenge solution.
## Pretrained Models
The pre-trained model can be downloaded from [Google Drive](https://drive.google.com/file/d/1XlenYXgQjoThgRUbffCUEADS6kE4lvV_/view?usp=sharing).
## Citation
If you find MATNet useful for your research, please consider citing the following papers:
```
@inproceedings{zhou2020motion,
title={Motion-Attentive Transition for Zero-Shot Video Object Segmentation},
author={Zhou, Tianfei and Wang, Shunzhou and Zhou, Yi and Yao, Yazhou and Li, Jianwu and Shao, Ling},
booktitle={Proceedings of the 34th AAAI Conference on Artificial Intelligence (AAAI)},
year={2020},
pages={13066--13073}
}
@article{zhou2020matnet,
title={MATNet: Motion-Attentive Transition Network for Zero-Shot Video Object Segmentation},
author={Zhou, Tianfei and Li, Jianwu and Wang, Shunzhou and Tao, Ran and Shen, Jianbing},
journal={IEEE Transactions on Image Processing},
volume={29},
pages={8326-8338},
year={2020}
}
@inproceedings{zhou2021unsupervised,
author = {Zhou, Tianfei and Li, Jianwu and Li, Xueyi and Shao, Ling},
title = {Target-Aware Object Discovery and Association for Unsupervised Video Multi-Object Segmentation},
booktitle = {CVPR},
year = {2021}
}
```
================================================
FILE: apply_densecrf_davis.py
================================================
import pydensecrf.densecrf as dcrf
import numpy as np
import sys
import time
import os
from tqdm import tqdm
from skimage.io import imread, imsave
from pydensecrf.utils import unary_from_labels, create_pairwise_bilateral,\
create_pairwise_gaussian, unary_from_softmax
from os import listdir, makedirs
from os.path import isfile, join
def sigmoid(x):
return 1 / (1 + np.exp(-x))
# --- Paths and model selection ------------------------------------------
image_dir = 'data/DAVIS2017/JPEGImages/480p'
davis_result_dir = 'output/davis16'
model_name = 'MATNet_epoch0' # specify the folder name of saliency results

mask_dir = os.path.join(davis_result_dir, model_name)
save_dir = join(davis_result_dir, model_name + '_crf')

# Refine every saliency mask with a dense CRF, one sequence at a time.
for seq in tqdm(listdir(mask_dir)):
    seq_dir = join(image_dir, seq)
    seq_mask_dir = join(mask_dir, seq)
    res_dir = join(save_dir, seq)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    for f in listdir(seq_mask_dir):
        frameName = f[:-4]
        # RGB frame and its saliency mask (mask assumed single-channel;
        # `mask.shape` is unpacked into two values below).
        image = imread(join(seq_dir, f[:-4] + '.jpg'))
        mask = imread(join(seq_mask_dir, f))
        H, W = mask.shape
        # Min-max normalize the mask to [0, 1].
        min_val = np.min(mask.ravel())
        max_val = np.max(mask.ravel())
        out = (mask.astype('float') - min_val) / (max_val - min_val)
        # NOTE(review): the two assignments below are dead stores -- both
        # channels are overwritten with n_energy/p_energy a few lines down.
        labels = np.zeros((2, image.shape[0], image.shape[1]))
        labels[1, :, :] = out
        labels[0, :, :] = 1 - out
        tau = 1.05
        EPSILON = 1e-8
        # Convert the 0-255 mask into per-pixel energies.
        anno_norm = mask / 255
        n_energy = -np.log((1.0 - anno_norm + EPSILON)) / (tau * sigmoid(1 - anno_norm))
        p_energy = -np.log(anno_norm + EPSILON) / (tau * sigmoid(anno_norm))
        # NOTE(review): unary_from_softmax documents probabilities as input,
        # but energies are passed here; confirm the channel ordering
        # (n_energy in channel 1, p_energy in channel 0) is intentional.
        labels[1, :, :] = n_energy
        labels[0, :, :] = p_energy
        # Map class index -> output gray value (index 0 -> 0, index 1 -> 255).
        colors = [0, 255]
        colorize = np.empty((len(colors), 1), np.uint8)
        colorize[:, 0] = colors
        n_labels = 2
        crf = dcrf.DenseCRF(image.shape[1] * image.shape[0], n_labels)
        U = unary_from_softmax(labels)
        crf.setUnaryEnergy(U)
        # Smoothness kernel: penalizes label changes between nearby pixels.
        feats = create_pairwise_gaussian(sdims=(3, 3), shape=image.shape[:2])
        crf.addPairwiseEnergy(feats, compat=3,
                              kernel=dcrf.DIAG_KERNEL,
                              normalization=dcrf.NORMALIZE_SYMMETRIC)
        # Appearance kernel: encourages label changes to follow color edges.
        feats = create_pairwise_bilateral(sdims=(30, 30), schan=(5, 5, 5),
                                          img=image, chdim=2)
        crf.addPairwiseEnergy(feats, compat=5,
                              kernel=dcrf.DIAG_KERNEL,
                              normalization=dcrf.NORMALIZE_SYMMETRIC)
        # Up to 5 mean-field steps, stopping early once the KL-divergence
        # change falls below 500.
        Q, tmp1, tmp2 = crf.startInference()
        for i in range(5):
            temp = crf.klDivergence(Q)
            crf.stepInference(Q, tmp1, tmp2)
            if abs(crf.klDivergence(Q)-temp) < 500:
                break
        # Hard labeling, mapped to gray values and saved at the mask's shape.
        MAP = np.argmax(Q, axis=0)
        MAP = colorize[MAP]
        imsave(res_dir + '/' + frameName + '.png', MAP.reshape(mask.shape))
        #print("Saving: " + res_dir + '/' + frameName + '.png')
================================================
FILE: args.py
================================================
import argparse
def get_parser():
    """Build the argparse parser holding every MATNet train/test option."""
    p = argparse.ArgumentParser(description='MATNet')

    # Dataset geometry and data-loader settings.
    p.add_argument('-year', dest='year', default='2017')
    p.add_argument('-imsize', dest='imsize', default=480, type=int)
    p.add_argument('-batch_size', dest='batch_size', default=4, type=int)
    p.add_argument('-num_workers', dest='num_workers', default=4, type=int)

    ## TRAINING parameters ##
    p.add_argument('--resume', dest='resume', action='store_true',
                   help=('whether to resume training an existing model'
                         '(the one with name model_name will be used)'))
    p.set_defaults(resume=False)
    p.add_argument('-seed', dest='seed', default=123, type=int)
    p.add_argument('-gpu_id', dest='gpu_id', default=0, type=int)
    p.add_argument('-lr', dest='lr', default=1e-3, type=float)
    p.add_argument('-lr_cnn', dest='lr_cnn', default=1e-4, type=float)
    p.add_argument('-optim_cnn', dest='optim_cnn', default='sgd',
                   choices=['adam', 'sgd', 'rmsprop'])
    p.add_argument('-momentum', dest='momentum', default=0.9, type=float)
    p.add_argument('-weight_decay', dest='weight_decay', default=5e-4,
                   type=float)
    p.add_argument('-weight_decay_cnn', dest='weight_decay_cnn',
                   default=5e-4, type=float)
    p.add_argument('-optim', dest='optim', default='sgd',
                   choices=['adam', 'sgd', 'rmsprop'])
    p.add_argument('--crop', dest='crop', action='store_true')
    p.set_defaults(crop=False)
    p.add_argument('--update_encoder', dest='update_encoder',
                   action='store_true',
                   help='used in sync with finetune_after.'
                        ' no need to activate.')
    p.set_defaults(update_encoder=True)
    p.add_argument('-max_epoch', dest='max_epoch', default=20, type=int)

    # Visualization and logging.
    p.add_argument('-print_every', dest='print_every', default=10, type=int)

    # Loss weights.
    p.add_argument('-iou_weight', dest='iou_weight', default=1.0, type=float)

    # Data augmentation.
    p.add_argument('--augment', dest='augment', action='store_true')
    p.set_defaults(augment=True)
    p.add_argument('-rotation', dest='rotation', default=10, type=int)
    p.add_argument('-translation', dest='translation', default=0.1, type=float)
    p.add_argument('-shear', dest='shear', default=0.1, type=float)
    p.add_argument('-zoom', dest='zoom', default=0.7, type=float)

    # GPU selection (--cpu clears use_gpu).
    p.add_argument('--cpu', dest='use_gpu', action='store_false')
    p.set_defaults(use_gpu=True)
    p.add_argument('-ngpus', dest='ngpus', default=1, type=int)

    p.add_argument('-model_name', dest='model_name', default='model')
    p.add_argument('-log_file', dest='log_file', default='train.log')

    # Dataset parameters.
    p.add_argument('--resize', dest='resize', action='store_true')
    p.set_defaults(resize=False)
    p.add_argument('-dataset', dest='dataset', default='davis2017',
                   choices=['davis2017', 'youtube'])

    # Testing options.
    p.add_argument('-eval_split', dest='eval_split', default='test')
    p.add_argument('-mask_th', dest='mask_th', default=0.5, type=float)
    p.add_argument('-max_dets', dest='max_dets', default=100, type=int)
    p.add_argument('-min_size', dest='min_size', default=0.001, type=float)
    p.add_argument('--display', dest='display', action='store_true')
    p.add_argument('--no_display_text', dest='no_display_text',
                   action='store_true')
    # Defaults for flags without a registered argument plus the display toggles.
    p.set_defaults(display=False, display_route=False,
                   no_display_text=False, use_gt_masks=False)

    return p


if __name__ =="__main__":
    parser = get_parser()
    args_dict = parser.parse_args()
================================================
FILE: data/db_info.yaml
================================================
attributes: [AC, BC, CS, DB, DEF, EA, FM, HO, IO, LR, MB, OCC, OV, ROT, SC, SV]
sets: [train, val, test-dev]
years: [2016, 2017]
sequences:
- name: aerobatics
attributes: []
num_frames: 71
set: test-dev
eval_t: False
year: 2017
- name: bear
attributes: [DEF]
num_frames: 82
set: train
eval_t: True
year: 2016
- name: bike-packing
attributes: []
num_frames: 69
set: val
eval_t: False
year: 2017
- name: blackswan
attributes: []
num_frames: 50
set: val
eval_t: True
year: 2016
- name: bmx-bumps
attributes: [LR, SV, SC, FM, CS, IO, MB, OCC, HO, EA, OV]
num_frames: 90
set: train
eval_t: False
year: 2016
- name: bmx-trees
attributes: [LR, SV, SC, FM, CS, IO, MB, DEF, OCC, HO, EA, BC]
num_frames: 80
set: val
eval_t: False
year: 2016
- name: boat
attributes: [SC, DB, EA, BC]
num_frames: 75
set: train
eval_t: True
year: 2016
- name: boxing-fisheye
attributes: []
num_frames: 87
set: train
eval_t: False
year: 2017
- name: breakdance
attributes: [FM, DB, MB, DEF, HO, ROT, OV, AC]
num_frames: 84
set: val
eval_t: False
year: 2016
- name: breakdance-flare
attributes: [FM, CS, MB, DEF, HO, ROT]
num_frames: 71
set: train
eval_t: False
year: 2016
- name: bus
attributes: [SC, OCC, HO, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: camel
attributes: [CS, IO, DEF, ROT]
num_frames: 90
set: val
eval_t: True
year: 2016
- name: car-race
attributes: []
num_frames: 31
set: test-dev
eval_t: False
year: 2017
- name: car-roundabout
attributes: [ROT, BC]
num_frames: 75
set: val
eval_t: True
year: 2016
- name: car-shadow
attributes: [LR, EA, AC, BC]
num_frames: 40
set: val
eval_t: True
year: 2016
- name: car-turn
attributes: [SV, ROT, BC]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: carousel
attributes: []
num_frames: 69
set: test-dev
eval_t: False
year: 2017
- name: cat-girl
attributes: []
num_frames: 89
set: train
eval_t: False
year: 2017
- name: cats-car
attributes: []
num_frames: 67
set: test-dev
eval_t: False
year: 2017
- name: chamaleon
attributes: []
num_frames: 85
set: test-dev
eval_t: False
year: 2017
- name: classic-car
attributes: []
num_frames: 63
set: train
eval_t: False
year: 2017
- name: color-run
attributes: []
num_frames: 84
set: train
eval_t: False
year: 2017
- name: cows
attributes: [CS, IO, DEF, OCC, HO]
num_frames: 104
set: val
eval_t: True
year: 2016
- name: crossing
attributes: []
num_frames: 52
set: train
eval_t: False
year: 2017
- name: dance-jump
attributes: [SC, DB, MB, DEF, OCC, HO, ROT, EA]
num_frames: 60
set: train
eval_t: True
year: 2016
- name: dance-twirl
attributes: [SC, CS, IO, MB, DEF, HO, ROT, OV]
num_frames: 90
set: val
eval_t: False
year: 2016
- name: dancing
attributes: []
num_frames: 62
set: train
eval_t: False
year: 2017
- name: deer
attributes: []
num_frames: 79
set: test-dev
eval_t: False
year: 2017
- name: disc-jockey
attributes: []
num_frames: 76
set: train
eval_t: False
year: 2017
- name: dog
attributes: [FM, CS, MB, DEF, ROT, EA]
num_frames: 60
set: val
eval_t: False
year: 2016
- name: dog-agility
attributes: [FM, MB, DEF, OCC, HO, EA, OV, AC]
num_frames: 25
set: train
eval_t: False
year: 2016
- name: dog-gooses
attributes: []
num_frames: 86
set: train
eval_t: False
year: 2017
- name: dogs-jump
attributes: []
num_frames: 66
set: val
eval_t: False
year: 2017
- name: dogs-scale
attributes: []
num_frames: 83
set: train
eval_t: False
year: 2017
- name: drift-chicane
attributes: [LR, SV, FM, DB, HO, ROT, EA, AC]
num_frames: 52
set: val
eval_t: False
year: 2016
- name: drift-straight
attributes: [LR, SV, FM, CS, MB, HO, ROT, EA, OV, AC]
num_frames: 50
set: val
eval_t: True
year: 2016
- name: drift-turn
attributes: [SV, FM, IO, DB, HO, ROT, OV, AC]
num_frames: 64
set: train
eval_t: True
year: 2016
- name: drone
attributes: []
num_frames: 91
set: train
eval_t: False
year: 2017
- name: elephant
attributes: [CS, DB, DEF, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: flamingo
attributes: [SC, IO, DB, DEF, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: giant-slalom
attributes: []
num_frames: 127
set: test-dev
eval_t: False
year: 2017
- name: girl-dog
attributes: []
num_frames: 86
set: test-dev
eval_t: False
year: 2017
- name: goat
attributes: [CS, DEF, EA, BC]
num_frames: 90
set: val
eval_t: False
year: 2016
- name: gold-fish
attributes: []
num_frames: 78
set: val
eval_t: False
year: 2017
- name: golf
attributes: []
num_frames: 79
set: test-dev
eval_t: False
year: 2017
- name: guitar-violin
attributes: []
num_frames: 55
set: test-dev
eval_t: False
year: 2017
- name: gym
attributes: []
num_frames: 60
set: test-dev
eval_t: False
year: 2017
- name: helicopter
attributes: []
num_frames: 49
set: test-dev
eval_t: False
year: 2017
- name: hike
attributes: [LR, DEF, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: hockey
attributes: [SC, IO, DEF, HO, ROT]
num_frames: 75
set: train
eval_t: True
year: 2016
- name: horsejump-high
attributes: [SC, IO, DEF, OCC, HO]
num_frames: 50
set: val
eval_t: False
year: 2016
- name: horsejump-low
attributes: [SC, IO, DEF, OCC, HO, EA]
num_frames: 60
set: train
eval_t: False
year: 2016
- name: horsejump-stick
attributes: []
num_frames: 58
set: test-dev
eval_t: False
year: 2017
- name: hoverboard
attributes: []
num_frames: 81
set: test-dev
eval_t: False
year: 2017
- name: india
attributes: []
num_frames: 81
set: val
eval_t: False
year: 2017
- name: judo
attributes: []
num_frames: 34
set: val
eval_t: False
year: 2017
- name: kid-football
attributes: []
num_frames: 68
set: train
eval_t: False
year: 2017
- name: kite-surf
attributes: [SV, SC, IO, DB, MB, OCC, HO, EA]
num_frames: 50
set: val
eval_t: True
year: 2016
- name: kite-walk
attributes: [SC, IO, DB, DEF, OCC, HO]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: koala
attributes: []
num_frames: 100
set: train
eval_t: False
year: 2017
- name: lab-coat
attributes: []
num_frames: 47
set: val
eval_t: False
year: 2017
- name: lady-running
attributes: []
num_frames: 65
set: train
eval_t: False
year: 2017
- name: libby
attributes: [SC, MB, DEF, OCC, HO, EA]
num_frames: 49
set: val
eval_t: False
year: 2016
- name: lindy-hop
attributes: []
num_frames: 73
set: train
eval_t: False
year: 2017
- name: loading
attributes: []
num_frames: 50
set: val
eval_t: False
year: 2017
- name: lock
attributes: []
num_frames: 43
set: test-dev
eval_t: False
year: 2017
- name: longboard
attributes: []
num_frames: 52
set: train
eval_t: False
year: 2017
- name: lucia
attributes: [DEF, OCC, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: mallard-fly
attributes: [LR, SV, FM, DB, MB, DEF, ROT, EA, OV, AC]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: mallard-water
attributes: [LR, IO, DB, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: man-bike
attributes: []
num_frames: 75
set: test-dev
eval_t: False
year: 2017
- name: mbike-trick
attributes: []
num_frames: 79
set: val
eval_t: False
year: 2017
- name: miami-surf
attributes: []
num_frames: 70
set: train
eval_t: False
year: 2017
- name: monkeys-trees
attributes: []
num_frames: 83
set: test-dev
eval_t: False
year: 2017
- name: motocross-bumps
attributes: [SV, FM, IO, HO, ROT, OV, AC, BC]
num_frames: 60
set: train
eval_t: True
year: 2016
- name: motocross-jump
attributes: [SV, SC, FM, IO, MB, DEF, HO, ROT, EA, OV, AC]
num_frames: 40
set: val
eval_t: False
year: 2016
- name: motorbike
attributes: [LR, SV, SC, FM, IO, OCC, HO, ROT, EA]
num_frames: 43
set: train
eval_t: False
year: 2016
- name: mtb-race
attributes: []
num_frames: 69
set: test-dev
eval_t: False
year: 2017
- name: night-race
attributes: []
num_frames: 46
set: train
eval_t: False
year: 2017
- name: orchid
attributes: []
num_frames: 57
set: test-dev
eval_t: False
year: 2017
- name: paragliding
attributes: [LR, SC, IO, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: paragliding-launch
attributes: [SC, IO, DEF, HO, EA]
num_frames: 80
set: val
eval_t: True
year: 2016
- name: parkour
attributes: [LR, SV, FM, DEF, OCC, HO, ROT, AC]
num_frames: 100
set: val
eval_t: False
year: 2016
- name: people-sunset
attributes: []
num_frames: 67
set: test-dev
eval_t: False
year: 2017
- name: pigs
attributes: []
num_frames: 79
set: val
eval_t: False
year: 2017
- name: planes-crossing
attributes: []
num_frames: 31
set: test-dev
eval_t: False
year: 2017
- name: planes-water
attributes: []
num_frames: 38
set: train
eval_t: False
year: 2017
- name: rallye
attributes: []
num_frames: 50
set: train
eval_t: False
year: 2017
- name: rhino
attributes: [DEF, OCC, BC]
num_frames: 90
set: train
eval_t: True
year: 2016
- name: rollerblade
attributes: [LR, FM, CS, MB, DEF, HO]
num_frames: 35
set: train
eval_t: False
year: 2016
- name: rollercoaster
attributes: []
num_frames: 70
set: test-dev
eval_t: False
year: 2017
- name: salsa
attributes: []
num_frames: 86
set: test-dev
eval_t: False
year: 2017
- name: schoolgirls
attributes: []
num_frames: 80
set: train
eval_t: False
year: 2017
- name: scooter-black
attributes: [SV, IO, HO, EA]
num_frames: 43
set: val
eval_t: True
year: 2016
- name: scooter-board
attributes: []
num_frames: 91
set: train
eval_t: False
year: 2017
- name: scooter-gray
attributes: [SC, FM, IO, OCC, HO, ROT, EA, BC]
num_frames: 75
set: train
eval_t: False
year: 2016
- name: seasnake
attributes: []
num_frames: 80
set: test-dev
eval_t: False
year: 2017
- name: sheep
attributes: []
num_frames: 68
set: train
eval_t: False
year: 2017
- name: shooting
attributes: []
num_frames: 40
set: val
eval_t: False
year: 2017
- name: skate-jump
attributes: []
num_frames: 68
set: test-dev
eval_t: False
year: 2017
- name: skate-park
attributes: []
num_frames: 80
set: train
eval_t: False
year: 2017
- name: slackline
attributes: []
num_frames: 60
set: test-dev
eval_t: False
year: 2017
- name: snowboard
attributes: []
num_frames: 66
set: train
eval_t: False
year: 2017
- name: soapbox
attributes: [SV, IO, MB, DEF, HO, ROT, AC]
num_frames: 99
set: val
eval_t: True
year: 2016
- name: soccerball
attributes: [LR, FM, MB, OCC, HO]
num_frames: 48
set: train
eval_t: False
year: 2016
- name: stroller
attributes: [SC, FM, CS, IO, DEF, HO]
num_frames: 91
set: train
eval_t: True
year: 2016
- name: stunt
attributes: []
num_frames: 71
set: train
eval_t: False
year: 2017
- name: subway
attributes: []
num_frames: 88
set: test-dev
eval_t: False
year: 2017
- name: surf
attributes: [SV, FM, CS, IO, DB, HO, OV]
num_frames: 55
set: train
eval_t: True
year: 2016
- name: swing
attributes: [SC, FM, IO, DEF, OCC, HO]
num_frames: 60
set: train
eval_t: False
year: 2016
- name: tandem
attributes: []
num_frames: 72
set: test-dev
eval_t: False
year: 2017
- name: tennis
attributes: [SV, FM, IO, MB, DEF, HO]
num_frames: 70
set: train
eval_t: False
year: 2016
- name: tennis-vest
attributes: []
num_frames: 75
set: test-dev
eval_t: False
year: 2017
- name: tractor
attributes: []
num_frames: 65
set: test-dev
eval_t: False
year: 2017
- name: tractor-sand
attributes: []
num_frames: 76
set: train
eval_t: False
year: 2017
- name: train
attributes: [SC, HO, EA]
num_frames: 80
set: train
eval_t: True
year: 2016
- name: tuk-tuk
attributes: []
num_frames: 59
set: train
eval_t: False
year: 2017
- name: upside-down
attributes: []
num_frames: 65
set: train
eval_t: False
year: 2017
- name: varanus-cage
attributes: []
num_frames: 67
set: train
eval_t: False
year: 2017
- name: walking
attributes: []
num_frames: 72
set: train
eval_t: False
year: 2017
================================================
FILE: data/palette.txt
================================================
0 0 0
128 0 0
0 128 0
128 128 0
0 0 128
128 0 128
0 128 128
128 128 128
64 0 0
191 0 0
64 128 0
191 128 0
64 0 128
191 0 128
64 128 128
191 128 128
0 64 0
128 64 0
0 191 0
128 191 0
0 64 128
128 64 128
22 22 22
23 23 23
24 24 24
25 25 25
26 26 26
27 27 27
28 28 28
29 29 29
30 30 30
31 31 31
32 32 32
33 33 33
34 34 34
35 35 35
36 36 36
37 37 37
38 38 38
39 39 39
40 40 40
41 41 41
42 42 42
43 43 43
44 44 44
45 45 45
46 46 46
47 47 47
48 48 48
49 49 49
50 50 50
51 51 51
52 52 52
53 53 53
54 54 54
55 55 55
56 56 56
57 57 57
58 58 58
59 59 59
60 60 60
61 61 61
62 62 62
63 63 63
64 64 64
65 65 65
66 66 66
67 67 67
68 68 68
69 69 69
70 70 70
71 71 71
72 72 72
73 73 73
74 74 74
75 75 75
76 76 76
77 77 77
78 78 78
79 79 79
80 80 80
81 81 81
82 82 82
83 83 83
84 84 84
85 85 85
86 86 86
87 87 87
88 88 88
89 89 89
90 90 90
91 91 91
92 92 92
93 93 93
94 94 94
95 95 95
96 96 96
97 97 97
98 98 98
99 99 99
100 100 100
101 101 101
102 102 102
103 103 103
104 104 104
105 105 105
106 106 106
107 107 107
108 108 108
109 109 109
110 110 110
111 111 111
112 112 112
113 113 113
114 114 114
115 115 115
116 116 116
117 117 117
118 118 118
119 119 119
120 120 120
121 121 121
122 122 122
123 123 123
124 124 124
125 125 125
126 126 126
127 127 127
128 128 128
129 129 129
130 130 130
131 131 131
132 132 132
133 133 133
134 134 134
135 135 135
136 136 136
137 137 137
138 138 138
139 139 139
140 140 140
141 141 141
142 142 142
143 143 143
144 144 144
145 145 145
146 146 146
147 147 147
148 148 148
149 149 149
150 150 150
151 151 151
152 152 152
153 153 153
154 154 154
155 155 155
156 156 156
157 157 157
158 158 158
159 159 159
160 160 160
161 161 161
162 162 162
163 163 163
164 164 164
165 165 165
166 166 166
167 167 167
168 168 168
169 169 169
170 170 170
171 171 171
172 172 172
173 173 173
174 174 174
175 175 175
176 176 176
177 177 177
178 178 178
179 179 179
180 180 180
181 181 181
182 182 182
183 183 183
184 184 184
185 185 185
186 186 186
187 187 187
188 188 188
189 189 189
190 190 190
191 191 191
192 192 192
193 193 193
194 194 194
195 195 195
196 196 196
197 197 197
198 198 198
199 199 199
200 200 200
201 201 201
202 202 202
203 203 203
204 204 204
205 205 205
206 206 206
207 207 207
208 208 208
209 209 209
210 210 210
211 211 211
212 212 212
213 213 213
214 214 214
215 215 215
216 216 216
217 217 217
218 218 218
219 219 219
220 220 220
221 221 221
222 222 222
223 223 223
224 224 224
225 225 225
226 226 226
227 227 227
228 228 228
229 229 229
230 230 230
231 231 231
232 232 232
233 233 233
234 234 234
235 235 235
236 236 236
237 237 237
238 238 238
239 239 239
240 240 240
241 241 241
242 242 242
243 243 243
244 244 244
245 245 245
246 246 246
247 247 247
248 248 248
249 249 249
250 250 250
251 251 251
252 252 252
253 253 253
254 254 254
255 255 255
================================================
FILE: data/run_davis2017.m
================================================
% Convert every DAVIS-2017 annotation mask into a binary edge map with
% seg2edge, writing results under Annotations_edge with the same layout.
clc

src_dir = 'data/DAVIS2017/Annotations/480p';
save_dir = 'data/DAVIS2017/Annotations_edge/480p';

% List sequence folders, dropping the '.' and '..' entries.
listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);

for si = 1 : length(seq_names)
    seq = seq_names{si};
    seq_path = [src_dir, '/', seq];

    frame_listing = dir([seq_path, '/*.png']);
    frame_names = {frame_listing.name};

    for fi = 1 : length(frame_names)
        frame = frame_names{fi};
        % Binary boundary map with a 2-pixel radius, no ignored labels.
        edge_map = seg2edge(imread(fullfile(seq_path, frame)), 2, [], 'regular');

        out_path = fullfile(save_dir, seq);
        if ~exist(out_path, 'dir')
            mkdir(out_path)
        end
        imwrite(edge_map, fullfile(out_path, frame))
    end
end
================================================
FILE: data/run_youtube.m
================================================
% Generate binary edge annotations for every YouTube-VOS training mask by
% running seg2edge on each annotation frame, mirroring the folder layout.
clc

src_dir = 'data/YouTubeVOS_2018/train/Annotations';
save_dir = 'data/YouTubeVOS_2018/train/Annotations_edge';

% List sequence folders, dropping the '.' and '..' entries.
listing = dir(src_dir);
seq_names = {listing.name};
seq_names = seq_names(3:end);

for si = 1 : length(seq_names)
    seq = seq_names{si};
    disp(seq)

    seq_path = [src_dir, '/', seq];
    frame_listing = dir([seq_path, '/*.png']);
    frame_names = {frame_listing.name};

    for fi = 1 : length(frame_names)
        frame = frame_names{fi};
        % Binary boundary map with a 2-pixel radius, no ignored labels.
        edge_map = seg2edge(imread(fullfile(seq_path, frame)), 2, [], 'regular');

        out_path = fullfile(save_dir, seq);
        if ~exist(out_path, 'dir')
            mkdir(out_path)
        end
        imwrite(edge_map, fullfile(out_path, frame))
    end
end
================================================
FILE: data/seg2edge.m
================================================
% This function takes an input segment and produces binary boundaries.
% Multi-channel input segments are supported by the function.
%
% Inputs:
%   seg         - height x width x chn label image; a pixel is an edge when
%                 any channel of its label differs from a nearby pixel's.
%   radius      - radius (pixels) of the circular neighborhood probed for
%                 label transitions; larger values thicken the boundaries.
%   labelIgnore - K x chn matrix of label rows whose transitions are not
%                 marked as edges; pass [] to ignore nothing.
%   edge_type   - 'regular': any label change counts;
%                 'inner'  : non-zero pixels adjacent to all-zero pixels;
%                 'outer'  : all-zero pixels adjacent to non-zero pixels.
%
% Output:
%   idxEdge     - height x width logical map, true at boundary pixels.
function [idxEdge] = seg2edge(seg, radius, labelIgnore, edge_type)
% Get dimensions
[height, width, chn] = size(seg);
% The ignore list must provide one column per label channel.
if(~isempty(labelIgnore))
    if(chn~=size(labelIgnore, 2))
        error('Channel dimension not matching ignored label dimension!')
    end
end
% Set the considered neighborhood.
radius_search = max(ceil(radius), 1);
% (X, Y): coordinates of every pixel; (x, y): candidate neighbor offsets.
[X, Y] = meshgrid(1:width, 1:height);
[x, y] = meshgrid(-radius_search:radius_search, -radius_search:radius_search);
% Columnize everything
X = X(:); Y = Y(:);
x = x(:); y = y(:);
% Flatten labels to one row per pixel so multi-channel labels compare row-wise.
if(chn == 1)
    seg = seg(:);
else
    seg = reshape(seg, [height*width chn]);
end
% Build circular neighborhood
idxNeigh = sqrt(x.^2 + y.^2) <= radius;
x = x(idxNeigh); y = y(idxNeigh);
numPxlImg = length(X);
numPxlNeigh = length(x);
% Compute Gaussian weight
% NOTE(review): despite the comment above, no Gaussian weight is computed;
% the loop below simply marks label transitions for each neighbor offset.
idxEdge = false(numPxlImg, 1);
for i = 1:numPxlNeigh
    % Shift every pixel by the i-th neighbor offset.
    XNeigh = X+x(i);
    YNeigh = Y+y(i);
    % Keep only shifted positions that stay inside the image.
    idxValid = find( XNeigh >= 1 & XNeigh <= width & YNeigh >=1 & YNeigh <= height );
    XCenter = X(idxValid);
    YCenter = Y(idxValid);
    XNeigh = XNeigh(idxValid);
    YNeigh = YNeigh(idxValid);
    % Labels at each center pixel and at its shifted neighbor.
    LCenter = seg(sub2ind([height width], YCenter, XCenter), :);
    LNeigh = seg(sub2ind([height width], YNeigh, XNeigh), :);
    if(strcmp(edge_type, 'regular'))
        % Any change in any channel is an edge.
        idxDiff = find(any(LCenter~=LNeigh, 2));
    elseif(strcmp(edge_type, 'inner'))
        % Object-side edge: non-zero center next to an all-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & any(LCenter~=0, 2) & all(LNeigh==0, 2) );
    elseif(strcmp(edge_type, 'outer'))
        % Background-side edge: all-zero center next to a non-zero neighbor.
        idxDiff = find(any(LCenter~=LNeigh, 2) & all(LCenter==0, 2) & any(LNeigh~=0, 2) );
    else
        error('Wrong edge type input!');
    end
    % Drop transitions that involve an ignored label on either side.
    LCenterEdge = LCenter(idxDiff, :);
    LNeighEdge = LNeigh(idxDiff, :);
    idxIgnore2 = false(length(idxDiff), 1);
    for j = 1:size(labelIgnore, 1)
        idxIgnore2 = idxIgnore2 | ( all(bsxfun(@eq, LCenterEdge, labelIgnore(j, :)), 2) | all(bsxfun(@eq, LNeighEdge, labelIgnore(j, :)), 2) );
    end
    idxDiffGT = idxDiff(~idxIgnore2);
    idxEdge(idxValid(idxDiffGT)) = true;
end
% Back to image shape.
idxEdge = reshape(idxEdge, [height, width]);
================================================
FILE: dataloader/base.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# ----------------------------------------------------------------------------
import functools
import os.path as osp
import numpy as np
from skimage.io import ImageCollection
from misc.config import cfg
from misc.io_aux import imread_indexed
#################################
# HELPER FUNCTIONS
#################################
def _load_annotation(filename, single_object):
    """Read an indexed-color annotation; binarize all ids to {0,1} when
    single_object is true."""
    annotation, _ = imread_indexed(filename)
    return (annotation != 0).astype(np.uint8) if single_object else annotation
def _get_num_objects(annotation):
""" Count number of objects from segmentation mask"""
ids = sorted(np.unique(annotation))
# Remove unknown-label
ids = ids[:-1] if ids[-1] == 255 else ids
# Handle no-background case
ids = ids if ids[0] else ids[1:]
return len(ids)
#################################
# LOADER CLASSES
#################################
class BaseLoader(ImageCollection):
    """
    Base class to load image sets (inherits from skimage.ImageCollection).

    Arguments:
        split (string): dataset split identifier; stored on the instance.
        path (string): path to sequence folder.
        regex (string): glob pattern used to locate images when no LMDB
            environment is supplied.
        load_func (func): function to load an image from disk
            (see skimage.ImageCollection).
        lmdb_env: optional open LMDB environment whose value at key
            basename(path) is a '|'-separated list of file names.
    """
    def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
        # Identity check is the idiomatic form (was `not lmdb_env == None`).
        if lmdb_env is not None:
            key_db = osp.basename(path)
            with lmdb_env.begin() as txn:
                _files_vec = txn.get(key_db.encode()).decode().split('|')
                _files = [bytes(osp.join(path, f).encode())
                          for f in _files_vec]
            super(BaseLoader, self).__init__(_files, load_func=load_func)
        else:
            super(BaseLoader, self).__init__(
                osp.join(path + '/' + regex), load_func=load_func)
        # Sequence name is the folder's basename.
        self.name = osp.basename(path)
        self.split = split

    def __str__(self):
        return "< class: '{}' name: '{}', frames: {} >".format(
            type(self).__name__, self.name, len(self))
class Sequence(BaseLoader):
    """Image-sequence loader rooted at cfg.PATH.SEQUENCES/<name>.

    Arguments:
        name (string): sequence name.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
        seq_path = osp.join(cfg.PATH.SEQUENCES, name)
        super(Sequence, self).__init__(split, seq_path, regex,
                                       lmdb_env=lmdb_env)
class SequenceClip_simple:
    """Lightweight clip view over an existing Sequence: shares all of its
    attributes and adds a starting frame index."""
    def __init__(self, seq, starting_frame):
        # Mirror every attribute of the wrapped sequence onto this object.
        for key, value in seq.__dict__.items():
            setattr(self, key, value)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class SequenceClip(BaseLoader):
    """Image-sequence loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, starting_frame, regex="*.jpg",
                 lmdb_env=None):
        seq_path = osp.join(cfg.PATH.SEQUENCES, name)
        super(SequenceClip, self).__init__(split, seq_path, regex,
                                           lmdb_env=lmdb_env)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class Segmentation(BaseLoader):
    """Loads a folder of segmentation masks and exposes per-object iteration.

    Arguments:
        path (string): path to annotation folder.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, path, single_object, regex="*.png", lmdb_env=None):
        loader = functools.partial(_load_annotation,
                                   single_object=single_object)
        super(Segmentation, self).__init__(split, path, regex, loader,
                                           lmdb_env=lmdb_env)
        # Object count is derived from the sequence's first mask.
        self.n_objects = _get_num_objects(self[0])

    def iter_objects_id(self):
        """Yield each object id in [1, n_objects]."""
        for obj_id in range(1, self.n_objects + 1):
            yield obj_id

    def iter_objects(self):
        """Yield, per object, the list of its binary masks over all frames."""
        for obj_id in self.iter_objects_id():
            yield [(frame == obj_id).astype(np.uint8) for frame in self]
class Annotation(Segmentation):
    """Ground-truth annotation loader rooted at cfg.PATH.ANNOTATIONS/<name>.

    Arguments:
        name (string): sequence name.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, single_object, regex="*.png", lmdb_env=None):
        annot_path = osp.join(cfg.PATH.ANNOTATIONS, name)
        super(Annotation, self).__init__(split, annot_path, single_object,
                                         regex, lmdb_env=lmdb_env)
class AnnotationClip_simple:
    """Lightweight clip view over an existing Annotation loader: shares all
    of its attributes and adds a starting frame index."""
    def __init__(self, annot, starting_frame):
        # Mirror every attribute of the wrapped annotation onto this object.
        for key, value in annot.__dict__.items():
            setattr(self, key, value)
        self.starting_frame = starting_frame
class AnnotationClip(Segmentation):
    """Annotation loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, starting_frame, single_object,
                 regex="*.png", lmdb_env=None):
        annot_path = osp.join(cfg.PATH.ANNOTATIONS, name)
        super(AnnotationClip, self).__init__(split, annot_path, single_object,
                                             regex, lmdb_env=lmdb_env)
        self.starting_frame = starting_frame
================================================
FILE: dataloader/base_youtube.py
================================================
import functools
import os.path as osp
import numpy as np
from PIL import Image
from skimage.io import ImageCollection
from misc.config_youtubeVOS import cfg, phase
from misc.io_aux import imread_indexed,imwrite_indexed
#################################
# HELPER FUNCTIONS
#################################
def _load_annotation(filename, single_object):
    """Load an indexed annotation map; collapse all ids to 1 when
    single_object is true."""
    annotation, _ = imread_indexed(filename)
    if not single_object:
        return annotation
    return (annotation != 0).astype(np.uint8)
def _get_num_objects(annotation):
""" Count number of objects from segmentation mask"""
ids = sorted(np.unique(annotation))
# Remove unknown-label
ids = ids[:-1] if ids[-1] == 255 else ids
# Handle no-background case
ids = ids if ids[0] else ids[1:]
return len(ids)
#################################
# LOADER CLASSES
#################################
class BaseLoader(ImageCollection):
    """
    Base class to load image sets (inherits from skimage.ImageCollection).
    Also validates that the sequence name is registered for the given split.

    Arguments:
        split (string): one of the phase.*.value split identifiers.
        path (string): path to sequence folder.
        regex (string): glob pattern used when no LMDB environment is given.
        load_func (func): function to load an image from disk
            (see skimage.ImageCollection).
        lmdb_env: optional open LMDB environment whose value at key
            basename(path) is a '|'-separated list of file names.

    Raises:
        Exception: if basename(path) is not listed in the split's
            cfg.SEQUENCES_* registry.
    """
    def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
        # Identity check is the idiomatic form (was `not lmdb_env == None`).
        if lmdb_env is not None:
            key_db = osp.basename(path)
            with lmdb_env.begin() as txn:
                _files_vec = txn.get(key_db.encode()).decode().split('|')
                _files = [bytes(osp.join(path, f).encode()) for f in _files_vec]
            super(BaseLoader, self).__init__(_files, load_func=load_func)
        else:
            super(BaseLoader, self).__init__(
                osp.join(path + '/' + regex), load_func=load_func)
        # Sequence name
        self.name = osp.basename(path)
        self.split = split
        # Map the split to its registry of known sequence names; the original
        # repeated an identical raise in four separate branches.
        if split == phase.TRAIN.value:
            known = cfg.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            known = cfg.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            known = cfg.SEQUENCES_TRAINVAL
        else:  # test split
            known = cfg.SEQUENCES_TEST
        if self.name not in known:
            raise Exception("Sequence name '{}' not found.".format(self.name))

    def __str__(self):
        return "< class: '{}' name: '{}', frames: {} >".format(
            type(self).__name__, self.name, len(self))
class Sequence(BaseLoader):
    """Image-sequence loader; the root folder is chosen by the split.

    Arguments:
        name (string): sequence name.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
        # Resolve the split-specific root once, then delegate to BaseLoader.
        if split == phase.TRAIN.value:
            root = cfg.PATH.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.SEQUENCES_TRAINVAL
        else:  # test split
            root = cfg.PATH.SEQUENCES_TEST
        super(Sequence, self).__init__(split, osp.join(root, name), regex,
                                       lmdb_env=lmdb_env)
class SequenceClip_simple:
    """Lightweight clip view over an existing Sequence: shares all of its
    attributes and adds a starting frame index.

    Arguments:
        seq: an already-constructed Sequence-like object.
        starting_frame (int): index of the clip's first frame.
    """
    def __init__(self, seq, starting_frame):
        # The original branched on `seq.split == phase.TRAIN.value`, but both
        # branches ran the exact same statement; copy unconditionally.
        self.__dict__.update(seq.__dict__)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class SequenceClip(BaseLoader):
    """Image-sequence loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        regex (string): glob pattern used to locate the frames.
    """
    def __init__(self, split, name, starting_frame, regex="*.jpg", lmdb_env=None):
        # Resolve the split-specific root once, then delegate to BaseLoader.
        if split == phase.TRAIN.value:
            root = cfg.PATH.SEQUENCES_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.SEQUENCES_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.SEQUENCES_TRAINVAL
        else:  # test split
            root = cfg.PATH.SEQUENCES_TEST
        super(SequenceClip, self).__init__(split, osp.join(root, name), regex,
                                           lmdb_env=lmdb_env)
        self.starting_frame = starting_frame

    def __str__(self):
        return "< class: '{}' name: '{}', startingframe: {}, frames: {} >".format(
            type(self).__name__, self.name, self.starting_frame, len(self))
class Segmentation(BaseLoader):
    """Loads a folder of segmentation masks and exposes per-object iteration.

    Arguments:
        path (string): path to annotation folder.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, path, single_object, regex="*.png", lmdb_env=None):
        loader = functools.partial(_load_annotation,
                                   single_object=single_object)
        super(Segmentation, self).__init__(split, path, regex, loader,
                                           lmdb_env=lmdb_env)
        # Object count is derived from the sequence's first mask.
        self.n_objects = _get_num_objects(self[0])

    def iter_objects_id(self):
        """Yield each object id in [1, n_objects]."""
        for obj_id in range(1, self.n_objects + 1):
            yield obj_id

    def iter_objects(self):
        """Yield, per object, the list of its binary masks over all frames."""
        for obj_id in self.iter_objects_id():
            yield [(frame == obj_id).astype(np.uint8) for frame in self]
class Annotation(Segmentation):
    """Ground-truth annotation loader; the root folder is chosen by the split.

    Arguments:
        name (string): sequence name.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, single_object, regex="*.png", lmdb_env=None):
        # Resolve the split-specific root once, then delegate upward.
        if split == phase.TRAIN.value:
            root = cfg.PATH.ANNOTATIONS_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.ANNOTATIONS_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.ANNOTATIONS_TRAINVAL
        else:  # test split
            root = cfg.PATH.ANNOTATIONS_TEST
        super(Annotation, self).__init__(split, osp.join(root, name),
                                         single_object, regex,
                                         lmdb_env=lmdb_env)
class AnnotationClip_simple:
    """Lightweight clip view over an existing Annotation loader: shares all
    of its attributes and adds a starting frame index.

    Arguments:
        annot: an already-constructed Annotation-like object.
        starting_frame (int): index of the clip's first frame.
    """
    def __init__(self, annot, starting_frame):
        # The original branched on `annot.split == phase.TRAIN.value`, but
        # both branches ran the exact same statement; copy unconditionally.
        self.__dict__.update(annot.__dict__)
        self.starting_frame = starting_frame
class AnnotationClip(Segmentation):
    """Annotation loader that also records the clip's starting frame.

    Arguments:
        name (string): sequence name.
        starting_frame (int): index of the clip's first frame.
        single_object (bool): collapse every object id to 1.
        regex (string): glob pattern used to locate the masks.
    """
    def __init__(self, split, name, starting_frame, single_object,
                 regex="*.png", lmdb_env=None):
        # Resolve the split-specific root once, then delegate upward.
        if split == phase.TRAIN.value:
            root = cfg.PATH.ANNOTATIONS_TRAIN
        elif split == phase.VAL.value:
            root = cfg.PATH.ANNOTATIONS_VAL
        elif split == phase.TRAINVAL.value:
            root = cfg.PATH.ANNOTATIONS_TRAINVAL
        else:  # test split
            root = cfg.PATH.ANNOTATIONS_TEST
        super(AnnotationClip, self).__init__(split, osp.join(root, name),
                                             single_object, regex,
                                             lmdb_env=lmdb_env)
        self.starting_frame = starting_frame
================================================
FILE: dataloader/custom_transforms.py
================================================
import random
import cv2
import numpy as np
import torch
import math
class ScaleNRotate(object):
    """Scale (zoom-in, zoom-out) and rotate the image and the ground truth.

    Args:
        two possibilities:
        1. rots (tuple): (minimum, maximum) rotation angle
           scales (tuple): (minimum, maximum) scale
        2. rots [list]: list of fixed possible rotation angles
           scales [list]: list of fixed possible scales
    """
    def __init__(self, rots=(-30, 30), scales=(.75, 1.25)):
        # rots and scales must both be tuples or both be lists.
        assert (isinstance(rots, type(scales)))
        self.rots = rots
        self.scales = scales

    def __call__(self, sample):
        if type(self.rots) == tuple:
            # Continuous range of scales and rotations, centered on 0 / 1.
            rot = (self.rots[1] - self.rots[0]) * random.random() - \
                  (self.rots[1] - self.rots[0]) / 2
            sc = (self.scales[1] - self.scales[0]) * random.random() - \
                 (self.scales[1] - self.scales[0]) / 2 + 1
        elif type(self.rots) == list:
            # Fixed set of rotations/scales. random.randint is inclusive on
            # BOTH endpoints, so the upper bound must be len - 1; the
            # original used len(...) and could raise IndexError.
            rot = self.rots[random.randint(0, len(self.rots) - 1)]
            sc = self.scales[random.randint(0, len(self.scales) - 1)]
        for elem in sample.keys():
            if 'fname' in elem:
                continue
            tmp = sample[elem]
            h, w = tmp.shape[:2]
            center = (w / 2, h / 2)
            assert (center != 0)  # Strange behaviour warpAffine
            M = cv2.getRotationMatrix2D(center, rot, sc)
            # Nearest-neighbour for binary masks, cubic for everything else.
            if ((tmp == 0) | (tmp == 1)).all():
                flagval = cv2.INTER_NEAREST
            else:
                flagval = cv2.INTER_CUBIC
            tmp = cv2.warpAffine(tmp, M, (w, h), flags=flagval)
            sample[elem] = tmp
        return sample
class Resize(object):
    """Randomly pick one of the given scales and resize every array in the
    sample by that factor.

    Args:
        scales (list): the list of candidate scale factors.
    """
    def __init__(self, scales=[0.5, 0.8, 1]):
        self.scales = scales

    def __call__(self, sample):
        # Draw one fixed scale for the whole sample.
        sc = self.scales[random.randint(0, len(self.scales) - 1)]
        for key in sample.keys():
            if 'fname' in key:
                continue
            arr = sample[key]
            # Nearest-neighbour for 2-D masks, cubic otherwise.
            interp = cv2.INTER_NEAREST if arr.ndim == 2 else cv2.INTER_CUBIC
            sample[key] = cv2.resize(arr, None, fx=sc, fy=sc,
                                     interpolation=interp)
        return sample
class RandomHorizontalFlip(object):
    """Horizontally flip every array in the sample with probability 0.5
    (one coin toss per sample, so all entries stay aligned)."""
    def __call__(self, sample):
        if random.random() >= 0.5:
            return sample
        for key in sample.keys():
            if 'fname' in key:
                continue
            sample[key] = cv2.flip(sample[key], flipCode=1)
        return sample
class ToTensor(object):
    """Convert ndarrays in sample to Tensors (H x W x C -> C x H x W).
    2-D arrays are promoted to a single channel first."""
    def __call__(self, sample):
        for elem in sample.keys():
            if 'fname' in elem:
                continue
            tmp = sample[elem]
            if tmp.ndim == 2:
                # Promote 2-D masks to a single-channel image.
                # (The original also had a leftover debug print here; removed.)
                tmp = tmp[:, :, np.newaxis]
            # swap color axis because
            # numpy image: H x W x C
            # torch image: C X H X W
            tmp = tmp.transpose((2, 0, 1))
            sample[elem] = torch.from_numpy(tmp)
        return sample
================================================
FILE: dataloader/dataset_utils.py
================================================
def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,
                                   target_transforms=None, augment=False,
                                   inputRes=None):
    """Build the combined DAVIS + YouTube-VOS dataset (with edge-based hard
    example mining) for the given split."""
    from .davis2017_youtubevos_ehem import DAVISLoader
    return DAVISLoader(args, split=split, transform=image_transforms,
                       target_transform=target_transforms,
                       augment=augment, inputRes=inputRes)
================================================
FILE: dataloader/davis2017.py
================================================
from __future__ import division
import torch
from torch.utils import data
import os
import cv2
import glob
import lmdb
import numpy as np
from PIL import Image
import os.path as osp
from scipy.misc import imresize
from matplotlib import pyplot as plt
from torchvision import transforms
from dataloader import custom_transforms as tr
from .base import Sequence, Annotation
from misc.config import cfg, phase, db_read_sequences
class DAVISLoader(data.Dataset):
    '''
    Dataset for DAVIS: yields one (image, flow, mask, bdry) tuple per frame.
    The last frame of every video is dropped from images/masks/edges so each
    retained frame lines up with a flow frame (flow lists are used as-is —
    presumably the flow folders already lack a frame; verify on disk).
    '''
    def __init__(self, args, split, inputRes, augment=False,
                 transform=None, target_transform=None):
        # args must provide .year ("2016"/"2017") and, when augment=True,
        # .rotation (max absolute rotation angle for augmentation).
        self._year = args.year
        self._phase = split
        self.transform = transform                # applied to image and flow
        self.target_transform = target_transform  # applied to mask and bdry
        self.inputRes = inputRes                  # target size or None
        self.augment = augment
        self.augment_transform = None
        self._single_object = False
        assert args.year == "2017" or args.year == "2016"
        if augment:
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),
                                scales=(.75, 1.25))])
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time. It is recommended to use LMDB.')
        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq) for s in self._db_sequences]
        # NOTE(review): db_read_sequences appears to return a one-shot
        # iterable — it is re-read before every additional pass below.
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Load annotations
        self.annotations = [Annotation(self._phase,s.name, self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]
        self._db_sequences = db_read_sequences(args.year, self._phase)
        # Load Videos: keep only sequences whose 'set' matches this split.
        self.videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            if s['set'] == self._phase:
                self.videos.append(s['name'])
        # Flat per-frame file lists across all selected videos.
        self.imagefiles = []
        self.maskfiles = []
        self.flowfiles = []
        self.edgefiles = []
        for _video in self.videos:
            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES, _video, '*.jpg')))
            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS, _video, '*.png')))
            flowfiles = sorted(glob.glob(os.path.join(cfg.PATH.FLOW, _video, '*.png')))
            edgefiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_EDGE, _video, '*.png')))
            # Drop each video's last frame so every entry has a flow frame.
            self.imagefiles.extend(imagefiles[:-1])
            self.maskfiles.extend(maskfiles[:-1])
            self.flowfiles.extend(flowfiles)
            self.edgefiles.extend(edgefiles[:-1])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        # All four modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) == len(self.flowfiles) == len(self.edgefiles))

    def __len__(self):
        # One dataset item per retained frame.
        return len(self.imagefiles)

    def __getitem__(self, index):
        """Return (image, flow, mask, bdry) for frame `index`."""
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        flowfile = self.flowfiles[index]
        edgefile = self.edgefiles[index]
        image = Image.open(imagefile).convert('RGB')
        # NOTE(review): flow frames are read as 3-channel images — presumably
        # color-coded optical flow renderings; confirm against data prep.
        flow = Image.open(flowfile).convert('RGB')
        mask = cv2.imread(maskfile, 0)
        # Binarize: any object id becomes foreground (255).
        mask[mask > 0] = 255
        bdry = cv2.imread(edgefile, 0)
        #plt.imshow(bdry)
        #plt.show()
        mask = Image.fromarray(mask)
        bdry = Image.fromarray(bdry)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize was removed in SciPy >= 1.3;
            # this path requires an old scipy (see requirements) or a port.
            image = imresize(image, self.inputRes)
            flow = imresize(flow, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
            bdry = imresize(bdry, self.inputRes, interp='nearest')
        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry}
        if self.augment_transform is not None:
            # Augmentations operate on the whole sample so all entries get
            # the same flip/rotation/scale.
            sample = self.augment_transform(sample)
        image, flow, mask, bdry = sample['image'], sample['flow'], sample['mask'], sample['bdry']
        if self.transform is not None:
            image = self.transform(image)
            flow = self.transform(flow)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
            bdry = self.target_transform(bdry)
        return image, flow, mask, bdry
================================================
FILE: dataloader/davis2017_youtubevos_ehem.py
================================================
from __future__ import division
from torch.utils import data
import os
import cv2
import glob
import lmdb
import numpy as np
from PIL import Image
import os.path as osp
from scipy.misc import imresize
from torchvision import transforms
from dataloader import custom_transforms as tr
from .base import Sequence, Annotation
from misc.config import cfg as cfg_davis
from misc.config_youtubeVOS import cfg as cfg_youtube
from misc.config import db_read_sequences as db_read_sequences_davis
from misc.config_youtubeVOS import db_read_sequences_train as db_read_sequences_train_youtube
class DAVISLoader(data.Dataset):
    '''
    Dataset for DAVIS (train split additionally mixes in YouTube-VOS).
    Yields (image, flow, mask, bdry, negative_pixels) per frame, where
    negative_pixels are HED edge responses outside a dilated object mask —
    used as hard negative supervision for edge prediction.
    '''
    def __init__(self, args, split, inputRes, augment=False,
                 transform=None, target_transform=None):
        # args must provide .year ("2016"/"2017") and, when augment=True,
        # .rotation (max absolute rotation angle for augmentation).
        self._year = args.year
        self._phase = split
        self.transform = transform                # applied to image and flow
        self.target_transform = target_transform  # applied to the targets
        self.inputRes = inputRes                  # target size or None
        self.augment = augment
        self.augment_transform = None
        self._single_object = False
        assert args.year == "2017" or args.year == "2016"
        if augment:
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation),
                                scales=(.75, 1.25))])
        # Flat per-frame file lists, filled by load_davis/load_youtubevos.
        self.imagefiles = []
        self.maskfiles = []
        self.flowfiles = []
        self.edgefiles = []
        self.hedfiles = []
        if split == 'train':
            # Training mixes DAVIS with (subsampled) YouTube-VOS.
            self.load_davis(args)
            self.load_youtubevos(args)
        else:
            self.load_davis(args)

    def __len__(self):
        # One dataset item per retained frame.
        return len(self.imagefiles)

    def __getitem__(self, index):
        """Return (image, flow, mask, bdry, negative_pixels) for a frame."""
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        flowfile = self.flowfiles[index]
        edgefile = self.edgefiles[index]
        hedfile = self.hedfiles[index]
        image = Image.open(imagefile).convert('RGB')
        # NOTE(review): flow frames are read as 3-channel images — presumably
        # color-coded optical flow renderings; confirm against data prep.
        flow = Image.open(flowfile).convert('RGB')
        mask = cv2.imread(maskfile, 0)
        # Binarize: any object id becomes foreground (255).
        mask[mask > 0] = 255
        bdry = cv2.imread(edgefile, 0)
        hed = cv2.imread(hedfile, 0)
        # enlarge the object mask
        kernel = np.ones((11, 11), np.uint8) # use a large kernel
        dilated_mask = cv2.dilate(mask, kernel, iterations=1)
        # 1.0 outside the dilated object, 0.0 inside: keeps only HED edge
        # responses away from the object as hard negatives.
        inverse_dilated_mask = (255.0 - dilated_mask) / 255.0
        negative_pixels = hed * inverse_dilated_mask
        kernel = np.ones((5, 5), np.uint8) # use a small kernel
        # Slightly thicken the negative-edge responses.
        negative_pixels = cv2.dilate(negative_pixels, kernel, iterations=1)
        mask = Image.fromarray(mask)
        bdry = Image.fromarray(bdry)
        # NOTE(review): negative_pixels is float at this point, so this
        # yields a float-mode PIL image — confirm downstream expectations.
        negative_pixels = Image.fromarray(negative_pixels)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize was removed in SciPy >= 1.3;
            # this path requires an old scipy (see requirements) or a port.
            image = imresize(image, self.inputRes)
            flow = imresize(flow, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
            bdry = imresize(bdry, self.inputRes, interp='nearest')
            negative_pixels = imresize(negative_pixels, self.inputRes,
                                       interp='nearest')
        sample = {'image': image, 'flow': flow, 'mask': mask, 'bdry': bdry,
                  'negative_pixels': negative_pixels}
        if self.augment_transform is not None:
            # Augmentations operate on the whole sample so all entries get
            # the same flip/rotation/scale.
            sample = self.augment_transform(sample)
        image, flow, mask, bdry, negative_pixels =\
            sample['image'], sample['flow'],\
            sample['mask'], sample['bdry'], sample['negative_pixels']
        if self.transform is not None:
            image = self.transform(image)
            flow = self.transform(flow)
        if self.target_transform is not None:
            # Targets are 2-D here; add a channel axis before transforming.
            mask = mask[:, :, np.newaxis]
            bdry = bdry[:, :, np.newaxis]
            negative_pixels = negative_pixels[:, :, np.newaxis]
            mask = self.target_transform(mask)
            bdry = self.target_transform(bdry)
            negative_pixels = self.target_transform(negative_pixels)
        return image, flow, mask, bdry, negative_pixels

    def load_youtubevos(self, args):
        """Append (subsampled) YouTube-VOS training frames to the file lists."""
        self._db_sequences = db_read_sequences_train_youtube()
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg_youtube.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            # NOTE(review): lmdb_env_annot is opened but unused here.
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time.'
                  ' It is recommended to use LMDB.')
        # Load sequences
        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq)
                          for s in self._db_sequences]
        # Collect the video names (sequence ids) to iterate below.
        videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            videos.append(s)
        for _video in videos:
            imagefile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))
            maskfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))
            flowfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.FLOW, _video, '*.png')))
            edgefile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.ANNOTATIONS_TRAIN_EDGE, _video, '*.png')))
            hedfile = sorted(glob.glob(os.path.join(
                cfg_youtube.PATH.HED, _video, '*.jpg')))
            # Keep every 10th frame, dropping each video's last frame.
            # NOTE(review): flow uses [::10] (no [:-1]) — presumably the flow
            # folder already lacks the last frame; confirm on disk, the
            # assert below depends on it.
            self.imagefiles.extend(imagefile[:-1:10])
            self.maskfiles.extend(maskfile[:-1:10])
            self.flowfiles.extend(flowfile[::10])
            self.edgefiles.extend(edgefile[:-1:10])
            self.hedfiles.extend(hedfile[:-1:10])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        print('hed: ', len(self.hedfiles))
        print('flow: ', len(self.flowfiles))
        print('edge: ', len(self.edgefiles))
        # All five modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) ==
               len(self.flowfiles) == len(self.edgefiles) ==
               len(self.hedfiles))

    def load_davis(self, args):
        """Append DAVIS frames for the current split to the file lists."""
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg_davis.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time.'
                  ' It is recommended to use LMDB.')
        self.sequences = [Sequence(self._phase, s.name, lmdb_env=lmdb_env_seq)
                          for s in self._db_sequences]
        # NOTE(review): db_read_sequences_davis appears to return a one-shot
        # iterable — it is re-read before every additional pass below.
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Load annotations
        self.annotations = [Annotation(
            self._phase, s.name, self._single_object, lmdb_env=lmdb_env_annot)
            for s in self._db_sequences]
        self._db_sequences = db_read_sequences_davis(args.year, self._phase)
        # Load Videos: keep only sequences whose 'set' matches this split.
        videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            if s['set'] == self._phase:
                videos.append(s['name'])
        for _video in videos:
            imagefile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.SEQUENCES, _video, '*.jpg')))
            maskfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.ANNOTATIONS, _video, '*.png')))
            flowfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.FLOW, _video, '*.png')))
            edgefile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.ANNOTATIONS_EDGE, _video, '*.png')))
            hedfile = sorted(glob.glob(os.path.join(
                cfg_davis.PATH.HED, _video, '*.jpg')))
            # Drop each video's last frame so every entry has a flow frame
            # (flow lists are used as-is; see note in load_youtubevos).
            self.imagefiles.extend(imagefile[:-1])
            self.maskfiles.extend(maskfile[:-1])
            self.flowfiles.extend(flowfile)
            self.edgefiles.extend(edgefile[:-1])
            self.hedfiles.extend(hedfile[:-1])
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))
        print('hed: ', len(self.hedfiles))
        print('flow: ', len(self.flowfiles))
        print('edge: ', len(self.edgefiles))
        # All five modality lists must stay frame-aligned.
        assert(len(self.imagefiles) == len(self.maskfiles) ==
               len(self.flowfiles) == len(self.edgefiles) ==
               len(self.hedfiles))
================================================
FILE: dataloader/hed_bsds.py
================================================
import os
from PIL import Image
from torch.utils import data
class HEDBSDSTrain(data.Dataset):
    """HED-style BSDS training set: parses 'train_pair.lst' (one
    'image_path label_path' pair per line, relative to root_dir) and yields
    (image, label) pairs."""
    def __init__(self, root_dir, transform=None, target_transform=None):
        self.transform = transform
        self.target_transform = target_transform
        list_file = os.path.join(root_dir, 'train_pair.lst')
        with open(list_file) as f:
            pairs = [line.split() for line in f.readlines()]
        self.imagefiles = [os.path.join(root_dir, p[0]) for p in pairs]
        self.labelfiles = [os.path.join(root_dir, p[1]) for p in pairs]

    def __len__(self):
        return len(self.imagefiles)

    def __getitem__(self, index):
        image = Image.open(self.imagefiles[index]).convert('RGB')
        label = Image.open(self.labelfiles[index]).convert('L')
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label
class HEDBSDSTest(data.Dataset):
    """HED-style BSDS test set: parses 'test.lst' (one image path per line,
    relative to root_dir) and yields (image, imagefile) pairs."""
    def __init__(self, root_dir, transform=None):
        self.transform = transform
        list_file = os.path.join(root_dir, 'test.lst')
        with open(list_file) as f:
            self.imagefiles = [os.path.join(root_dir, line.split()[0])
                               for line in f.readlines()]

    def __len__(self):
        return len(self.imagefiles)

    def __getitem__(self, index):
        imagefile = self.imagefiles[index]
        image = Image.open(imagefile).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, imagefile
================================================
FILE: dataloader/youtubeVOS.py
================================================
#TODO
from collections import namedtuple
import os
import cv2
import numpy as np
from PIL import Image
from .base_youtube import Sequence, SequenceClip, Annotation, AnnotationClip, BaseLoader, Segmentation, SequenceClip_simple, AnnotationClip_simple
from misc.config_youtubeVOS import cfg,phase,db_read_sequences_train,db_read_sequences_val, db_read_sequences_test, db_read_sequences_trainval
import os.path as osp
import glob
import lmdb
from scipy.misc import imresize
from torch.utils import data
from torchvision import transforms
from dataloader import custom_transforms as tr
class YoutubeVOSLoader(data.Dataset):
    # Frame-level YouTube-VOS dataset: yields one (RGB image, binary mask)
    # pair per training frame, with optional flip/rotate augmentation.
    def __init__(self,
                 args,
                 transform=None,
                 target_transform=None,
                 augment=False,
                 split = 'train',
                 inputRes = None):
        # Copy configuration from the argparse namespace.
        self._phase = split
        self._single_object = args.single_object
        self._length_clip = args.length_clip
        self.transform = transform
        self.target_transform = target_transform
        self.split = split
        self.inputRes = inputRes
        self.max_seq_len = args.gt_maxseqlen
        self.dataset = args.dataset
        self.flip = augment
        if augment:
            # Joint image+mask augmentation (flip, scale in [0.75, 1.25],
            # rotation in [-args.rotation, args.rotation] degrees).
            self.augment_transform = transforms.Compose([
                tr.RandomHorizontalFlip(),
                tr.ScaleNRotate(rots=(-args.rotation, args.rotation), scales=(.75, 1.25))])
        else:
            self.augment_transform = None
        # Select the sequence-name list matching the requested split.
        if self._phase == phase.TRAIN.value:
            self._db_sequences = db_read_sequences_train()
        elif self._phase == phase.VAL.value:
            self._db_sequences = db_read_sequences_val()
        elif self._phase == phase.TRAINVAL.value:
            self._db_sequences = db_read_sequences_trainval()
        else: #self._phase == 'test':
            self._db_sequences = db_read_sequences_test()
        # Check lmdb existance. If not proceed with standard dataloader.
        lmdb_env_seq_dir = osp.join(cfg.PATH.DATA, 'lmdb_seq')
        lmdb_env_annot_dir = osp.join(cfg.PATH.DATA, 'lmdb_annot')
        if osp.isdir(lmdb_env_seq_dir) and osp.isdir(lmdb_env_annot_dir):
            lmdb_env_seq = lmdb.open(lmdb_env_seq_dir)
            lmdb_env_annot = lmdb.open(lmdb_env_annot_dir)
        else:
            lmdb_env_seq = None
            lmdb_env_annot = None
            print('LMDB not found. This could affect the data loading time. It is recommended to use LMDB.')
        # Load sequences
        self.sequences = [Sequence(self._phase, s, lmdb_env=lmdb_env_seq) for s in self._db_sequences]
        # Load annotations
        self.annotations = [Annotation(self._phase,s,self._single_object, lmdb_env=lmdb_env_annot) for s in self._db_sequences]
        # Load sequences
        self.videos = []
        for seq, s in zip(self.sequences, self._db_sequences):
            self.videos.append(s)
        # Flatten per-video frame/annotation paths into one indexable list.
        # NOTE(review): paths always come from the TRAIN folders regardless
        # of split -- confirm this is intentional for val/test.
        self.imagefiles = []
        self.maskfiles = []
        for _video in self.videos:
            imagefiles = sorted(glob.glob(os.path.join(cfg.PATH.SEQUENCES_TRAIN, _video, '*.jpg')))
            maskfiles = sorted(glob.glob(os.path.join(cfg.PATH.ANNOTATIONS_TRAIN, _video, '*.png')))
            self.imagefiles.extend(imagefiles)
            self.maskfiles.extend(maskfiles)
        print('images: ', len(self.imagefiles))
        print('masks: ', len(self.maskfiles))

    def __len__(self):
        # Length counts frames, not videos.
        return len(self.imagefiles)

    def __getitem__(self, index):
        imagefile = self.imagefiles[index]
        maskfile = self.maskfiles[index]
        image = Image.open(imagefile).convert('RGB')
        # Grayscale read; binarize all object ids to a single 255 foreground.
        mask = cv2.imread(maskfile, 0)
        mask[mask > 0] = 255
        mask = Image.fromarray(mask)
        if self.inputRes is not None:
            # NOTE(review): scipy.misc.imresize returns ndarrays (not PIL
            # images); the transforms below must accept arrays -- confirm.
            image = imresize(image, self.inputRes)
            mask = imresize(mask, self.inputRes, interp='nearest')
        sample = {'image': image, 'gt': mask}
        if self.augment_transform is not None:
            sample = self.augment_transform(sample)
        image, mask = sample['image'], sample['gt']
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return image, mask
================================================
FILE: dataset_lmdb_generator.py
================================================
# This script generates a data structure in the form of key-value storage. This is made in the huge amount of
# calls to the function os.listdir inside base_youtube.py
import os
import lmdb
from args import get_parser
class LMDBGenerator:
    """Builds an LMDB key/value index of frame filenames.

    For every sequence folder under ``frames_dir`` it stores, under the key
    ``<folder name>``, the '|'-joined sorted basenames of the files matching
    ``ext``. This avoids repeated os.listdir calls in the dataloaders.
    """

    def __init__(self, ext='.jpg', gen_type='seq'):
        self.ext = ext            # file extension to index (e.g. '.jpg')
        self.gen_type = gen_type  # suffix of the lmdb directory name

    def generate_lmdb_file(self, root_dir, frames_dir):
        """Index every sequence folder of frames_dir into root_dir/lmdb_<type>."""
        env = lmdb.open(os.path.join(root_dir, 'lmdb_' + self.gen_type))
        for seq_name in os.listdir(frames_dir):
            seq_dir = os.path.join(frames_dir, seq_name)
            frames = sorted(f for f in os.listdir(seq_dir) if f.endswith(self.ext))
            joined = '|'.join(frames)
            print( "Generating lmdb for: " + seq_dir)
            with env.begin(write=True) as txn:
                txn.put(seq_name.encode('ascii'), joined.encode())
if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    if args.dataset == 'youtube':
        from misc.config_youtubeVOS import cfg
        # YouTube-VOS: index train and test splits separately for both
        # frames (.jpg) and annotations (.png).
        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TRAIN)
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES_TEST)
        frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TRAIN)
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS_TEST)
    else:
        from misc.config import cfg
        # DAVIS: a single JPEGImages / Annotations root per dataset.
        frame_lmdb_generator_sequences = LMDBGenerator(ext='.jpg', gen_type='seq')
        frame_lmdb_generator_sequences.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.SEQUENCES)
        frame_lmdb_generator_annotations = LMDBGenerator(ext='.png', gen_type='annot')
        frame_lmdb_generator_annotations.generate_lmdb_file(cfg.PATH.DATA, cfg.PATH.ANNOTATIONS)
================================================
FILE: measures/__init__.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
from .jaccard import db_eval_iou
from .f_boundary import db_eval_boundary
from misc import log
try:
from .t_stability import db_eval_t_stab
except:
log.warning("Temporal stability not available")
from .statistics import _statistics
================================================
FILE: measures/f_boundary.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import numpy as np
""" Utilities for computing, reading and saving benchmark evaluation."""
def db_eval_boundary(foreground_mask,gt_mask,bound_th=0.008):
    """
    Boundary F-measure between a predicted and an annotated binary mask.

    Boundary pixels of each mask are dilated by a tolerance radius and
    matched against the other mask's boundary (morphological speed-up of
    the boundary precision/recall computation).

    Arguments:
        foreground_mask (ndarray): binary segmentation image.
        gt_mask        (ndarray): binary annotated image.
        bound_th       (float): tolerance; >= 1 means absolute pixels,
                                otherwise a fraction of the image diagonal.

    Returns:
        F (float): boundaries F-measure.
    """
    assert np.atleast_3d(foreground_mask).shape[2] == 1

    # Tolerance radius in pixels.
    if bound_th >= 1:
        bound_pix = bound_th
    else:
        bound_pix = np.ceil(bound_th * np.linalg.norm(foreground_mask.shape))

    # One-pixel-wide boundary maps of both masks.
    fg_boundary = seg2bmap(foreground_mask)
    gt_boundary = seg2bmap(gt_mask)

    from skimage.morphology import binary_dilation, disk

    fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
    gt_dil = binary_dilation(gt_boundary, disk(bound_pix))

    # Boundary pixels of one mask that fall inside the other's tolerance band.
    gt_match = gt_boundary * fg_dil
    fg_match = fg_boundary * gt_dil

    n_fg = np.sum(fg_boundary)
    n_gt = np.sum(gt_boundary)

    # Degenerate cases: one or both boundaries empty.
    if n_fg == 0 and n_gt > 0:
        precision, recall = 1, 0
    elif n_fg > 0 and n_gt == 0:
        precision, recall = 0, 1
    elif n_fg == 0 and n_gt == 0:
        precision, recall = 1, 1
    else:
        precision = np.sum(fg_match) / float(n_fg)
        recall = np.sum(gt_match) / float(n_gt)

    # Harmonic mean of precision and recall (0 when both are 0).
    if precision + recall == 0:
        return 0
    return 2 * precision * recall / (precision + recall)
def seg2bmap(seg,width=None,height=None):
    """
    From a segmentation, compute a binary boundary map with 1 pixel wide
    boundaries. The boundary pixels are offset by 1/2 pixel towards the
    origin from the actual segment boundary.

    Arguments:
        seg    : Segments labeled from 1..k.
        width  : Width of desired bmap <= seg.shape[1]
        height : Height of desired bmap <= seg.shape[0]

    Returns:
        bmap (ndarray): Binary boundary map.

    David Martin <dmartin@eecs.berkeley.edu>
    January 2003
    """
    # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
    # (The original follow-up `seg[seg>0] = 1` was a no-op on a bool array.)
    seg = seg.astype(bool)

    assert np.atleast_3d(seg).shape[2] == 1

    width = seg.shape[1] if width is None else width
    height = seg.shape[0] if height is None else height

    h,w = seg.shape[:2]

    ar1 = float(width) / float(height)
    ar2 = float(w) / float(h)

    # Parenthesize the comparisons: `|` binds tighter than `>`, so the
    # original expression compared against bitwise-ORed operands.
    assert not ((width > w) | (height > h) | (abs(ar1 - ar2) > 0.01)),\
            'Can''t convert %dx%d seg to %dx%d bmap.'%(w,h,width,height)

    # Shift the mask left / up / up-left; XOR against the original marks the
    # pixels whose right/bottom/diagonal neighbor differs.
    e  = np.zeros_like(seg)
    s  = np.zeros_like(seg)
    se = np.zeros_like(seg)

    e[:,:-1]    = seg[:,1:]
    s[:-1,:]    = seg[1:,:]
    se[:-1,:-1] = seg[1:,1:]

    b        = seg^e | seg^s | seg^se
    b[-1,:]  = seg[-1,:]^e[-1,:]
    b[:,-1]  = seg[:,-1]^s[:,-1]
    b[-1,-1] = 0

    if w == width and h == height:
        bmap = b
    else:
        # Rescale boundary pixels to the requested bmap size.
        bmap = np.zeros((height,width))
        for x in range(w):
            for y in range(h):
                if b[y,x]:
                    # `floor` was undefined (NameError) in the original; use
                    # np.floor. Second axis used `width / h` -- a typo for
                    # `width / w`.
                    j = 1 + int(np.floor((y - 1) + height / h))
                    i = 1 + int(np.floor((x - 1) + width / w))
                    bmap[j,i] = 1

    return bmap
================================================
FILE: measures/jaccard.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
""" Compute Jaccard Index. """
import numpy as np
import matplotlib.pyplot as plt
def db_eval_iou_multi(annotations, segmentations):
    """Mean Jaccard index over a batch.

    Both arguments are (batch, 1, H, W) arrays; the single channel of each
    item is scored with db_eval_iou and the scores are averaged.
    """
    batch = annotations.shape[0]
    total = 0.0
    for k in range(batch):
        total += db_eval_iou(annotations[k, 0, :, :],
                             segmentations[k, 0, :, :])
    return total / batch
def db_eval_iou(annotation,segmentation):
    """ Compute region similarity as the Jaccard Index.

    Arguments:
        annotation   (ndarray): binary annotation map.
        segmentation (ndarray): binary segmentation map.

    Return:
        jaccard (float): region similarity (1 when both maps are empty).
    """
    # Binarize both maps at 0.5.
    fg_ann = annotation > 0.5
    fg_seg = segmentation > 0.5

    # Two empty masks are defined as a perfect match.
    if np.isclose(np.sum(fg_ann), 0) and np.isclose(np.sum(fg_seg), 0):
        return 1

    intersection = np.sum(fg_ann & fg_seg)
    union = np.sum(fg_ann | fg_seg, dtype=np.float32)
    return intersection / union
================================================
FILE: measures/statistics.py
================================================
import numpy as np
import warnings
def mean(X):
    """
    Average of X, ignoring NaN entries.
    """
    return np.nanmean(X)
def recall(X,threshold=0.5):
    """
    Fraction of values of X scoring higher than 'threshold' (NaN-safe mean
    of the boolean comparisons).
    """
    return np.nanmean(np.asarray(X) > threshold)
def decay(X,n_bins=4):
    """
    Performance loss over time: mean of the first bin of per-frame scores
    minus mean of the last bin (positive = performance degrades).

    Arguments:
        X: sliceable sequence of per-frame measurements.
        n_bins: number of temporal bins (the code below assumes 4 when
                indexing D_bins[3]).
    """
    # Bin edges over the sequence; +1e-10 nudges exact .5 values upward
    # before rounding.
    ids = np.round(np.linspace(1,len(X),n_bins+1)+1e-10)-1;
    # Use a native integer index type: the original uint8 cast silently
    # wrapped for sequences longer than 255 frames.
    ids = ids.astype(int)

    D_bins = [X[ids[i]:ids[i+1]+1] for i in range(0,4)]

    with warnings.catch_warnings():
        # Empty bins produce a harmless all-NaN warning.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        D = np.nanmean(D_bins[0])-np.mean(D_bins[3])

    return D
def std(X):
    """
    Standard deviation of X (population std, ddof=0).
    """
    return np.std(np.asarray(X))
# Registry mapping statistic names (as referenced by cfg.EVAL.STATISTICS)
# to the functions defined above.
_statistics = {
    'decay' : decay,
    'mean' : mean,
    'recall': recall,
    'std' : std
}

def get(name):
    """Look up a statistic function by name (raises KeyError if unknown)."""
    return _statistics[name]
================================================
FILE: measures/t_stability.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import sys
import numpy as np
import scipy.spatial.distance as ssd
from tstab import *
def get_bijective_pairs(pairs,costmat):
    """Keep only matches that are minimum-cost in both directions.

    Deduplicates first by left index (against costmat), then by right index
    (against the transposed cost matrix).
    """
    result = bij_pairs_one_dim(pairs, costmat, 0)
    result = bij_pairs_one_dim(result, costmat.T, 1)
    return result
def bij_pairs_one_dim(pairs, costmat, left_or_right):
    """
    For every unique index on one side of the matching, keep only the
    candidate pair with the smallest cost.

    Arguments:
        pairs (ndarray): (N, 2) candidate index pairs.
        costmat (ndarray): cost matrix indexed as costmat[side, other_side].
        left_or_right (int): 0 to deduplicate by the left index, 1 by the
            right index.

    Returns:
        ndarray: the selected pairs (one per unique index on that side).
    """
    bij_pairs = []
    ids1 = np.unique(pairs[:,left_or_right])
    for ii in range(len(ids1)):
        # np.int was removed from NumPy; the builtin int is equivalent here.
        curr_pairs = pairs[pairs[:,left_or_right]==ids1[ii],:].astype(int)
        curr_costs = costmat[curr_pairs[:,left_or_right], curr_pairs[:,1-left_or_right]]
        b = np.argmin(curr_costs)
        bij_pairs.append(curr_pairs[b])
    return np.array(bij_pairs)
def hist_cost_2(BH1,BH2):
nsamp1,nbins=BH1.shape
nsamp2,nbins=BH2.shape
eps = 2.2204e-16
BH1n = BH1 / (np.sum(BH1,axis=1,keepdims=True)+eps)
BH2n = BH2 / (np.sum(BH2,axis=1,keepdims=True)+eps)
tmp1 = np.tile(np.transpose(np.atleast_3d(BH1n),[0,2,1]),(1,nsamp2,1))
tmp2 = np.tile(np.transpose(np.atleast_3d(BH2n.T),[2,1,0]),(nsamp1,1,1))
HC = 0.5*np.sum((tmp1-tmp2)**2/(tmp1+tmp2+eps),axis=2)
return HC
def sc_compute(Bsamp,Tsamp,mean_dist,nbins_theta,nbins_r,r_inner,r_outer,out_vec):
in_vec = (out_vec==0).ravel()
nsamp = Bsamp.shape[1]
r_array=ssd.squareform(ssd.pdist(Bsamp.T)).T
theta_array_abs0=Bsamp[1,:].reshape(-1,1).dot(np.ones((1,nsamp))) - \
np.ones((nsamp,1)).dot(Bsamp[1,:].reshape(1,-1))
theta_array_abs1=Bsamp[0,:].reshape(-1,1).dot(np.ones((1,nsamp))) - \
np.ones((nsamp,1)).dot(Bsamp[0,:].reshape(1,-1))
theta_array_abs = np.arctan2(theta_array_abs0,theta_array_abs1).T
theta_array=theta_array_abs-Tsamp.T.dot(np.ones((1,nsamp)))
if mean_dist is None:
mean_dist = np.mean(r_array[in_vec].T[in_vec].T)
r_array_n = r_array / mean_dist
r_bin_edges=np.logspace(np.log10(r_inner),np.log10(r_outer),nbins_r)
r_array_q=np.zeros((nsamp,nsamp))
for m in range(int(nbins_r)):
r_array_q=r_array_q+(r_array_n<r_bin_edges[m])
fz = r_array_q > 0
theta_array_2 = np.fmod(np.fmod(theta_array,2*np.pi)+2*np.pi,2*np.pi)
theta_array_q = 1+np.floor(theta_array_2/(2*np.pi/nbins_theta))
nbins=nbins_theta*nbins_r
BH=np.zeros((nsamp,nbins))
count = 0
for n in range(nsamp):
fzn=fz[n]&in_vec
Sn = np.zeros((nbins_theta,nbins_r))
coords = np.hstack((theta_array_q[n,fzn].reshape(-1,1),
r_array_q[n,fzn].astype(np.int).reshape(-1,1)))
# SLOW...
#for i,j in coords:
#Sn[i-1,j-1] += 1
# FASTER
ids = np.ravel_multi_index((coords.T-1).astype(np.int),Sn.shape)
Sn = np.bincount(ids.ravel(),minlength = np.prod(Sn.shape)).reshape(Sn.shape)
BH[n,:] = Sn.T[:].ravel()
return BH.astype(np.int),mean_dist
def db_eval_t_stab(fgmask,ground_truth,timing=True):
    """
    Calculates the temporal stability index between two masks

    Arguments:
        fgmask (ndarray): Foreground Object mask at frame t
        ground_truth (ndarray): Foreground Object mask at frame t+1

    Return:
        T (ndarray): Temporal (in-)stability
        raw_results (ndarray): Supplemental values
    """
    # Contour extraction / upsampling thresholds (mask2poly,
    # contour_upsample come from the compiled `tstab` extension).
    cont_th = 3
    cont_th_up = 3

    # Shape context parameters
    r_inner = 1.0/8.0
    r_outer = 2.0
    nbins_r = 5.0
    nbins_theta = 12.0

    poly1 = mask2poly(fgmask,cont_th)
    poly2 = mask2poly(ground_truth,cont_th)

    # Stability is undefined when either frame has no contour.
    if len(poly1.contour_coords) == 0 or \
            len(poly2.contour_coords) == 0:
        return np.nan

    # Only the longest contour of each mask is compared.
    Cs1 = get_longest_cont(poly1.contour_coords)
    Cs2 = get_longest_cont(poly2.contour_coords)

    upCs1 = contour_upsample(Cs1,cont_th_up)
    upCs2 = contour_upsample(Cs2,cont_th_up)

    # Shape-context descriptor per sampled contour point.
    scs1,_=sc_compute(upCs1.T,np.zeros((1,upCs1.shape[0])),None,
        nbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs1.shape[0])))
    scs2,_=sc_compute(upCs2.T,np.zeros((1,upCs2.shape[0])),None,
        nbins_theta,nbins_r,r_inner,r_outer,np.zeros((1,upCs2.shape[0])))

    # Match with the 0-0 alignment
    costmat = hist_cost_2(scs1,scs2)
    pairs ,max_sx,max_sy = match_dijkstra(np.ascontiguousarray(costmat))

    # Shift costmat
    costmat2 = np.roll(costmat ,-(max_sy+1),axis=1)
    costmat2 = np.roll(costmat2,-(max_sx+1),axis=0)

    # Redo again with the correct alignment
    pairs,_,_ = match_dijkstra(costmat2)

    # Put the pairs back to the original place
    pairs[:,0] = np.mod(pairs[:,0]+max_sx+1, costmat.shape[0])
    pairs[:,1] = np.mod(pairs[:,1]+max_sy+1, costmat.shape[1])

    # Keep bijective matches and average their cost: lower = more stable.
    pairs = get_bijective_pairs(pairs,costmat)

    pairs_cost = costmat[pairs[:,0], pairs[:,1]]
    min_cost = np.average(pairs_cost)

    return min_cost
================================================
FILE: misc/__init__.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
from .timer import Timer
from .logger import logging as log
from .config import phase,cfg,db_read_sequences
from .visual import overlay
================================================
FILE: misc/config.py
================================================
#!/usr/bin/env python
import yaml
import os.path as osp
import numpy as np
from easydict import EasyDict as edict
from enum import Enum
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from FAST-RCNN (Ross Girshick)
# ----------------------------------------------------------------------------
""" Configuration file."""
class phase(Enum):
    # DAVIS dataset splits; the string values match the `set` field used in
    # data/db_info.yaml.
    TRAIN = 'train'
    VAL = 'val'
    TESTDEV = 'test-dev'
    TRAINVAL = 'train-val'
__C = edict()

# Public access to configuration settings
cfg = __C

# Number of CPU cores used to parallelize evaluation.
__C.N_JOBS = 32

# Paths to dataset folders
__C.PATH = edict()

# Dataset resolution: ("480p","1080p")
__C.RESOLUTION="480p"

# Dataset year: ("2016","2017")
__C.YEAR = "2017"

__C.PHASE = phase.VAL

# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)
__C.MULTIOBJECT = True

# Root folder of project
__C.PATH.ROOT = osp.abspath('.')

# Data folder
__C.PATH.DATA = osp.abspath('./data/DAVIS2017')

# Path to input images
__C.PATH.SEQUENCES = osp.join(__C.PATH.DATA, "JPEGImages", __C.RESOLUTION)

# Path to annotations
__C.PATH.ANNOTATIONS = osp.join(__C.PATH.DATA, "Annotations", __C.RESOLUTION)
__C.PATH.ANNOTATIONS_EDGE = osp.join(__C.PATH.DATA, "Annotations_edge",
                                     __C.RESOLUTION)

# Precomputed optical-flow and HED-edge inputs.
__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'davis2017-flow/')
__C.PATH.HED = osp.join(__C.PATH.DATA, 'davis2017-hed/')

# Color palette
__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))

# Paths to files
__C.FILES = edict()

# Path to property file, holding information on evaluation sequences.
__C.FILES.DB_INFO = osp.abspath(osp.join(__C.PATH.ROOT,
                                         "data/db_info.yaml"))

# Measures and Statistics
__C.EVAL = edict()

# Metrics: J: region similarity, F: contour accuracy, T: temporal stability
__C.EVAL.METRICS = ['J', 'F']

# Statistics computed for each of the metrics listed above
__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']
def db_read_info():
    """ Read dataset properties from file (data/db_info.yaml) as an EasyDict."""
    with open(cfg.FILES.DB_INFO, 'r') as f:
        # yaml.load without an explicit Loader is unsafe and is rejected by
        # PyYAML >= 6; safe_load parses this plain-data file identically.
        return edict(yaml.safe_load(f))
def db_read_attributes():
    """ Read the `attributes` section of the dataset property file. """
    return db_read_info().attributes
def db_read_years():
    """ Read the `years` section of the dataset property file. """
    return db_read_info().years
def db_read_sequences(year=None,db_phase=None):
    """ Read list of sequences, optionally filtered by year and split.

    Returns a list. (The original returned the lazy `filter` object, which
    is exhausted after a single pass and silently yields nothing on reuse.)
    """
    sequences = db_read_info().sequences

    if year is not None:
        # Keep sequences introduced up to and including `year`.
        sequences = filter(
            lambda s: int(s.year) <= int(year), sequences)

    if db_phase is not None:
        if db_phase == phase.TRAINVAL:
            # NOTE(review): s.set is read from YAML (a string); comparing it
            # to phase enum members matches the original behavior but may
            # never be true -- confirm against db_info.yaml / callers.
            sequences = filter(lambda s: ((s.set == phase.VAL) or
                (s.set == phase.TRAIN)), sequences)
        else:
            sequences = filter(lambda s: s.set == db_phase, sequences)

    return list(sequences)
# Load all sequences
__C.SEQUENCES = dict([(sequence.name, sequence)
                      for sequence in db_read_sequences()])

# Indexed color palette used when writing annotation PNGs (N x 3 uint8).
__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)
================================================
FILE: misc/config_youtubeVOS.py
================================================
#!/usr/bin/env python
import json
import os.path as osp
from easydict import EasyDict as edict
from enum import Enum
import numpy as np
""" Configuration file."""
class phase(Enum):
    # YouTube-VOS splits; note TRAINVAL is 'trainval' here (no dash),
    # unlike misc/config.py.
    TRAIN = 'train'
    VAL = 'val'
    TESTDEV = 'test-dev'
    TRAINVAL = 'trainval'
__C = edict()

# Public access to configuration settings
cfg = __C

# Number of CPU cores used to parallelize evaluation.
__C.N_JOBS = 32

# Paths to dataset folders
__C.PATH = edict()

__C.PHASE = phase.TRAIN

# Multiobject evaluation (Set to False only when evaluating DAVIS 2016)
__C.MULTIOBJECT = True

# Root folder of project
__C.PATH.ROOT = osp.abspath('.')

# Data folder
__C.PATH.DATA = osp.abspath('./data/YouTubeVOS_2018')

# Path to input images
# NOTE(review): TRAIN/VAL/TRAINVAL all point at the train JPEGImages folder
# (train/val are sub-splits of the official training set); TEST points at
# the official 'valid' split.
__C.PATH.SEQUENCES_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                    "JPEGImages")
__C.PATH.SEQUENCES_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                  "JPEGImages")
__C.PATH.SEQUENCES_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                       "JPEGImages")
__C.PATH.SEQUENCES_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,
                                   "JPEGImages")

# Path to annotations
__C.PATH.ANNOTATIONS_TRAIN = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                      "Annotations")
__C.PATH.ANNOTATIONS_VAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                    "Annotations")
__C.PATH.ANNOTATIONS_TRAINVAL = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                         "Annotations")
__C.PATH.ANNOTATIONS_TEST = osp.join(__C.PATH.DATA, phase.VAL.value,
                                     "Annotations")

# Precomputed optical-flow / HED-edge inputs and edge annotations.
__C.PATH.FLOW = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-flow/')
__C.PATH.HED = osp.join(__C.PATH.DATA, 'YouTubeVOS_2018-hed/')

__C.PATH.ANNOTATIONS_TRAIN_EDGE = osp.join(__C.PATH.DATA, phase.TRAIN.value,
                                           "Annotations_edge")

__C.PATH.PALETTE = osp.abspath(osp.join(__C.PATH.ROOT, 'data/palette.txt'))

# Paths to files
__C.FILES = edict()

# Path to property file, holding information on evaluation sequences.
__C.FILES.DB_INFO_TRAIN = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "train-train-meta.json"))
__C.FILES.DB_INFO_VAL = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "train-val-meta.json"))
__C.FILES.DB_INFO_TRAINVAL = osp.abspath(
    osp.join(__C.PATH.DATA, phase.TRAIN.value, "meta.json"))
__C.FILES.DB_INFO_TEST = osp.abspath(
    osp.join(__C.PATH.DATA, phase.VAL.value, "meta.json"))

# Measures and Statistics
__C.EVAL = edict()

# Metrics: J: region similarity, F: contour accuracy, T: temporal stability
__C.EVAL.METRICS = ['J', 'F']

# Statistics computed for each of the metrics listed above
__C.EVAL.STATISTICS = ['mean', 'recall', 'decay']
def db_read_sequences_train():
    """ Read the list of training sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TRAIN) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_val():
    """ Read the list of validation sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_VAL) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_trainval():
    """ Read the list of train+val sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TRAINVAL) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
def db_read_sequences_test():
    """ Read the list of test sequence names from the meta file. """
    # Context manager closes the handle (the original leaked a bare open()).
    with open(__C.FILES.DB_INFO_TEST) as json_data:
        data = json.load(json_data)
    return data['videos'].keys()
# Load all sequences
__C.SEQUENCES_TRAIN = db_read_sequences_train()
__C.SEQUENCES_VAL = db_read_sequences_val()
__C.SEQUENCES_TRAINVAL = db_read_sequences_trainval()
__C.SEQUENCES_TEST = db_read_sequences_test()

# Indexed color palette used when writing annotation PNGs (N x 3 uint8).
__C.palette = np.loadtxt(__C.PATH.PALETTE, dtype=np.uint8).reshape(-1, 3)
================================================
FILE: misc/io_aux.py
================================================
from PIL import Image
import numpy as np
from misc.config import cfg
def imread_indexed(filename):
    """ Load an indexed PNG; return (label array, N x 3 palette array)."""
    im = Image.open(filename)
    # First channel carries the palette indices (object labels).
    annotation = np.atleast_3d(im)[...,0]
    palette = np.array(im.getpalette()).reshape((-1,3))
    return annotation, palette
def imwrite_indexed(filename,array,color_palette=cfg.palette):
    """ Save a 2D label array as an indexed PNG with the given palette."""
    # Indexed PNGs hold a single channel of palette indices.
    if np.atleast_3d(array).shape[2] != 1:
        raise Exception("Saving indexed PNGs requires 2D array.")

    indexed = Image.fromarray(array)
    indexed.putpalette(color_palette.ravel())
    indexed.save(filename, format='PNG')
================================================
FILE: misc/logger.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from DAVIS 2016 (Federico Perazzi)
# ----------------------------------------------------------------------------
import logging
# Log lines look like: "[INFO][01-01-2020 12:00:00] message"
FORMAT = '[%(levelname)s]%(asctime)-15s %(message)s'
# Configure the root logger once at import time.
logging.basicConfig(
    level=logging.INFO,format=FORMAT,datefmt='[%d-%m-%Y %H:%M:%S]')
================================================
FILE: misc/parallel.py
================================================
# Prefer joblib for parallel evaluation; fall back to serial execution.
try:
    from joblib import Parallel, delayed
except Exception as e:
    print(e) #TODO
    # if joblib does not exist just run it in a single thread
    delayed = lambda x: x
    def Parallel( *args, **kwargs ):
        # Mimics joblib's API: Parallel(...)(iterable) -> list(iterable).
        return list
# Allow pickling member functions
def _pickle_method(method):
    """Pickle support for bound methods: reduce to (function name, owner)."""
    return _unpickle_method, (method.__name__, method.__self__)
def _unpickle_method(func_name, obj):
try:
return obj.__getattribute__(func_name)
except AttributeError:
return None
#The copyreg module offers a way to define functions used while pickling specific objects
# Requires to install future module
import types
try:
    import copyreg
except:
    import six.moves.copyreg as copyreg
# Route all bound-method pickling through the helpers above.
copyreg.pickle(types.MethodType, _pickle_method, _unpickle_method)
================================================
FILE: misc/timer.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# Adapted from FAST-RCNN (Ross Girshick)
# ----------------------------------------------------------------------------
"""
A simple wrapper to the built-in python timer.
"""
import time
class Timer(object):
    """Minimal wall-clock stopwatch built on time.time()."""
    def __init__(self):
        super(Timer, self).__init__()
        self._start = None  # epoch seconds at the last tic(); None until started

    def tic(self):
        """Start (or restart) the timer; returns self for chaining."""
        self._start = time.time()
        return self

    def toc(self):
        """Seconds elapsed since the last tic().

        Raises:
            AssertionError: if tic() was never called.
        """
        # Idiomatic None check; also fix the message -- the original told the
        # caller to call "toc()" first, i.e. the very function that failed.
        assert self._start is not None,\
            'Timer uninitialized. Call "tic()" first.'
        return time.time() - self._start
================================================
FILE: misc/visual.py
================================================
# ----------------------------------------------------------------------------
# The 2017 DAVIS Challenge on Video Object Segmentation
#-----------------------------------------------------------------------------
# Copyright (c) 2017 Federico Perazzi
# Licensed under the BSD License [see LICENSE for details]
# Written by Federico Perazzi (federico@disneyresearch.com)
# ----------------------------------------------------------------------------
import skimage
import numpy as np
def overlay(image, mask, colors=[255, 0, 0], cscale=2, alpha=0.4):
    """ Overlay segmentation on top of RGB image.

    Arguments:
        image: HxWx3 array.
        mask: HxW label map; 0 is background, other values are object ids.
        colors: list of RGB colors, cycled over object ids.
        cscale: scale factor applied to the colors.
        alpha: blending weight of the original image inside objects.

    Returns:
        Blended image of the same dtype as `image`, with object contours
        drawn in black.
    """
    colors = np.atleast_2d(colors) * cscale

    im_overlay = image.copy()
    object_ids = np.unique(mask)

    for object_id in object_ids[1:]:
        # Cycle through the palette: the original indexed colors[object_id]
        # directly, which raises IndexError whenever an object id exceeds
        # the palette length (e.g. the default single color with id 1).
        color = np.array(colors[object_id % len(colors)])
        # Overlay color on binary mask
        foreground = image*alpha + np.ones(image.shape)*(1-alpha) * color
        binary_mask = mask == object_id

        # Compose image
        im_overlay[binary_mask] = foreground[binary_mask]

        # XOR of the dilation with the mask gives the 1-pixel contour ring;
        # the original used '-', which NumPy no longer allows on booleans.
        countours = skimage.morphology.binary.binary_dilation(binary_mask) ^ binary_mask
        im_overlay[countours, :] = 0

    return im_overlay.astype(image.dtype)
================================================
FILE: modules/MATNet.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
class Encoder(nn.Module):
    # Two-stream (appearance + optical flow) ResNet-101 encoder.
    # Co-attention couples the streams at res3/4/5; the concatenated
    # two-stream features of every level are passed through a channel gate.
    def __init__(self):
        super(Encoder, self).__init__()

        # Appearance stream: ImageNet-pretrained ResNet-101 over the RGB frame.
        resnet_im = models.resnet101(pretrained=True)
        self.conv1_1 = resnet_im.conv1
        self.bn1_1 = resnet_im.bn1
        self.relu_1 = resnet_im.relu
        self.maxpool_1 = resnet_im.maxpool

        self.res2_1 = resnet_im.layer1
        self.res3_1 = resnet_im.layer2
        self.res4_1 = resnet_im.layer3
        self.res5_1 = resnet_im.layer4

        # Motion stream: a second, separately-weighted ResNet-101 over the flow.
        resnet_fl = models.resnet101(pretrained=True)
        self.conv1_2 = resnet_fl.conv1
        self.bn1_2 = resnet_fl.bn1
        self.relu_2 = resnet_fl.relu
        self.maxpool_2 = resnet_fl.maxpool

        self.res2_2 = resnet_fl.layer1
        self.res3_2 = resnet_fl.layer2
        self.res4_2 = resnet_fl.layer3
        self.res5_2 = resnet_fl.layer4

        # Channel gates over the concatenated two-stream features (hence *2).
        self.gated_res2 = Gated(256*2)
        self.gated_res3 = Gated(512*2)
        self.gated_res4 = Gated(1024*2)
        self.gated_res5 = Gated(2048*2)

        # Co-attention at the three deepest levels (channel = per-stream width).
        self.coa_res3 = CoAttention(channel=512)
        self.coa_res4 = CoAttention(channel=1024)
        self.coa_res5 = CoAttention(channel=2048)

    def forward_res2(self, f1, f2):
        # Stem + res2 of each stream; f1 = RGB frame, f2 = flow image.
        x1 = self.conv1_1(f1)
        x1 = self.bn1_1(x1)
        x1 = self.relu_1(x1)
        x1 = self.maxpool_1(x1)
        r2_1 = self.res2_1(x1)

        x2 = self.conv1_2(f2)
        x2 = self.bn1_2(x2)
        x2 = self.relu_2(x2)
        x2 = self.maxpool_2(x2)
        r2_2 = self.res2_2(x2)

        return r2_1, r2_2

    def forward(self, f1, f2):
        # Returns gated, concatenated two-stream features at res5..res2.
        r2_1, r2_2 = self.forward_res2(f1, f2)
        r2 = torch.cat([r2_1, r2_2], dim=1)

        # res3: co-attend, then residual-add the cross-stream responses.
        r3_1 = self.res3_1(r2_1)
        r3_2 = self.res3_2(r2_2)
        Za, Zb, Qa, Qb = self.coa_res3(r3_1, r3_2)
        r3_1 = F.relu(Zb + r3_1)
        r3_2 = F.relu(Qb + r3_2)
        r3 = torch.cat([r3_1, r3_2], dim=1)

        # res4
        r4_1 = self.res4_1(r3_1)
        r4_2 = self.res4_2(r3_2)
        Za, Zb, Qa, Qb = self.coa_res4(r4_1, r4_2)
        r4_1 = F.relu(Zb + r4_1)
        r4_2 = F.relu(Qb + r4_2)
        r4 = torch.cat([r4_1, r4_2], dim=1)

        # res5
        r5_1 = self.res5_1(r4_1)
        r5_2 = self.res5_2(r4_2)
        Za, Zb, Qa, Qb = self.coa_res5(r5_1, r5_2)
        r5_1 = F.relu(Zb + r5_1)
        r5_2 = F.relu(Qb + r5_2)
        r5 = torch.cat([r5_1, r5_2], dim=1)

        r5_gated = self.gated_res5(r5)
        r4_gated = self.gated_res4(r4)
        r3_gated = self.gated_res3(r3)
        r2_gated = self.gated_res2(r2)

        return r5_gated, r4_gated, r3_gated, r2_gated
class CoAttention(nn.Module):
def __init__(self, channel):
super(CoAttention, self).__init__()
d = channel // 16
self.proja = nn.Conv2d(channel, d, kernel_size=1)
self.projb = nn.Conv2d(channel, d, kernel_size=1)
self.bottolneck1 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=1),
nn.BatchNorm2d(channel),
nn.ReLU(inplace=True),
)
self.bottolneck2 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=1),
nn.BatchNorm2d(channel),
nn.ReLU(inplace=True),
)
self.proj1 = nn.Conv2d(channel, 1, kernel_size=1)
self.proj2 = nn.Conv2d(channel, 1, kernel_size=1)
self.bna = nn.BatchNorm2d(channel)
self.bnb = nn.BatchNorm2d(channel)
self.relu = nn.ReLU(inplace=True)
def forward(self, Qa, Qb):
# cascade 1
Qa_1, Qb_1 = self.forward_sa(Qa, Qb)
_, Zb = self.forward_co(Qa_1, Qb_1)
Pa = F.relu(Zb + Qa)
Pb = F.relu(Qb_1 + Qb)
# cascade 2
Qa_2, Qb_2 = self.forward_sa(Pa, Pb)
_, Zb = self.forward_co(Qa_2, Qb_2)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_2 + Pb)
# cascade 3
Qa_3, Qb_3 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_3, Qb_3)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_3 + Pb)
# cascade 4
Qa_4, Qb_4 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_4, Qb_4)
Pa = F.relu(Zb + Pa)
Pb = F.relu(Qb_4 + Pb)
# cascade 5
Qa_5, Qb_5 = self.forward_sa(Pa, Pb)
Za, Zb = self.forward_co(Qa_5, Qb_5)
return Za, Zb, Qa_5, Qb_5
def forward_sa(self, Qa, Qb):
Aa = self.proj1(Qa)
Ab = self.proj2(Qb)
n, c, h, w = Aa.shape
Aa = Aa.view(-1, h*w)
Ab = Ab.view(-1, h*w)
Aa = F.softmax(Aa)
Ab = F.softmax(Ab)
Aa = Aa.view(n, c, h, w)
Ab = Ab.view(n, c, h, w)
Qa_attened = Aa * Qa
Qb_attened = Ab * Qb
return Qa_attened, Qb_attened
def forward_co(self, Qa, Qb):
Qa_low = self.proja(Qa)
Qb_low = self.projb(Qb)
N, C, H, W = Qa_low.shape
Qa_low = Qa_low.view(N, C, H * W)
Qb_low = Qb_low.view(N, C, H * W)
Qb_low = torch.transpose(Qb_low, 1, 2)
L = torch.bmm(Qb_low, Qa_low)
Aa = F.tanh(L)
Ab = torch.transpose(Aa, 1, 2)
N, C, H, W = Qa.shape
Qa_ = Qa.view(N, C, H * W)
Qb_ = Qb.view(N, C, H * W)
Za = torch.bmm(Qb_, Aa)
Zb = torch.bmm(Qa_, Ab)
Za = Za.view(N, C, H, W)
Zb = Zb.view(N, C, H, W)
Za = F.normalize(Za)
Zb = F.normalize(Zb)
return Za, Zb
class Gated(nn.Module):
    """Gated attention block combining channel, spatial and global gates.

    A squeeze-excitation channel gate is followed by a spatial gate
    (computed from the channel-wise max) and a scalar global gate derived
    from the shared excitation features; the result is added residually.
    """

    def __init__(self, channel, reduction=16):
        super(Gated, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.excitation_1 = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(inplace=True))
        self.excitation_2 = nn.Sequential(
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )
        self.global_attention = nn.Sequential(
            nn.Linear(channel // reduction, 1),
            nn.Sigmoid()
        )
        kernel_size = 7
        self.spatial = BasicConv(1, 1, kernel_size, stride=1,
                                 padding=(kernel_size-1) // 2, relu=False)

    def forward(self, U):
        batch, chans = U.shape[0], U.shape[1]
        # Channel gate (squeeze-and-excitation).
        squeezed = self.avg_pool(U).view(batch, chans)
        hidden = self.excitation_1(squeezed)
        channel_gate = self.excitation_2(hidden).view(batch, chans, 1, 1)
        gated = channel_gate * U
        # Spatial gate from the channel-wise maximum of the gated map.
        peak = torch.max(gated, 1)[0].unsqueeze(1)
        gated = self.spatial(peak) * gated
        # Scalar global gate reuses the excitation hidden features.
        global_gate = self.global_attention(hidden).view(batch, 1, 1, 1)
        # Residual connection back onto the input.
        return U + global_gate * gated
class BasicConv(nn.Module):
    """Conv2d followed by optional BatchNorm and optional ReLU."""

    def __init__(self, in_planes, out_planes, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, relu=True, bn=True,
                 bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
                              stride=stride, padding=padding,
                              dilation=dilation, groups=groups, bias=bias)
        # bn/relu are None when disabled so forward can skip them cheaply.
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01,
                                 affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        return out if self.relu is None else self.relu(out)
class BoundaryModule(nn.Module):
    """Small head producing a single-channel boundary logit map.

    Structure: BN -> 3x3 conv -> ReLU -> BN -> 3x3 conv -> ReLU -> 1x1 conv.
    The output is raw logits; `self.sigmoid` is defined but not applied here
    (callers apply sigmoid after upsampling).
    """

    def __init__(self, inchannel):
        super(BoundaryModule, self).__init__()
        self.bn1 = nn.BatchNorm2d(inchannel)
        self.conv1 = nn.Conv2d(inchannel, 64, kernel_size=3, stride=1,
                               padding=1)
        self.relu = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 1, kernel_size=1, stride=1, padding=0)
        self.sigmoid = nn.Sigmoid()
        self._initialize_weights()

    def _initialize_weights(self):
        """Gaussian init for convs/linears, unit-scale init for BatchNorm."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                module.weight.data.normal_(0, 0.001)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()

    def forward(self, x):
        out = self.relu(self.conv1(self.bn1(x)))
        out = self.relu(self.conv2(self.bn2(out)))
        return self.conv3(out)
class Decoder(nn.Module):
    """Decoder that predicts a segmentation mask plus boundary maps.

    Boundary logits are predicted per scale, upsampled and fused; the
    boundary logits are also concatenated to the encoder features before
    the mask-refinement path (GC + cascaded Refine blocks).
    """

    def __init__(self, output_size=(473, 473)):
        """
        Args:
            output_size: (H, W) all predictions are upsampled to. Defaults
                to (473, 473) — the previously hard-coded training
                resolution — so existing callers are unaffected.
        """
        super(Decoder, self).__init__()
        mdim = 256
        self.output_size = output_size
        self.GC = GC(4096+1, mdim)
        self.convG1 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)
        self.convG2 = nn.Conv2d(mdim, mdim, kernel_size=3, padding=1)
        self.RF4 = Refine(2048+1, mdim)
        self.RF3 = Refine(1024+1, mdim)
        self.RF2 = Refine(512+1, mdim)
        self.pred5 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred4 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred3 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.pred2 = nn.Conv2d(mdim, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.concat = nn.Conv2d(4, 1, kernel_size=(3, 3), padding=(1, 1), stride=1)
        self.bdry5 = BoundaryModule(4096)
        self.bdry4 = BoundaryModule(2048)
        self.bdry3 = BoundaryModule(1024)
        self.bdry2 = BoundaryModule(512)
        self._initialize_weights()

    def _initialize_weights(self):
        """Gaussian init for convs/linears, unit-scale init for BatchNorm."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.normal_(0, 0.001)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()

    def forward(self, r5, r4, r3, r2):
        """Returns (mask, fused boundary, and four per-scale boundary maps),
        all sigmoid probabilities at self.output_size resolution."""
        # Per-scale boundary logits.
        p5 = self.bdry5(r5)
        p4 = self.bdry4(r4)
        p3 = self.bdry3(r3)
        p2 = self.bdry2(r2)
        # Upsample to the output resolution. align_corners=False matches the
        # previous implicit default and silences the deprecation warning.
        p2_up = F.interpolate(p2, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p3_up = F.interpolate(p3, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p4_up = F.interpolate(p4, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p5_up = F.interpolate(p5, size=self.output_size, mode='bilinear',
                              align_corners=False)
        # Fuse the four boundary maps into one.
        concat = torch.cat([p2_up, p3_up, p4_up, p5_up], dim=1)
        p = self.concat(concat)
        p2_up = torch.sigmoid(p2_up)
        p3_up = torch.sigmoid(p3_up)
        p4_up = torch.sigmoid(p4_up)
        p5_up = torch.sigmoid(p5_up)
        p = torch.sigmoid(p)
        # Append boundary logits as an extra channel for the mask path.
        r5 = torch.cat((r5, p5), dim=1)
        r4 = torch.cat((r4, p4), dim=1)
        r3 = torch.cat((r3, p3), dim=1)
        r2 = torch.cat((r2, p2), dim=1)
        m = self.forward_mask(r5, r4, r3, r2)
        return m, p, p2_up, p3_up, p4_up, p5_up

    def forward_mask(self, x, r4, r3, r2):
        """Global-convolution bottleneck plus cascaded refinement to a mask."""
        x = self.GC(x)
        r = self.convG1(F.relu(x))
        r = self.convG2(F.relu(r))
        m5 = x + r
        m4 = self.RF4(r4, m5)
        m3 = self.RF3(r3, m4)
        m2 = self.RF2(r2, m3)
        p2 = self.pred2(F.relu(m2))
        p2_up = F.interpolate(p2, size=self.output_size, mode='bilinear',
                              align_corners=False)
        p2_s = torch.sigmoid(p2_up)
        return p2_s
class GC(nn.Module):
    """Global convolution: two separable kh x kw paths, summed.

    Each path factorizes a large kernel into a (kh,1) and a (1,kw) conv
    (in opposite orders), keeping the spatial size via symmetric padding.
    """

    def __init__(self, inplanes, planes, kh=7, kw=7):
        super(GC, self).__init__()
        self.conv_l1 = nn.Conv2d(inplanes, 256, kernel_size=(kh, 1),
                                 padding=(int(kh/2), 0))
        self.conv_l2 = nn.Conv2d(256, planes, kernel_size=(1, kw),
                                 padding=(0, int(kw/2)))
        self.conv_r1 = nn.Conv2d(inplanes, 256, kernel_size=(1, kw),
                                 padding=(0, int(kw/2)))
        self.conv_r2 = nn.Conv2d(256, planes, kernel_size=(kh, 1),
                                 padding=(int(kh/2), 0))

    def forward(self, x):
        left = self.conv_l2(self.conv_l1(x))
        right = self.conv_r2(self.conv_r1(x))
        return left + right
class AtrousBlock(nn.Module):
    """3x3 dilated convolution; padding == dilation keeps the spatial size
    unchanged at stride 1."""

    def __init__(self, inplanes, planes, rate, stride=1):
        super(AtrousBlock, self).__init__()
        self.conv = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                              dilation=rate, padding=rate)

    def forward(self, x):
        return self.conv(x)


class PyramidDilationConv(nn.Module):
    """Parallel 1x1 conv plus three dilated 3x3 convs (rates 3/5/7),
    concatenated along channels and batch-normed.

    Output has 4 * planes channels at the input's spatial size.
    """

    def __init__(self, inplanes, planes):
        super(PyramidDilationConv, self).__init__()
        rate = [3, 5, 7]
        self.block0 = nn.Conv2d(inplanes, planes, kernel_size=1)
        self.block1 = AtrousBlock(inplanes, planes, rate[0])
        self.block2 = AtrousBlock(inplanes, planes, rate[1])
        self.block3 = AtrousBlock(inplanes, planes, rate[2])
        self.bn = nn.BatchNorm2d(planes*4)

    def forward(self, x):
        x0 = self.block0(x)
        x1 = self.block1(x)
        # BUG FIX: block1 was previously applied three times, leaving
        # block2/block3 (dilations 5 and 7) as dead modules. Each branch is
        # now used as constructed. NOTE: checkpoints trained with the old
        # code will produce different outputs after this fix.
        x2 = self.block2(x)
        x3 = self.block3(x)
        xx = torch.cat([x0, x1, x2, x3], dim=1)
        return self.bn(xx)
class Refine(nn.Module):
    """Refinement block: pyramid-dilation skip features plus an upsampled
    coarser mask, each with a small residual conv stack.

    NOTE: convFS1 and scale_factor are unused by forward() (the pdc branch
    replaced convFS1); they are kept so existing state dicts still load.
    """

    def __init__(self, inplanes, planes, scale_factor=2):
        super(Refine, self).__init__()
        self.convFS1 = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1)
        self.convFS2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convFS3 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convMM1 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.convMM2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
        self.scale_factor = scale_factor
        outplanes = int(planes / 4)
        # pdc concatenates 4 branches of `outplanes`, i.e. `planes` total.
        self.pdc = PyramidDilationConv(inplanes, outplanes)

    def forward(self, f, pm):
        # Skip branch with a residual two-conv refinement.
        skip = self.pdc(f)
        residual = self.convFS3(F.relu(self.convFS2(F.relu(skip))))
        skip = skip + residual
        # Merge with the coarser mask, upsampled to the skip resolution.
        merged = skip + F.interpolate(pm, size=skip.shape[2:4], mode='bilinear')
        # Second residual two-conv refinement.
        refined = self.convMM2(F.relu(self.convMM1(F.relu(merged))))
        return merged + refined
================================================
FILE: modules/cbam.py
================================================
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
class BasicConv(nn.Module):
    """Conv2d with optional BatchNorm and optional ReLU (CBAM helper)."""

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        # Disabled stages are stored as None and skipped in forward().
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is not None:
            out = self.bn(out)
        return out if self.relu is None else self.relu(out)
class Flatten(nn.Module):
    """Collapse every dimension after the batch dimension into one."""
    def forward(self, x):
        return x.view(x.size(0), -1)


class ChannelGate(nn.Module):
    """CBAM channel-attention gate.

    Pools the input spatially (one pooled vector per entry of
    `pool_types`), runs each through a shared bottleneck MLP, sums the
    results, and scales the input channel-wise by the sigmoid of the sum.
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
        )
        self.pool_types = pool_types

    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pooled = F.avg_pool2d(x, (x.size(2), x.size(3)),
                                      stride=(x.size(2), x.size(3)))
            elif pool_type == 'max':
                pooled = F.max_pool2d(x, (x.size(2), x.size(3)),
                                      stride=(x.size(2), x.size(3)))
            elif pool_type == 'lp':
                pooled = F.lp_pool2d(x, 2, (x.size(2), x.size(3)),
                                     stride=(x.size(2), x.size(3)))
            elif pool_type == 'lse':
                # LSE pool only
                pooled = logsumexp_2d(x)
            else:
                # Previously an unknown pool type crashed later with an
                # opaque UnboundLocalError; fail fast instead.
                raise ValueError('unsupported pool type: {!r}'.format(pool_type))
            channel_att_raw = self.mlp(pooled)
            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw
        # torch.sigmoid replaces the deprecated F.sigmoid (identical values).
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale
def logsumexp_2d(tensor):
    """Numerically stable log-sum-exp over the flattened spatial dims.

    Input (N, C, H, W) -> output (N, C, 1); the per-channel max is
    subtracted before exponentiating to avoid overflow.
    """
    flat = tensor.view(tensor.size(0), tensor.size(1), -1)
    peak, _ = flat.max(dim=2, keepdim=True)
    return peak + (flat - peak).exp().sum(dim=2, keepdim=True).log()
class ChannelPool(nn.Module):
    """Stack channel-wise max and mean into a 2-channel map (N,2,H,W)."""
    def forward(self, x):
        max_map = torch.max(x, 1)[0].unsqueeze(1)
        mean_map = torch.mean(x, 1).unsqueeze(1)
        return torch.cat((max_map, mean_map), dim=1)
class SpatialGate(nn.Module):
    """CBAM spatial-attention gate.

    Compresses channels to a 2-channel (max, mean) map, convolves it to a
    single-channel logit map, and scales the input by its sigmoid.
    """

    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1,
                                 padding=(kernel_size-1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        # torch.sigmoid replaces the deprecated F.sigmoid (identical values).
        scale = torch.sigmoid(x_out)  # broadcasting
        return x * scale
class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate, then optionally
    a spatial gate."""

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        out = self.ChannelGate(x)
        if self.no_spatial:
            return out
        return self.SpatialGate(out)
================================================
FILE: requirements.txt
================================================
easydict==1.9
lmdb==0.94
torchvision==0.2.2
opencv_contrib_python==4.0.0.21
scipy==1.2.1
six==1.12.0
torch==1.0.1.post2
pydensecrf==1.0rc2
tqdm==4.31.1
scikit_image==0.15.0
joblib==0.13.2
numpy==1.16.2
matplotlib==3.0.3
Pillow==8.1.1
PyYAML==5.1.2
# NOTE: "skimage==0.0" removed — the "skimage" PyPI name is a broken placeholder;
# scikit_image above already provides the `skimage` package.
================================================
FILE: test_MATNet.py
================================================
import torch
from torchvision import transforms
import os
import glob
from tqdm import tqdm
from PIL import Image
from scipy.misc import imresize
from modules.MATNet import Encoder, Decoder
from utils.utils import check_parallel
from utils.utils import load_checkpoint_epoch
def flip(x, dim):
    """Reverse tensor `x` along dimension `dim`.

    Uses torch.flip, which works on whatever device `x` lives on. The
    previous index_select version built its index with `.cuda(0)`, which
    broke when `x` was on any GPU other than device 0.
    """
    return torch.flip(x, [dim])
# Inference script: runs MATNet over the DAVIS-2016 validation sequences
# and writes per-frame grayscale mask PNGs.
inputRes = (473, 473)
use_flip = True  # average predictions over the horizontally flipped input
to_tensor = transforms.ToTensor()
# ImageNet normalization, applied to both RGB frames and flow images.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
image_transforms = transforms.Compose([to_tensor, normalize])
model_name = 'MATNet' # specify the model name
epoch = 0 # specify the epoch number
davis_result_dir = './output/davis16'
# Load weights only (optimizer state is skipped: load_opt=False).
encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args =\
    load_checkpoint_epoch(model_name, epoch, True, False)
encoder = Encoder()
decoder = Decoder()
# Strip a possible DataParallel "module." prefix before loading.
encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
encoder.load_state_dict(encoder_dict)
decoder.load_state_dict(decoder_dict)
encoder.cuda()
decoder.cuda()
# Evaluation mode (freezes BatchNorm running stats, disables dropout).
encoder.train(False)
decoder.train(False)
val_set = 'data/DAVIS2017/ImageSets/2016/val.txt'
with open(val_set) as f:
    seqs = f.readlines()
    seqs = [seq.strip() for seq in seqs]
for video in tqdm(seqs):
    davis_root_dir = 'data/DAVIS2017/JPEGImages/480p'
    davis_flow_dir = 'data/DAVIS2017/davis2017-flow'
    image_dir = os.path.join(davis_root_dir, video)
    flow_dir = os.path.join(davis_flow_dir, video)
    # Drop the last frame: flow is computed between consecutive frames,
    # so there is one fewer flow image than RGB frames.
    imagefiles = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))[:-1]
    flowfiles = sorted(glob.glob(os.path.join(flow_dir, '*.png')))
    with torch.no_grad():
        for imagefile, flowfile in zip(imagefiles, flowfiles):
            image = Image.open(imagefile).convert('RGB')
            flow = Image.open(flowfile).convert('RGB')
            width, height = image.size
            # Resize to the network input resolution (original size is
            # restored when saving).
            image = imresize(image, inputRes)
            flow = imresize(flow, inputRes)
            image = image_transforms(image)
            flow = image_transforms(flow)
            image = image.unsqueeze(0)
            flow = flow.unsqueeze(0)
            image, flow = image.cuda(), flow.cuda()
            r5, r4, r3, r2 = encoder(image, flow)
            mask_pred, bdry_pred, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
            if use_flip:
                # Test-time augmentation: predict on the mirrored input and
                # average with the original prediction.
                image_flip = flip(image, 3)
                flow_flip = flip(flow, 3)
                r5, r4, r3, r2 = encoder(image_flip, flow_flip)
                mask_pred_flip, bdry_pred_flip, p2, p3, p4, p5 =\
                    decoder(r5, r4, r3, r2)
                mask_pred_flip = flip(mask_pred_flip, 3)
                bdry_pred_flip = flip(bdry_pred_flip, 3)
                mask_pred = (mask_pred + mask_pred_flip) / 2.0
                bdry_pred = (bdry_pred + bdry_pred_flip) / 2.0
            # Probabilities in [0,1] -> 8-bit grayscale image.
            mask_pred = mask_pred[0, 0, :, :]
            mask_pred = Image.fromarray(mask_pred.cpu().detach().numpy() * 255).convert('L')
            save_folder = '{}/{}_epoch{}/{}'.format(davis_result_dir,
                                                    model_name, epoch, video)
            if not os.path.exists(save_folder):
                os.makedirs(save_folder)
            save_file = os.path.join(save_folder,
                                     os.path.basename(imagefile)[:-4] + '.png')
            # Restore the original frame resolution before writing.
            mask_pred = mask_pred.resize((width, height))
            mask_pred.save(save_file)
================================================
FILE: train_MATNet.py
================================================
import torch
from torch.utils import data
from torchvision import transforms
import os
import sys
import time
import random
import numpy as np
from modules.MATNet import Encoder, Decoder
from args import get_parser
from utils.utils import get_optimizer
from utils.utils import make_dir, check_parallel
from dataloader.dataset_utils import get_dataset_davis_youtube_ehem
from utils.utils import save_checkpoint_epoch, load_checkpoint_epoch
from utils.objectives import WeightedBCE2d
from measures.jaccard import db_eval_iou_multi
def init_dataloaders(args):
    """Build the train/val DataLoaders for the DAVIS+YouTubeVOS dataset.

    Both splits share the same transforms (ImageNet normalization for the
    inputs, plain ToTensor for targets); augmentation and shuffling are
    enabled for the training split only.
    """
    loaders = {}
    for split in ['train', 'val']:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        image_transforms = transforms.Compose([transforms.ToTensor(),
                                               normalize])
        target_transforms = transforms.Compose([transforms.ToTensor()])
        dataset = get_dataset_davis_youtube_ehem(
            args, split=split, image_transforms=image_transforms,
            target_transforms=target_transforms,
            augment=args.augment and split == 'train',
            inputRes=(473, 473))
        loaders[split] = data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=(split == 'train'),
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
def trainIters(args):
    """Train MATNet: mask loss plus multi-scale boundary supervision.

    Runs train and val splits each epoch, tracks mean losses/IoU, and
    saves a checkpoint whenever the validation IoU improves.
    """
    print(args)
    # Checkpoints are written to ckpt/<model_name>/.
    model_dir = os.path.join('ckpt/', args.model_name)
    make_dir(model_dir)
    epoch_resume = 0
    if args.resume:
        # Restore model weights; the loaded optimizer dicts are not
        # re-applied — fresh optimizers are created below.
        encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = \
            load_checkpoint_epoch(args.model_name, args.epoch_resume,
                                  args.use_gpu)
        epoch_resume = args.epoch_resume
        encoder = Encoder()
        decoder = Decoder()
        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        encoder.load_state_dict(encoder_dict)
        decoder.load_state_dict(decoder_dict)
    else:
        encoder = Encoder()
        decoder = Decoder()
    criterion = WeightedBCE2d()
    if args.use_gpu:
        encoder.cuda()
        decoder.cuda()
        criterion.cuda()
    encoder_params = list(encoder.parameters())
    decoder_params = list(decoder.parameters())
    # Separate optimizers/hyper-parameters for decoder and encoder.
    dec_opt = get_optimizer(args.optim, args.lr, decoder_params,
                            args.weight_decay)
    enc_opt = get_optimizer(args.optim_cnn, args.lr_cnn, encoder_params,
                            args.weight_decay_cnn)
    loaders = init_dataloaders(args)
    best_iou = 0
    start = time.time()
    for e in range(epoch_resume, args.max_epoch):
        print("Epoch", e)
        # Per-epoch running statistics for both splits.
        epoch_losses = {'train': {'total': [], 'iou': [],
                                  'mask_loss': [], 'bdry_loss': []},
                        'val': {'total': [], 'iou': [],
                                'mask_loss': [], 'bdry_loss': []}}
        for split in ['train', 'val']:
            if split == 'train':
                encoder.train(True)
                decoder.train(True)
            else:
                encoder.train(False)
                decoder.train(False)
            # NOTE(review): tensors are moved with .cuda() unconditionally,
            # so this loop assumes args.use_gpu — confirm before CPU runs.
            for batch_idx, (image, flow, mask, bdry, negative_pixels) in\
                    enumerate(loaders[split]):
                image, flow, mask, bdry, negative_pixels = \
                    image.cuda(), flow.cuda(), mask.cuda(), bdry.cuda(),\
                    negative_pixels.cuda()
                if split == 'train':
                    r5, r4, r3, r2 = encoder(image, flow)
                    mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
                    # Mask loss plus boundary supervision at all five outputs.
                    mask_loss = criterion(mask_pred, mask, negative_pixels)
                    bdry_loss = criterion(p1, bdry, negative_pixels) + \
                                criterion(p2, bdry, negative_pixels) + \
                                criterion(p3, bdry, negative_pixels) + \
                                criterion(p4, bdry, negative_pixels) + \
                                criterion(p5, bdry, negative_pixels)
                    loss = mask_loss + 0.2 * bdry_loss
                    iou = db_eval_iou_multi(mask.cpu().detach().numpy(),
                                            mask_pred.cpu().detach().numpy())
                    dec_opt.zero_grad()
                    enc_opt.zero_grad()
                    loss.backward()
                    enc_opt.step()
                    dec_opt.step()
                else:
                    # Validation: same losses, no gradient/optimizer steps.
                    with torch.no_grad():
                        r5, r4, r3, r2 = encoder(image, flow)
                        mask_pred, p1, p2, p3, p4, p5 = decoder(r5, r4, r3, r2)
                        mask_loss = criterion(mask_pred, mask, negative_pixels)
                        bdry_loss = criterion(p1, bdry, negative_pixels) + \
                                    criterion(p2, bdry, negative_pixels) + \
                                    criterion(p3, bdry, negative_pixels) + \
                                    criterion(p4, bdry, negative_pixels) + \
                                    criterion(p5, bdry, negative_pixels)
                        loss = mask_loss + 0.2 * bdry_loss
                        iou = db_eval_iou_multi(mask.cpu().detach().numpy(),
                                                mask_pred.cpu().detach().numpy())
                epoch_losses[split]['total'].append(loss.data.item())
                epoch_losses[split]['mask_loss'].append(mask_loss.data.item())
                epoch_losses[split]['bdry_loss'].append(bdry_loss.data.item())
                epoch_losses[split]['iou'].append(iou)
                if (batch_idx + 1) % args.print_every == 0:
                    # Report running means since the start of the epoch.
                    mt = np.mean(epoch_losses[split]['total'])
                    mmask = np.mean(epoch_losses[split]['mask_loss'])
                    mbdry = np.mean(epoch_losses[split]['bdry_loss'])
                    miou = np.mean(epoch_losses[split]['iou'])
                    te = time.time() - start
                    print('Epoch: [{}/{}][{}/{}]\tTime {:.3f}s\tLoss: {:.4f}'
                          '\tMask Loss: {:.4f}\tBdry Loss: {:.4f}'
                          '\tIOU: {:.4f}'.format(e, args.max_epoch, batch_idx,
                                                 len(loaders[split]), te, mt,
                                                 mmask, mbdry, miou))
                    start = time.time()
        # Keep the checkpoint with the best validation IoU so far.
        miou = np.mean(epoch_losses['val']['iou'])
        if miou > best_iou:
            best_iou = miou
            save_checkpoint_epoch(args, encoder, decoder,
                                  enc_opt, dec_opt, e, False)
if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()
    # Seed Python and torch RNGs for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    # Hard-coded experiment settings (override any parsed values).
    args.model_name = 'MATNet'
    args.batch_size = 2
    args.max_epoch = 25
    args.year = '2016'
    gpu_id = args.gpu_id
    print('gpu_id: ', gpu_id)
    print('use_gpu: ', args.use_gpu)
    if args.use_gpu:
        torch.cuda.set_device(device=gpu_id)
        torch.cuda.manual_seed(args.seed)
    trainIters(args)
================================================
FILE: utils/__init__.py
================================================
================================================
FILE: utils/objectives.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class WeightedBCE2d(nn.Module):
    """Binary cross-entropy with hard-negative up-weighting.

    Pixels where `negative_pixels > 0` (hard negatives) are weighted
    `1 + negative_pixels`; every other pixel has weight 1. This matches
    the original numpy implementation (positives covered by neg == 0 also
    got weight 1), but stays in torch and on the input's device — the old
    code force-moved the weight with `.cuda()` and crashed on CPU.
    """

    def __init__(self):
        super(WeightedBCE2d, self).__init__()

    def forward(self, input, target, negative_pixels):
        """
        Args:
            input: predicted probabilities in [0, 1], NCHW.
            target: binary targets, same shape as input.
            negative_pixels: per-pixel hard-negative penalties (>0 marks a
                hard negative), same shape as input.
        Returns:
            Scalar mean weighted BCE loss.
        """
        # NCHW -> NHWC -> flat row vector, matching the original layout.
        log_p = input.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)
        target_t = target.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)
        negative_t = negative_pixels.transpose(1, 2).transpose(2, 3).contiguous().view(1, -1)

        hard_negative = negative_t > 0
        # Weight 1 everywhere, boosted to 1 + penalty on hard negatives.
        weight = torch.ones_like(target_t)
        weight = torch.where(hard_negative, 1.0 + negative_t, weight)
        weight = weight.detach()
        # reduction='mean' replaces the deprecated size_average=True.
        return F.binary_cross_entropy(log_p, target_t, weight,
                                      reduction='mean')
================================================
FILE: utils/utils.py
================================================
import torch
import os
import pickle
from collections import OrderedDict
def make_dir(dir):
    """Create directory `dir` (including parents) if it does not exist.

    exist_ok=True makes the call idempotent and removes the
    check-then-create race of the previous os.path.exists guard.
    """
    os.makedirs(dir, exist_ok=True)
def check_parallel(encoder_dict, decoder_dict):
    """Strip the 'module.' prefix that nn.DataParallel adds to state dicts.

    Parallel training is detected from the ENCODER keys only; when found,
    the first 7 characters are dropped from every key of both dicts
    (matching the original behavior).
    """
    prefix = "module."
    trained_parallel = any(key[:7] == prefix for key in encoder_dict)
    if trained_parallel:
        encoder_dict = OrderedDict(
            (key[7:], value) for key, value in encoder_dict.items())
        decoder_dict = OrderedDict(
            (key[7:], value) for key, value in decoder_dict.items())
    return encoder_dict, decoder_dict
def get_base_params(args, model):
    """Yield trainable parameters of the backbone stem and res2-res5 stages.

    Note: `args` is unused (kept for interface compatibility). Iteration
    mirrors the original: for every sub-module returned by .modules() the
    (recursive) .parameters() are yielded, so parameters of nested
    containers can appear more than once — callers relying on the original
    behavior get the identical stream.
    """
    backbone = [model.conv1, model.bn1, model.res2,
                model.res3, model.res4, model.res5]
    for stage in backbone:
        for sub in stage.modules():
            for param in sub.parameters():
                if param.requires_grad:
                    yield param
def get_skip_params(model):
    """Yield parameters of the skip layers (sk2-sk5) and their BN layers."""
    groups = [model.sk2.parameters(), model.sk3.parameters(),
              model.sk4.parameters(), model.sk5.parameters(),
              model.bn2.parameters(), model.bn3.parameters(),
              model.bn4.parameters(), model.bn5.parameters()]
    for group in groups:
        for param in group:
            yield param
def merge_params(params):
    """Chain an indexable collection of parameter iterables into one stream."""
    for index in range(len(params)):
        yield from params[index]
def get_optimizer(optim_name, lr, parameters, weight_decay=0, momentum=0.9):
    """Build an optimizer over the trainable subset of `parameters`.

    Args:
        optim_name: 'sgd' or 'adam'.
        lr: learning rate.
        parameters: iterable of torch parameters; frozen ones are skipped.
        weight_decay: L2 penalty.
        momentum: SGD momentum (ignored for Adam).
    Raises:
        ValueError: for an unknown `optim_name` (previously this surfaced
        as an opaque UnboundLocalError on the return value).
    """
    trainable = filter(lambda p: p.requires_grad, parameters)
    if optim_name == 'sgd':
        return torch.optim.SGD(trainable, lr=lr, weight_decay=weight_decay,
                               momentum=momentum)
    if optim_name == 'adam':
        return torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)
    raise ValueError('unsupported optimizer: {!r}'.format(optim_name))
def save_checkpoint_epoch(args, encoder, decoder, enc_opt, dec_opt, epoch, best=False):
    """Save model and optimizer state dicts under ckpt/<model_name>/.

    Epoch-tagged files ('encoder_<epoch>.pt', ...) are always written;
    when `best` is True, untagged copies ('encoder.pt', ...) are written
    as well. The parsed args are pickled alongside for later reloading.
    """
    ckpt_dir = os.path.join('ckpt', args.model_name)
    named = (('encoder', encoder), ('decoder', decoder),
             ('enc_opt', enc_opt), ('dec_opt', dec_opt))
    for stem, obj in named:
        torch.save(obj.state_dict(),
                   os.path.join(ckpt_dir, '{}_{}.pt'.format(stem, epoch)))
    if best:
        for stem, obj in named:
            torch.save(obj.state_dict(),
                       os.path.join(ckpt_dir, '{}.pt'.format(stem)))
    # save parameters for future use
    with open(os.path.join(ckpt_dir, 'args.pkl'), 'wb') as fh:
        pickle.dump(args, fh)
def load_checkpoint_epoch(model_name, epoch, use_gpu=True, load_opt=True):
    """Load epoch-tagged state dicts from ckpt/<model_name>/.

    With use_gpu=False everything is mapped to CPU storage. When
    load_opt=True the pickled args are also returned; otherwise the
    optimizer/args slots are returned as None (the CPU path still reads
    the optimizer files, matching the original behavior).
    """
    def _path(stem):
        return os.path.join('ckpt', model_name,
                            '{}_{}.pt'.format(stem, epoch))

    if use_gpu:
        encoder_dict = torch.load(_path('encoder'))
        decoder_dict = torch.load(_path('decoder'))
        if load_opt:
            enc_opt_dict = torch.load(_path('enc_opt'))
            dec_opt_dict = torch.load(_path('dec_opt'))
    else:
        to_cpu = lambda storage, location: storage
        encoder_dict = torch.load(_path('encoder'), map_location=to_cpu)
        decoder_dict = torch.load(_path('decoder'), map_location=to_cpu)
        enc_opt_dict = torch.load(_path('enc_opt'), map_location=to_cpu)
        dec_opt_dict = torch.load(_path('dec_opt'), map_location=to_cpu)
    # save parameters for future use
    if load_opt:
        with open(os.path.join('ckpt', model_name, 'args.pkl'), 'rb') as fh:
            args = pickle.load(fh)
        return encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, args
    return encoder_dict, decoder_dict, None, None, None
gitextract_05390pb8/
├── .gitignore
├── .idea/
│ ├── aaai20-zero-ShotVOS-github.iml
│ ├── inspectionProfiles/
│ │ └── Project_Default.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── 3rdparty/
│ ├── run_davis.py
│ ├── run_davis2017.m
│ ├── run_davis_flow.py
│ ├── run_youtube.m
│ ├── run_youtube.py
│ ├── run_youtube_flow.py
│ └── seg2edge.m
├── README.md
├── apply_densecrf_davis.py
├── args.py
├── data/
│ ├── db_info.yaml
│ ├── palette.txt
│ ├── run_davis2017.m
│ ├── run_youtube.m
│ └── seg2edge.m
├── dataloader/
│ ├── base.py
│ ├── base_youtube.py
│ ├── custom_transforms.py
│ ├── dataset_utils.py
│ ├── davis2017.py
│ ├── davis2017_youtubevos_ehem.py
│ ├── hed_bsds.py
│ └── youtubeVOS.py
├── dataset_lmdb_generator.py
├── measures/
│ ├── __init__.py
│ ├── f_boundary.py
│ ├── jaccard.py
│ ├── statistics.py
│ └── t_stability.py
├── misc/
│ ├── __init__.py
│ ├── config.py
│ ├── config_youtubeVOS.py
│ ├── io_aux.py
│ ├── logger.py
│ ├── parallel.py
│ ├── timer.py
│ └── visual.py
├── modules/
│ ├── MATNet.py
│ └── cbam.py
├── requirements.txt
├── test_MATNet.py
├── train_MATNet.py
└── utils/
├── __init__.py
├── objectives.py
└── utils.py
SYMBOL INDEX (191 symbols across 31 files)
FILE: 3rdparty/run_davis.py
function main (line 8) | def main():
FILE: 3rdparty/run_davis_flow.py
function main (line 13) | def main():
function run (line 36) | def run(imagefile1, imagefile2, save_file):
FILE: 3rdparty/run_youtube.py
function main (line 8) | def main():
FILE: 3rdparty/run_youtube_flow.py
function main (line 14) | def main():
function run (line 37) | def run(imagefile1, imagefile2, save_file):
FILE: apply_densecrf_davis.py
function sigmoid (line 15) | def sigmoid(x):
FILE: args.py
function get_parser (line 4) | def get_parser():
FILE: dataloader/base.py
function _load_annotation (line 24) | def _load_annotation(filename, single_object):
function _get_num_objects (line 35) | def _get_num_objects(annotation):
class BaseLoader (line 53) | class BaseLoader(ImageCollection):
method __init__ (line 64) | def __init__(self, split, path, regex, load_func=None, lmdb_env=None):
method __str__ (line 81) | def __str__(self):
class Sequence (line 86) | class Sequence(BaseLoader):
method __init__ (line 96) | def __init__(self, split, name, regex="*.jpg", lmdb_env=None):
class SequenceClip_simple (line 103) | class SequenceClip_simple:
method __init__ (line 113) | def __init__(self, seq, starting_frame):
method __str__ (line 118) | def __str__(self):
class SequenceClip (line 125) | class SequenceClip(BaseLoader):
method __init__ (line 135) | def __init__(self, split, name, starting_frame, regex="*.jpg",
method __str__ (line 142) | def __str__(self):
class Segmentation (line 150) | class Segmentation(BaseLoader):
method __init__ (line 162) | def __init__(self,split,path,single_object,regex="*.png", lmdb_env=None):
method iter_objects_id (line 168) | def iter_objects_id(self):
method iter_objects (line 175) | def iter_objects(self):
class Annotation (line 184) | class Annotation(Segmentation):
method __init__ (line 196) | def __init__(self,split,name,single_object,regex="*.png", lmdb_env=None):
class AnnotationClip_simple (line 202) | class AnnotationClip_simple:
method __init__ (line 213) | def __init__(self, annot, starting_frame):
class AnnotationClip (line 218) | class AnnotationClip(Segmentation):
method __init__ (line 230) | def __init__(self,split,name,starting_frame,single_object,regex="*.png...
FILE: dataloader/base_youtube.py
function _load_annotation (line 16) | def _load_annotation(filename,single_object):
function _get_num_objects (line 26) | def _get_num_objects(annotation):
class BaseLoader (line 43) | class BaseLoader(ImageCollection):
method __init__ (line 55) | def __init__(self,split,path,regex,load_func=None, lmdb_env=None):
method __str__ (line 85) | def __str__(self):
class Sequence (line 89) | class Sequence(BaseLoader):
method __init__ (line 100) | def __init__(self,split,name,regex="*.jpg", lmdb_env=None):
class SequenceClip_simple (line 115) | class SequenceClip_simple:
method __init__ (line 125) | def __init__(self, seq, starting_frame):
method __str__ (line 132) | def __str__(self):
class SequenceClip (line 136) | class SequenceClip(BaseLoader):
method __init__ (line 147) | def __init__(self,split,name,starting_frame,regex="*.jpg", lmdb_env=No...
method __str__ (line 162) | def __str__(self):
class Segmentation (line 166) | class Segmentation(BaseLoader):
method __init__ (line 178) | def __init__(self,split,path,single_object,regex="*.png", lmdb_env=None):
method iter_objects_id (line 184) | def iter_objects_id(self):
method iter_objects (line 191) | def iter_objects(self):
class Annotation (line 200) | class Annotation(Segmentation):
method __init__ (line 212) | def __init__(self,split,name,single_object,regex="*.png", lmdb_env=None):
class AnnotationClip_simple (line 227) | class AnnotationClip_simple:
method __init__ (line 238) | def __init__(self, annot, starting_frame):
class AnnotationClip (line 245) | class AnnotationClip(Segmentation):
method __init__ (line 257) | def __init__(self,split,name,starting_frame,single_object,regex="*.png...
FILE: dataloader/custom_transforms.py
class ScaleNRotate (line 8) | class ScaleNRotate(object):
method __init__ (line 17) | def __init__(self, rots=(-30, 30), scales=(.75, 1.25)):
method __call__ (line 22) | def __call__(self, sample):
class Resize (line 58) | class Resize(object):
method __init__ (line 63) | def __init__(self, scales=[0.5, 0.8, 1]):
method __call__ (line 66) | def __call__(self, sample):
class RandomHorizontalFlip (line 88) | class RandomHorizontalFlip(object):
method __call__ (line 91) | def __call__(self, sample):
class ToTensor (line 103) | class ToTensor(object):
method __call__ (line 106) | def __call__(self, sample):
FILE: dataloader/dataset_utils.py
function get_dataset_davis_youtube_ehem (line 1) | def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,
FILE: dataloader/davis2017.py
class DAVISLoader (line 21) | class DAVISLoader(data.Dataset):
method __init__ (line 26) | def __init__(self, args, split, inputRes, augment=False,
method __len__ (line 94) | def __len__(self):
method __getitem__ (line 98) | def __getitem__(self, index):
FILE: dataloader/davis2017_youtubevos_ehem.py
class DAVISLoader (line 24) | class DAVISLoader(data.Dataset):
method __init__ (line 29) | def __init__(self, args, split, inputRes, augment=False,
method __len__ (line 60) | def __len__(self):
method __getitem__ (line 63) | def __getitem__(self, index):
method load_youtubevos (line 123) | def load_youtubevos(self, args):
method load_davis (line 176) | def load_davis(self, args):
FILE: dataloader/hed_bsds.py
class HEDBSDSTrain (line 6) | class HEDBSDSTrain(data.Dataset):
method __init__ (line 7) | def __init__(self, root_dir, transform=None, target_transform=None):
method __len__ (line 23) | def __len__(self):
method __getitem__ (line 27) | def __getitem__(self, index):
class HEDBSDSTest (line 43) | class HEDBSDSTest(data.Dataset):
method __init__ (line 44) | def __init__(self, root_dir, transform=None):
method __len__ (line 56) | def __len__(self):
method __getitem__ (line 59) | def __getitem__(self, index):
FILE: dataloader/youtubeVOS.py
class YoutubeVOSLoader (line 22) | class YoutubeVOSLoader(data.Dataset):
method __init__ (line 23) | def __init__(self,
method __len__ (line 93) | def __len__(self):
method __getitem__ (line 96) | def __getitem__(self, index):
FILE: dataset_lmdb_generator.py
class LMDBGenerator (line 10) | class LMDBGenerator:
method __init__ (line 11) | def __init__(self, ext='.jpg', gen_type='seq'):
method generate_lmdb_file (line 15) | def generate_lmdb_file(self, root_dir, frames_dir):
FILE: measures/f_boundary.py
function db_eval_boundary (line 14) | def db_eval_boundary(foreground_mask,gt_mask,bound_th=0.008):
function seg2bmap (line 71) | def seg2bmap(seg,width=None,height=None):
FILE: measures/jaccard.py
function db_eval_iou_multi (line 16) | def db_eval_iou_multi(annotations, segmentations):
function db_eval_iou (line 30) | def db_eval_iou(annotation,segmentation):
FILE: measures/statistics.py
function mean (line 4) | def mean(X):
function recall (line 11) | def recall(X,threshold=0.5):
function decay (line 17) | def decay(X,n_bins=4):
function std (line 32) | def std(X):
function get (line 45) | def get(name):
FILE: measures/t_stability.py
function get_bijective_pairs (line 15) | def get_bijective_pairs(pairs,costmat):
function bij_pairs_one_dim (line 20) | def bij_pairs_one_dim(pairs, costmat, left_or_right):
function hist_cost_2 (line 33) | def hist_cost_2(BH1,BH2):
function sc_compute (line 48) | def sc_compute(Bsamp,Tsamp,mean_dist,nbins_theta,nbins_r,r_inner,r_outer...
function db_eval_t_stab (line 99) | def db_eval_t_stab(fgmask,ground_truth,timing=True):
FILE: misc/config.py
class phase (line 21) | class phase(Enum):
function db_read_info (line 89) | def db_read_info():
function db_read_attributes (line 95) | def db_read_attributes():
function db_read_years (line 100) | def db_read_years():
function db_read_sequences (line 105) | def db_read_sequences(year=None,db_phase=None):
FILE: misc/config_youtubeVOS.py
class phase (line 11) | class phase(Enum):
function db_read_sequences_train (line 91) | def db_read_sequences_train():
function db_read_sequences_val (line 101) | def db_read_sequences_val():
function db_read_sequences_trainval (line 111) | def db_read_sequences_trainval():
function db_read_sequences_test (line 121) | def db_read_sequences_test():
FILE: misc/io_aux.py
function imread_indexed (line 6) | def imread_indexed(filename):
function imwrite_indexed (line 14) | def imwrite_indexed(filename,array,color_palette=cfg.palette):
FILE: misc/parallel.py
function Parallel (line 7) | def Parallel( *args, **kwargs ):
function _pickle_method (line 11) | def _pickle_method(method):
function _unpickle_method (line 16) | def _unpickle_method(func_name, obj):
FILE: misc/timer.py
class Timer (line 16) | class Timer(object):
method __init__ (line 18) | def __init__(self):
method tic (line 22) | def tic(self):
method toc (line 26) | def toc(self):
FILE: misc/visual.py
function overlay (line 13) | def overlay(image, mask, colors=[255, 0, 0], cscale=2, alpha=0.4):
FILE: modules/MATNet.py
class Encoder (line 7) | class Encoder(nn.Module):
method __init__ (line 8) | def __init__(self):
method forward_res2 (line 42) | def forward_res2(self, f1, f2):
method forward (line 57) | def forward(self, f1, f2):
class CoAttention (line 96) | class CoAttention(nn.Module):
method __init__ (line 97) | def __init__(self, channel):
method forward (line 123) | def forward(self, Qa, Qb):
method forward_sa (line 158) | def forward_sa(self, Qa, Qb):
method forward_co (line 177) | def forward_co(self, Qa, Qb):
class Gated (line 207) | class Gated(nn.Module):
method __init__ (line 208) | def __init__(self, channel, reduction=16):
method forward (line 231) | def forward(self, U):
class BasicConv (line 254) | class BasicConv(nn.Module):
method __init__ (line 255) | def __init__(self, in_planes, out_planes, kernel_size, stride=1,
method forward (line 267) | def forward(self, x):
class BoundaryModule (line 276) | class BoundaryModule(nn.Module):
method __init__ (line 277) | def __init__(self, inchannel):
method _initialize_weights (line 293) | def _initialize_weights(self):
method forward (line 306) | def forward(self, x):
class Decoder (line 318) | class Decoder(nn.Module):
method __init__ (line 319) | def __init__(self):
method _initialize_weights (line 343) | def _initialize_weights(self):
method forward (line 356) | def forward(self, r5, r4, r3, r2):
method forward_mask (line 385) | def forward_mask(self, x, r4, r3, r2):
class GC (line 401) | class GC(nn.Module):
method __init__ (line 402) | def __init__(self, inplanes, planes, kh=7, kw=7):
method forward (line 413) | def forward(self, x):
class AtrousBlock (line 420) | class AtrousBlock(nn.Module):
method __init__ (line 421) | def __init__(self, inplanes, planes, rate, stride=1):
method forward (line 427) | def forward(self, x):
class PyramidDilationConv (line 431) | class PyramidDilationConv(nn.Module):
method __init__ (line 432) | def __init__(self, inplanes, planes):
method forward (line 443) | def forward(self, x):
class Refine (line 454) | class Refine(nn.Module):
method __init__ (line 455) | def __init__(self, inplanes, planes, scale_factor=2):
method forward (line 467) | def forward(self, f, pm):
FILE: modules/cbam.py
class BasicConv (line 6) | class BasicConv(nn.Module):
method __init__ (line 7) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 14) | def forward(self, x):
class Flatten (line 22) | class Flatten(nn.Module):
method forward (line 23) | def forward(self, x):
class ChannelGate (line 26) | class ChannelGate(nn.Module):
method __init__ (line 27) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 37) | def forward(self, x):
function logsumexp_2d (line 62) | def logsumexp_2d(tensor):
class ChannelPool (line 68) | class ChannelPool(nn.Module):
method forward (line 69) | def forward(self, x):
class SpatialGate (line 72) | class SpatialGate(nn.Module):
method __init__ (line 73) | def __init__(self):
method forward (line 78) | def forward(self, x):
class CBAM (line 84) | class CBAM(nn.Module):
method __init__ (line 85) | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg...
method forward (line 91) | def forward(self, x):
FILE: test_MATNet.py
function flip (line 15) | def flip(x, dim):
FILE: train_MATNet.py
function init_dataloaders (line 21) | def init_dataloaders(args):
function trainIters (line 49) | def trainIters(args):
FILE: utils/objectives.py
class WeightedBCE2d (line 6) | class WeightedBCE2d(nn.Module):
method __init__ (line 7) | def __init__(self):
method forward (line 10) | def forward(self, input, target, negative_pixels):
FILE: utils/utils.py
function make_dir (line 7) | def make_dir(dir):
function check_parallel (line 12) | def check_parallel(encoder_dict, decoder_dict):
function get_base_params (line 34) | def get_base_params(args, model):
function get_skip_params (line 52) | def get_skip_params(model):
function merge_params (line 68) | def merge_params(params):
function get_optimizer (line 74) | def get_optimizer(optim_name, lr, parameters, weight_decay=0, momentum=0...
function save_checkpoint_epoch (line 85) | def save_checkpoint_epoch(args, encoder, decoder, enc_opt, dec_opt, epoc...
function load_checkpoint_epoch (line 101) | def load_checkpoint_epoch(model_name, epoch, use_gpu=True, load_opt=True):
Condensed preview — 52 files, each showing its path, character count, and a content snippet. Download the .json file, or copy it, for the full structured content (182K chars).
[
{
"path": ".gitignore",
"chars": 1209,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".idea/aaai20-zero-ShotVOS-github.iml",
"chars": 447,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": ".idea/inspectionProfiles/Project_Default.xml",
"chars": 654,
"preview": "<component name=\"InspectionProjectProfileManager\">\n <profile version=\"1.0\">\n <option name=\"myName\" value=\"Project De"
},
{
"path": ".idea/misc.xml",
"chars": 304,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"JavaScriptSettings\">\n <option name=\"l"
},
{
"path": ".idea/modules.xml",
"chars": 304,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": ".idea/vcs.xml",
"chars": 326,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"VcsDirectoryMappings\">\n <mapping dire"
},
{
"path": ".idea/workspace.xml",
"chars": 24636,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "3rdparty/run_davis.py",
"chars": 1163,
"preview": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n davis_folder = '/me"
},
{
"path": "3rdparty/run_davis2017.m",
"chars": 704,
"preview": "clc\n\nsave_dir = '/media/iiai/data/VOS/DAVIS2017/Annotations_edge/480p';\n\n\nsrc_dir = '/media/iiai/data/VOS/DAVIS2017/Anno"
},
{
"path": "3rdparty/run_davis_flow.py",
"chars": 1826,
"preview": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nfrom ru"
},
{
"path": "3rdparty/run_youtube.m",
"chars": 746,
"preview": "clc\n\nsave_dir = '/media/tfzhou/Yi Passport/YouTubeVOS_2018/train/Annotations_edge';\n\n\nsrc_dir = '/media/tfzhou/Yi Passpo"
},
{
"path": "3rdparty/run_youtube.py",
"chars": 1176,
"preview": "import os\nimport glob\nimport torch\nimport numpy\nimport PIL\nfrom run import estimate\n\ndef main():\n davis_folder = '/me"
},
{
"path": "3rdparty/run_youtube_flow.py",
"chars": 1835,
"preview": "import torch\nimport glob\nimport getopt\nimport math\nimport numpy\nimport os\nimport PIL\nimport PIL.Image\nimport sys\nimport "
},
{
"path": "3rdparty/seg2edge.m",
"chars": 2202,
"preview": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by t"
},
{
"path": "README.md",
"chars": 5966,
    "preview": "## Motion-Attentive Transition for Zero-Shot Video Object Segmentation\n\n["
},
{
"path": "data/db_info.yaml",
"chars": 12687,
"preview": "attributes: [AC, BC, CS, DB, DEF, EA, FM, HO, IO, LR, MB, OCC, OV, ROT, SC, SV]\nsets: [train, val, val-dev]\nyears: [201"
},
{
"path": "data/palette.txt",
"chars": 2775,
"preview": "0 0 0\n128 0 0\n0 128 0\n128 128 0\n0 0 128\n128 0 128\n0 128 128\n128 128 128\n64 0 0\n191 0 0\n64 128 0\n191 128 0\n64 0 128\n191 0"
},
{
"path": "data/run_davis2017.m",
"chars": 671,
"preview": "clc\n\nsrc_dir = 'data/DAVIS2017/Annotations/480p';\nsave_dir = 'data/DAVIS2017/Annotations_edge/480p';\n\nseqs = dir(src_dir"
},
{
"path": "data/run_youtube.m",
"chars": 703,
"preview": "clc\n\nsrc_dir = 'data/YouTubeVOS_2018/train/Annotations';\nsave_dir = 'data/YouTubeVOS_2018/train/Annotations_edge';\n\nseqs"
},
{
"path": "data/seg2edge.m",
"chars": 2202,
"preview": "% This function takes an input segment and produces binary boundaries.\n% Multi-channel input segments are supported by t"
},
{
"path": "dataloader/base.py",
"chars": 6864,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "dataloader/base_youtube.py",
"chars": 9018,
"preview": "import functools\nimport os.path as osp\n\nimport numpy as np\n\nfrom PIL import Image\nfrom skimage.io import ImageCollection"
},
{
"path": "dataloader/custom_transforms.py",
"chars": 3587,
"preview": "import random\nimport cv2\nimport numpy as np\nimport torch\nimport math\n\n\nclass ScaleNRotate(object):\n \"\"\"Scale (zoom-in"
},
{
"path": "dataloader/dataset_utils.py",
"chars": 500,
"preview": "def get_dataset_davis_youtube_ehem(args, split, image_transforms=None,\n target_transfo"
},
{
"path": "dataloader/davis2017.py",
"chars": 4848,
"preview": "from __future__ import division\n\nimport torch\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb"
},
{
"path": "dataloader/davis2017_youtubevos_ehem.py",
"chars": 9008,
"preview": "from __future__ import division\n\nfrom torch.utils import data\n\nimport os\nimport cv2\nimport glob\nimport lmdb\nimport numpy"
},
{
"path": "dataloader/hed_bsds.py",
"chars": 1856,
"preview": "import os\nfrom PIL import Image\n\nfrom torch.utils import data\n\nclass HEDBSDSTrain(data.Dataset):\n def __init__(self, "
},
{
"path": "dataloader/youtubeVOS.py",
"chars": 4300,
"preview": "#TODO\n\nfrom collections import namedtuple\n\nimport os\nimport cv2\nimport numpy as np\n\nfrom PIL import Image\nfrom .base_you"
},
{
"path": "dataset_lmdb_generator.py",
"chars": 2109,
"preview": "# This script generates a data structure in the form of key-value storage. This is made in the huge amount of \n# calls t"
},
{
"path": "measures/__init__.py",
"chars": 739,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/f_boundary.py",
"chars": 3534,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/jaccard.py",
"chars": 1513,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "measures/statistics.py",
"chars": 857,
"preview": "import numpy as np\nimport warnings\n\ndef mean(X):\n \"\"\"\n Compute average ignoring NaN values.\n \"\"\"\n\n return np.nanmean"
},
{
"path": "measures/t_stability.py",
"chars": 4882,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/__init__.py",
"chars": 633,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/config.py",
"chars": 3440,
"preview": "#!/usr/bin/env python\nimport yaml\nimport os.path as osp\nimport numpy as np\nfrom easydict import EasyDict as edict\n\nfrom "
},
{
"path": "misc/config_youtubeVOS.py",
"chars": 3998,
"preview": "#!/usr/bin/env python\nimport json\nimport os.path as osp\nfrom easydict import EasyDict as edict\nfrom enum import Enum\nimp"
},
{
"path": "misc/io_aux.py",
"chars": 567,
"preview": "from PIL import Image\nimport numpy as np\n\nfrom misc.config import cfg\n\ndef imread_indexed(filename):\n \"\"\" Load image gi"
},
{
"path": "misc/logger.py",
"chars": 653,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/parallel.py",
"chars": 821,
"preview": "try:\n from joblib import Parallel, delayed\nexcept Exception as e:\n print(e) #TODO\n # if joblib does not exist j"
},
{
"path": "misc/timer.py",
"chars": 894,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "misc/visual.py",
"chars": 1185,
"preview": "# ----------------------------------------------------------------------------\n# The 2017 DAVIS Challenge on Video Objec"
},
{
"path": "modules/MATNet.py",
"chars": 14571,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torchvision import models\n\n\nclass Encoder(nn.Mod"
},
{
"path": "modules/cbam.py",
"chars": 3860,
"preview": "import torch\nimport math\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass BasicConv(nn.Module):\n def __in"
},
{
"path": "requirements.txt",
"chars": 261,
"preview": "easydict==1.9\nlmdb==0.94\ntorchvision==0.2.2\nopencv_contrib_python==4.0.0.21\nscipy==1.2.1\nsix==1.12.0\ntorch==1.0.1.post2\n"
},
{
"path": "test_MATNet.py",
"chars": 3612,
"preview": "import torch\nfrom torchvision import transforms\n\nimport os\nimport glob\nfrom tqdm import tqdm\nfrom PIL import Image\nfrom "
},
{
"path": "train_MATNet.py",
"chars": 7391,
"preview": "import torch\nfrom torch.utils import data\nfrom torchvision import transforms\n\nimport os\nimport sys\nimport time\nimport ra"
},
{
"path": "utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "utils/objectives.py",
"chars": 1394,
"preview": "import torch\r\nimport torch.nn as nn\r\nimport torch.nn.functional as F\r\n\r\n\r\nclass WeightedBCE2d(nn.Module):\r\n def __ini"
},
{
"path": "utils/utils.py",
"chars": 4885,
"preview": "import torch\r\nimport os\r\nimport pickle\r\nfrom collections import OrderedDict\r\n\r\n\r\ndef make_dir(dir):\r\n if not os.path."
}
]
About this extraction
This page contains the full source code of the tfzhou/MATNet GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 52 files (167.4 KB), approximately 48.2k tokens, and a symbol index with 191 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.