Repository: GewelsJI/VPS
Branch: main
Commit: 4b2d87e8265a
Files: 68
Total size: 263.4 KB

Directory structure:
gitextract_qn90sj5e/
├── .gitignore
├── LICENSE
├── README.md
├── docs/
│   ├── AWESOME_VPS.md
│   ├── DATA_DESCRIPTION.md
│   ├── DATA_PREPARATION.md
│   ├── INFO_NEGATIVE_CASES.md
│   ├── INFO_POSITIVE_CASES.md
│   └── RELEASE_NOTES.md
├── eval/
│   ├── README.md
│   ├── eval-result/
│   │   ├── 2015-MICCAI-UNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2018-TMI-UNet++/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2019-TPAMI-COSNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2020-AAAI-PCSA/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2020-MICCAI-23DCNN/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2020-MICCAI-ACSNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2020-MICCAI-PraNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2020-TIP-MATNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2021-ICCV-DCFNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2021-ICCV-FSNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2021-MICCAI-PNSNet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2021-MICCAI-SANet/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   ├── 2021-NIPS-AMD/
│   │   │   ├── TestEasyDataset_eval.txt
│   │   │   └── TestHardDataset_eval.txt
│   │   └── 2022-TMI-PNSPlus/
│   │       ├── TestEasyDataset_eval.txt
│   │       └── TestHardDataset_eval.txt
│   ├── eval.sh
│   ├── metrics.py
│   └── vps_evaluator.py
├── lib/
│   ├── __init__.py
│   ├── dataloader/
│   │   ├── __init__.py
│   │   ├── dataloader.py
│   │   ├── preprocess.py
│   │   └── statistics.pth
│   ├── module/
│   │   ├── LightRFB.py
│   │   ├── PNS/
│   │   │   ├── PNS_Module/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── reference.cpp
│   │   │   │   ├── reference.h
│   │   │   │   ├── sa.cu
│   │   │   │   ├── sa_ext.cpp
│   │   │   │   ├── timer.h
│   │   │   │   └── utils.h
│   │   │   └── setup.py
│   │   ├── PNSPlusModule.py
│   │   ├── PNSPlusNetwork.py
│   │   ├── Res2Net_v1b.py
│   │   └── __init__.py
│   └── utils/
│       ├── __init__.py
│       └── utils.py
├── scripts/
│   ├── config.py
│   ├── eval_eff.py
│   ├── my_test.py
│   └── my_train.py
├── snapshot/
│   └── .placeholder
└── utils/
    ├── reorganize.py
    └── reorganize.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# For projects using Nanoc (http://nanoc.ws/)

# Default location for output (needs to match output_dir's value found in nanoc.yaml)
output/

# Temporary file directory
tmp/nanoc/

# Crash Log
crash.log

data/
res/
eval/loggings/
eval/__pycache__/
backup/
./TODOLIST.md

================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ #

# `Video Polyp Segmentation: A Deep Learning Perspective (MIR 2022)`

[![license: apache](https://img.shields.io/badge/License-Apache--2.0-green.svg)](https://www.apache.org/licenses/LICENSE-2.0) ![LAST COMMIT](https://img.shields.io/github/last-commit/GewelsJI/VPS) ![ISSUES](https://img.shields.io/github/issues/GewelsJI/VPS) ![STARS](https://img.shields.io/github/stars/GewelsJI/VPS) [![ARXIV PAPER](https://img.shields.io/badge/Arxiv-Paper-red)](https://arxiv.org/pdf/2203.14291v3.pdf) [![Gitter](https://badges.gitter.im/video-polyp-segmentation/community.svg)](https://gitter.im/video-polyp-segmentation/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/video-polyp-segmentation-a-deep-learning/video-polyp-segmentation-on-sun-seg-easy)](https://paperswithcode.com/sota/video-polyp-segmentation-on-sun-seg-easy?p=video-polyp-segmentation-a-deep-learning) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/video-polyp-segmentation-a-deep-learning/video-polyp-segmentation-on-sun-seg-hard)](https://paperswithcode.com/sota/video-polyp-segmentation-on-sun-seg-hard?p=video-polyp-segmentation-a-deep-learning)

---

> [!note]
> The original SUN Database website (http://amed8k.sundatabase.org/) is no longer maintained and is sometimes inaccessible, so we provide a backup website for the SUN Database here: [website backup](https://github.com/GewelsJI/VPS/blob/main/docs/%5Boriginal%20backup%5D%20SUN%20Colonoscopy%20Video%20Database.pdf).
>
> Regarding recent requests for a large volume of data, we suggest emailing me at gepengai.ji@gmail.com for alternative access options.
>
> 🔴 We're currently pushing intelligent colonoscopy (refer to our [research gallery](https://github.com/ai4colonoscopy) page) into the multimodal era. We recommend reading two new research works from our team:
> - :boom: The pioneering multimodal analysis solution for colonoscopy: **ColonINST** & **ColonGPT** ([paper](https://arxiv.org/abs/2410.17241) & [project page](https://github.com/ai4colonoscopy/IntelliScope))
> - :boom: The current largest multimodal dataset in colonoscopy, **ColonVQA (1.1+ million entries)**. We also introduce the first reasoning-centric dataset, **ColonReason**, along with an R1-styled model, **ColonR1**, tailored for colonoscopy tasks. ([paper](https://arxiv.org/abs/2512.03667) & [project page](https://github.com/ai4colonoscopy/Colon-X))

---


- **Title:** Video Polyp Segmentation: A Deep Learning Perspective (accepted by Machine Intelligence Research; see the [arXiv version](https://arxiv.org/pdf/2203.14291v3.pdf) & the [Springer version](https://link.springer.com/article/10.1007/s11633-022-1371-y))
- **Authors:** [Ge-Peng Ji](https://scholar.google.com/citations?view_op=list_works&hl=en&hl=en&user=oaxKYKUAAAAJ)^, [Guobao Xiao](https://guobaoxiao.github.io)^, [Yu-Cheng Chou](https://sites.google.com/view/yu-cheng-chou)^, [Deng-Ping Fan](https://dengpingfan.github.io/)*, [Kai Zhao](https://kaizhao.net/), [Geng Chen](https://scholar.google.com/citations?user=sJGCnjsAAAAJ&hl=en), and [Luc Van Gool](https://scholar.google.com/citations?user=TwMib_QAAAAJ&hl=en).
- **Contact:** We invite everyone to help make this project more accessible and useful. If you have any questions, please feel free to drop us an e-mail (gepengai.ji@gmail.com & dengpfan@gmail.com), report an issue, or push a PR directly. Your star is our motivation, let's enjoy it!
- We welcome any discussion on video polyp segmentation in the [Gitter room](https://gitter.im/video-polyp-segmentation/community?utm_source=share-link&utm_medium=link&utm_campaign=share-link), or join our [WeChat group](https://github.com/GewelsJI/VPS/blob/main/assets/wechat_group_qr_code_20240816.JPG).
- The following video gives a quick overview of the core points of our work.

https://github.com/GewelsJI/VPS/assets/38354957/9bea01ae-9582-494f-8bf6-f83307eebc08

# Contents

- [1. Features](#1-features)
- [2. News](#2-news)
- [3. VPS Dataset](#3-vps-dataset)
- [4. VPS Baseline](#4-vps-baseline)
- [5. VPS Benchmark](#5-vps-benchmark)
- [6. Tracking Trends](#6-tracking-trends)
- [7. Citations](#7-citations)
- [8. FAQ](#8-faq)
- [9. License](#9-license)
- [10. Acknowledgments](#10-acknowledgments)

# 1. Features

In the deep learning era, we present the first comprehensive video polyp segmentation (VPS) study. Over the years, progress on VPS has been slow because large-scale, fine-grained segmentation masks were still not publicly available. To tackle this issue, we first introduce a long-awaited, high-quality, per-frame annotated VPS dataset. Our work has four features:

- **VPS Dataset:** We recognize the importance of annotated medical data for substantial progress in research on medical AI systems' development. Thus, our SUN-SEG dataset is open access: a non-profit, high-quality, large-scale, densely annotated database for facilitating colonoscopy diagnosis, localization, and derivative tasks. Our vision is to provide data and knowledge that aid and educate clinicians, and to support the development of automated medical decision support systems.
- **VPS Baseline:** We propose a simple but efficient baseline, which outperforms 13 cutting-edge polyp segmentation approaches and runs in super real-time (170fps). We hope such a baseline attracts more researchers to join our community and inspires them to develop more interesting solutions.
- **VPS Benchmark:** For a fair comparison, we build an online leaderboard to keep up with the new progress of the VPS community. Besides, we provide an out-of-the-box evaluation toolbox for the VPS task.
- **Tracking Trends:** We elaborately collect a paper reading list ( :boom: [Awesome Paper List](https://github.com/GewelsJI/VPS/blob/main/docs/AWESOME_VPS.md) :boom: ) to continuously track the latest updates in this rapidly advancing field.

# 2. News
- *[Jul/15/2024]* Thanks to [@Yuli Zhou](https://github.com/zhoustan) for raising the frame-sorting issues in the inference and evaluation code (check out the pull request here: https://github.com/GewelsJI/VPS/pull/48). It turns out this only slightly impacted our final evaluation performance (check out the evidence [here](https://github.com/GewelsJI/VPS/blob/main/docs/RELEASE_NOTES.md)).
- *[Oct/26/2023]* The video-level attributes have been released at [Google Drive](https://docs.google.com/spreadsheets/d/1J33EvrEcZp5CMWtKN_4VNdhp4EQsjMSf/edit?usp=sharing&ouid=117958307137184272405&rtpof=true&sd=true).
- *[Jan/30/2023]* We updated the bounding box annotations to a COCO-like format, i.e., `[x,y,width,height]`, where x and y are the coordinates of the bounding box's upper-left corner (a small conversion sketch follows this list). Please download the latest compressed file [here](https://github.com/GewelsJI/VPS/blob/main/docs/DATA_PREPARATION.md#step-1-request-and-download). Thanks to Yingling Lu for pointing out this issue.
- *[August/24/2022]* :boom: Our paper has been accepted by the [Machine Intelligence Research (MIR)](https://www.springer.com/journal/11633) journal.
- *[July/03/2022]* :boom: We updated SUN-SEG to a new version with more fine-grained data splits, including seen/unseen scenarios. For more details, refer to our technical report. The new PapersWithCode pages are [SUN-SEG-Easy](https://paperswithcode.com/dataset/sun-seg-easy) & [SUN-SEG-Hard](https://paperswithcode.com/dataset/sun-seg-hard).
- *[May/11/2022]* Released the rejected labels: [SUN-SEG-Rejected-Labels (Google Drive, 120.7MB)](https://drive.google.com/file/d/1OtK2PR6gKQv56dIFjw0rXadcgGonf93S/view?usp=sharing). For more details, see [here](https://github.com/GewelsJI/VPS/blob/main/docs/DATA_DESCRIPTION.md#rejected-labels).
- *[March/27/2022]* Released the pretrained checkpoints and the whole benchmark results.
- *[March/18/2022]* Uploaded the whole training/testing code for our enhanced model, PNS+.
- *[March/15/2022]* Released the evaluation toolbox for the VPS task. Added [Awesome_Video_Polyp_Segmentation.md](https://github.com/GewelsJI/VPS/blob/main/docs/AWESOME_VPS.md) for tracking the latest trends in this community.
- *[March/14/2022]* Created the project page.
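For readers unfamiliar with the COCO-like box convention mentioned in the *[Jan/30/2023]* item above, here is a tiny, purely illustrative sketch (not part of this repository's code) of converting such a box into corner coordinates:

```python
# Illustrative only: a COCO-like box is [x, y, width, height],
# where (x, y) is the upper-left corner of the bounding box.
def coco_to_corners(box):
    x, y, w, h = box
    # Corner format: [x_min, y_min, x_max, y_max]
    return [x, y, x + w, y + h]

print(coco_to_corners([10, 20, 30, 40]))  # -> [10, 20, 40, 60]
```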
# 3. VPS Dataset

Figure 1: Annotations of the SUN-SEG dataset. Object-level segmentation masks for different pathological categories, densely annotated by experienced annotators and verified by colonoscopy-related researchers to ensure the quality of the proposed dataset.

Notably, due to necessary privacy-preserving considerations for the SUN dataset, we cannot directly share the download link of the video dataset without authorization. Please inform us of your institution and your purpose for using SUN-SEG in the email. Thank you for your understanding!

- How do you get access to our SUN-SEG dataset? Please refer to [`DATA_PREPARATION`](https://github.com/GewelsJI/VPS/blob/main/docs/DATA_PREPARATION.md).
- If you want a more detailed description of our SUN-SEG dataset, please refer to [`DATA_DESCRIPTION.md`](https://github.com/GewelsJI/VPS/blob/main/docs/DATA_DESCRIPTION.md).

# 4. VPS Baseline

> This work is the extended version of our conference paper (Progressively Normalized Self-Attention Network for Video Polyp Segmentation) accepted at MICCAI-2021. More details can be found on [arXiv](https://arxiv.org/abs/2105.08468) and at the [GitHub link](https://github.com/GewelsJI/PNS-Net).


Figure 2: The pipeline of the proposed (a) PNS+ network, which is based on (b) the normalized self-attention (NS) block.

There are a few simple steps to get our project code (PNS+) running:

- Prerequisites of environment:

```bash
conda create -n PNS+ python=3.6
conda activate PNS+
conda install pytorch=1.1.0 torchvision -c pytorch
pip install tensorboardX tqdm Pillow==6.2.2
pip install git+https://github.com/pytorch/tnt.git@master
```

- Compiling the project:

```bash
cd ./lib/module/PNS
python setup.py build develop
```

- Training:

```bash
python ./scripts/my_train.py
```

- Testing: Download the pre-trained weights from this download link: [Google Drive, 102.9MB](https://drive.google.com/file/d/1YCC9AiSr3yMhPXBEnM2bgjoJ7fodpLkK/view?usp=sharing) / [Baidu Drive](https://pan.baidu.com/s/1mnd9GD2BiWFzsibv7WiwAA) (Password: g7sa, Size: 108MB), move them to `snapshot/PNSPlus/epoch_15/PNSPlus.pth`, and run:

```bash
python ./scripts/my_test.py
```
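Before launching inference, it can help to verify that the downloaded checkpoint deserializes correctly. Below is a minimal, purely illustrative sanity check (not part of this repository's scripts); it only assumes the file is a standard PyTorch checkpoint:

```python
# Hypothetical sanity check for the downloaded PNS+ weights.
# Assumption: the file is a regular PyTorch checkpoint (e.g., a state_dict).
import torch

ckpt_path = 'snapshot/PNSPlus/epoch_15/PNSPlus.pth'
state = torch.load(ckpt_path, map_location='cpu')

# A state_dict (or a dict wrapping one) exposes its entries as keys;
# listing a few of them confirms the download is intact.
keys = list(state.keys())
print(f'Loaded {len(keys)} entries, e.g. {keys[:3]}')
```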
# 5. VPS Benchmark

We provide an out-of-the-box evaluation toolbox for the VPS task, written in Python. You can simply run it to generate evaluation results for your custom approach (an illustrative metric sketch appears at the end of this section). Alternatively, you can directly download the complete VPS benchmark, including the prediction map of each competitor, at this download link: [Google Drive, 5.45GB](https://drive.google.com/file/d/1Liva1oR1-1ihWaTNNM5WDKZVETAF587M/view?usp=sharing) / [Baidu Drive](https://pan.baidu.com/s/1Qu-0l9w0ja92nzrlWMRhoQ) (Password: 2t1l, Size: 5.45G).

- More instructions about the **Evaluation Toolbox** can be found at this [`PageLink`](https://github.com/GewelsJI/VPS/tree/main/eval).

We also built an online leaderboard to keep up with the new progress of other competitors. We believe this is a fun way to learn about new research directions and stay in tune with our VPS community.

- The online leaderboard is publicly available on PapersWithCode: [SUN-SEG-Easy](https://paperswithcode.com/dataset/sun-seg-easy) & [SUN-SEG-Hard](https://paperswithcode.com/dataset/sun-seg-hard).

Here, we present a variety of qualitative and quantitative results of the VPS benchmark:

- Visual prediction of top-performance competitors:

Figure 3: Qualitative comparison of three video-based models (PNS+, PNSNet, and 2/3D CNN) and two image-based models (ACSNet and PraNet).

- Model-based performance:


Figure 4: Quantitative comparison on two testing sub-datasets, i.e., SUN-SEG-Easy (Unseen) and SUN-SEG-Hard (Unseen). `R/T` indicates that we re-trained the non-public model whose code was provided by the original authors. The best scores are highlighted in bold.

- Attribute-based performance:


Figure 5: Visual attribute-based performance on our SUN-SEG-Easy (Unseen) and SUN-SEG-Hard (Unseen) in terms of the structure measure.
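For intuition about the per-frame overlap scores reported above, the following is a minimal, purely illustrative Dice-coefficient sketch; it is not the toolbox's actual implementation (see `eval/metrics.py` and `eval/vps_evaluator.py` for the real metric suite, which also covers the structure measure used in Figure 5):

```python
# Illustrative per-frame Dice score on binary masks; the repository's
# evaluation toolbox implements its own, more complete metric suite.
import numpy as np

def dice(pred, gt, eps=1e-8):
    """pred, gt: boolean numpy arrays of the same shape."""
    inter = np.logical_and(pred, gt).sum()
    return (2.0 * inter + eps) / (pred.sum() + gt.sum() + eps)

pred = np.zeros((4, 4), dtype=bool); pred[1:3, 1:3] = True
gt = np.zeros((4, 4), dtype=bool); gt[1:4, 1:4] = True
print(round(dice(pred, gt), 3))  # -> 0.615
```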

# 6. Tracking Trends


To better understand the development of this field and to help researchers quickly advance their own work, we elaborately build a **Paper Reading List**. It includes **119** colonoscopy imaging-based AI research works from the past 12 years, covering several fields such as image polyp segmentation, video polyp segmentation, image polyp detection, video polyp detection, and image polyp classification. Besides, we provide some interesting resources about human colonoscopy.

> **Note:** If we have missed any treasured works, please let us know via e-mail or push a PR directly. We will work on it as soon as possible. Many thanks for your active feedback.

- The latest paper reading list and some interesting resources can be found at [`Awesome-Video-Polyp-Segmentation.md`](https://github.com/GewelsJI/VPS/blob/main/docs/AWESOME_VPS.md)

# 7. Citations

If you have found our work useful, please use the following references to cite this project:

    @article{ji2022video,
      title={Video polyp segmentation: A deep learning perspective},
      author={Ji, Ge-Peng and Xiao, Guobao and Chou, Yu-Cheng and Fan, Deng-Ping and Zhao, Kai and Chen, Geng and Van Gool, Luc},
      journal={Machine Intelligence Research},
      volume={19},
      number={6},
      pages={531--549},
      year={2022},
      publisher={Springer}
    }

    @inproceedings{ji2021progressively,
      title={Progressively normalized self-attention network for video polyp segmentation},
      author={Ji, Ge-Peng and Chou, Yu-Cheng and Fan, Deng-Ping and Chen, Geng and Fu, Huazhu and Jha, Debesh and Shao, Ling},
      booktitle={International Conference on Medical Image Computing and Computer-Assisted Intervention},
      pages={142--152},
      year={2021},
      organization={Springer}
    }

    @inproceedings{fan2020pranet,
      title={Pranet: Parallel reverse attention network for polyp segmentation},
      author={Fan, Deng-Ping and Ji, Ge-Peng and Zhou, Tao and Chen, Geng and Fu, Huazhu and Shen, Jianbing and Shao, Ling},
      booktitle={International conference on medical image computing and computer-assisted intervention},
      pages={263--273},
      year={2020},
      organization={Springer}
    }

# 8. FAQ

- Thanks to [Tuo Wang](mailto:victor_wt@qq.com) for providing a great solution to [upgrade the CUDA version when compiling the NS block](./docs/Upgrade%20environment%20for%20NS%20block.pdf).

# 9. License

The dataset and source code are free for research and education use only. Any commercial use requires formal permission first.

- **Video Source:** The SUN (Showa University and Nagoya University) Colonoscopy Video Database is a colonoscopy video database for the evaluation of automated colorectal-polyp detection. The database comprises still images from videos collected at the Showa University Northern Yokohama Hospital. The Mori Laboratory, Graduate School of Informatics, Nagoya University developed this database. Every frame in the database was annotated by expert endoscopists at Showa University.
- **Intended Use:** This database is available only for non-commercial use in research or educational purposes. As long as you use the database for these purposes, you can edit or process the images and annotations in this database. Without permission from Mori Lab., commercial use of this dataset is prohibited, even after copying, editing, processing, or any other operations on this database. Please contact us for commercial use, or if you are uncertain about the decision.
- **Distribution:** It is prohibited to sell, transfer, lend, lease, resell, or distribute this database, as-is or after copying, editing, or processing, in whole or in part.

# 10. Acknowledgments
- Our dataset is built upon the SUN (Showa University and Nagoya University) Colonoscopy Video Database. Thanks very much for their wonderful work!
- This codebase is based on our conference version, [PNSNet](https://github.com/GewelsJI/PNS-Net), which was accepted at the MICCAI-2021 conference.

================================================
FILE: docs/AWESOME_VPS.md
================================================

# Awesome Video Polyp Segmentation

![](../assets/the-reading-list.png)

# 1. Preview

This is a paper collection of **133** colonoscopy imaging-based AI research works from the past **12** years. To better understand the development of this field and to help researchers in their research process, we have divided the works into nine tasks: **133** papers on [image polyp segmentation](#21-image-polyp-segmentation), **5** papers on [video polyp segmentation](#22-video-polyp-segmentation), **17** papers on [image polyp detection](#23-image-polyp-detection), **11** papers on [video polyp detection](#24-video-polyp-detection), **6** papers on [image polyp classification](#25-image-polyp-classification), **1** paper on [video polyp classification](#26-video-polyp-classification), **2** papers on [colonoscopy depth estimation](#27-colonoscopy-depth-estimation), **1** paper on [colonoscopy deficient coverage detection](#28-colonoscopy-deficient-coverage-detection), and **1** paper on [colon polyp image synthesis](#29-colon-polyp-image-synthesis).

Besides, we present a collection of **14** polyp-related datasets, including **8** [image segmentation datasets](#31-image-segmentation-datasets), **2** [video segmentation datasets](#32-video-segmentation-datasets), **1** [video detection dataset](#33-video-detection-datasets), **3** [video classification datasets](#34-video-classification-datasets), and **2** [colonoscopy depth datasets](#35-colonoscopy-depth-datasets). In addition, we provide links to each paper and its repository whenever possible. * denotes that the corresponding paper cannot be downloaded or that the link leads to the journal page.

> Note that this page is under construction. If you have anything to recommend or any suggestions, please feel free to contact us via e-mail (gepengai.ji@gmail.com) or directly push a PR.

---

*Last updated: 12/07/2022*

---

## 1.1. Table of Contents

- [Awesome Video Polyp Segmentation](#awesome-video-polyp-segmentation)
- [1. Preview](#1-preview)
  * [1.1. Table of Contents](#11-table-of-contents)
- [2. Polyp Related Methods](#2-polyp-related-methods)
  * [2.1 Image Polyp Segmentation](#21-image-polyp-segmentation)
  * [2.2 Video Polyp Segmentation](#22-video-polyp-segmentation)
  * [2.3 Image Polyp Detection](#23-image-polyp-detection)
  * [2.4 Video Polyp Detection](#24-video-polyp-detection)
  * [2.5 Image Polyp Classification](#25-image-polyp-classification)
  * [2.6 Video Polyp Classification](#26-video-polyp-classification)
  * [2.7 Colonoscopy Depth Estimation](#27-colonoscopy-depth-estimation)
  * [2.8 Colonoscopy Deficient Coverage Detection](#28-colonoscopy-deficient-coverage-detection)
  * [2.9 Colon Polyp Image Synthesis](#29-colon-polyp-image-synthesis)
- [3. Polyp Related Datasets](#3-polyp-related-datasets)
Polyp Related Datasets](#3-useful-resources) * [3.1 Image Segmentation Datasets](#31-image-segmentation-datasets) * [3.2 Video Segmentation Datasets](#32-video-segmentation-datasets) * [3.3 Video Detection Datasets](#33-video-detection-datasets) * [3.4 Video Classification Datasets](#34-video-classification-datasets) * [3.5 Colonoscopy Depth Datasets](#35-colonoscopy-depth-datasets) - [4. Useful Resources](#4-useful-resources) * [4.1 Colonoscopy Related](#41-colonoscopy-related) * [4.2 AI Conference Deadlines](#42-ai-conference-deadlines) # 2. Polyp Related Methods ## 2.1 Image Polyp Segmentation **Yr.** | **Pub.** | **Title** | **Links** :-: | :-: | :- | :-: 2023 | **MICCAI** | WeakPolyp: You Only Look Bounding Box for Polyp Segmentation | [Paper](https://arxiv.org/pdf/2307.10912.pdf)/[Code](https://github.com/weijun88/WeakPolyp) 2023 | **MICCAI** | Revisiting Feature Propagation and Aggregation in Polyp Segmentation | Paper 2023 | **MICCAI** | Probabilistic Modeling Ensemble Vision Transformer Improves Complex Polyp Segmentation | Paper 2023 | **MICCAI** | S2ME: Spatial-Spectral Mutual Teaching and Ensemble Learning for Scribble-supervised Polyp Segmentation | [Paper](https://arxiv.org/abs/2306.00451)/[Code](https://github.com/lofrienger/S2ME) 2023 | **arXiv** | Can SAM Segment Polyps? | [Paper](https://arxiv.org/abs/2304.07583) 2023 | **arXiv** | Polyp-SAM++: Can A Text Guided SAM Perform Better for Polyp Segmentation? | [Paper](https://www.researchgate.net/publication/373091859_Polyp-SAM_Can_A_Text_Guided_SAM_Perform_Better_for_Polyp_Segmentation) 2023 | **arXiv** | Polyp-SAM: Transfer SAM for Polyp Segmentation | [Paper](https://arxiv.org/abs/2305.00293) 2023 | **PR** | Cross-level Feature Aggregation Network for Polyp Segmentation | [Paper](https://www.sciencedirect.com/science/article/pii/S0031320323002558)/[Code](https://github.com/taozh2017/CFANet) 2023 | **BMVC** | `ADSNet` ADSNet: Adaptation of Distinct Semantic for Uncertain Areas in Polyp Segmentation | [Paper](https://papers.bmvc2023.org/0806.pdf) 2022 | **ISICT** | Incremental Boundary Refinement using Self Axial Reverse Attention and Uncertainty-aware Gate for Colon Polyp Segmentation | [Paper*](https://dl.acm.org/doi/abs/10.1145/3568562.3568663)/Code 2022 | **TVCJ** | DCANet: deep context attention network for automatic polyp segmentation | [Paper](https://link.springer.com/content/pdf/10.1007/s00371-022-02677-x.pdf?pdf=button)/Code 2022 | **arXiv** | Towards Automated Polyp Segmentation Using Weakly- and Semi-Supervised Learning and Deformable Transformers | [Paper](https://arxiv.org/pdf/2211.11847.pdf)/Code 2022 | **arXiv** | Spatially Exclusive Pasting: A General Data Augmentation for the Polyp Segmentation | [Paper](https://arxiv.org/pdf/2211.08284.pdf)/Code 2022 | **CBM** | MSRAformer: Multiscale spatial reverse attention network for polyp segmentation | [Paper*](https://www.sciencedirect.com/science/article/abs/pii/S0010482522009829)/Code 2022 | **CBM** | DBMF: Dual Branch Multiscale Feature Fusion Network for polyp segmentation | [Paper*](https://www.sciencedirect.com/science/article/abs/pii/S0010482522010125)/Code 2022 | **ICAISM** | Automatic Polyp Segmentation in Colonoscopy Images Using Single Network Model: SegNet | [Paper](https://link.springer.com/content/pdf/10.1007/978-981-16-2183-3_69.pdf?pdf=inline%20link)/Code 2022 | **ICMAR** | Polyp segmentation algorithm combining multi-scale attention and multi-layer loss | 
[Paper*](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/12331/123314W/Polyp-segmentation-algorithm-combining-multi-scale-attention-and-multi-layer/10.1117/12.2652907.short?SSO=1)/Code 2022 | **MICCAI** | Using Guided Self-Attention with Local Information for Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16440-8_60)/Code 2022 | **MICCAI** | Task-Relevant Feature Replenishment for Cross-Centre Polyp Segmentation| [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16440-8_57)/[Code](https://github.com/CathyS1996/TRFRNet) 2022 | **MICCAI** | TGANet: Text-guided attention for improved polyp segmentation | [Paper](https://arxiv.org/pdf/2205.04280.pdf)/[Code](https://github.com/nikhilroxtomar/TGANet) 2022 | **MICCAI** | Lesion-Aware Dynamic Kernel for Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16437-8_10)/Code 2022 | **MICCAI** | Semi-Supervised Spatial Temporal Attention Network for Video Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16437-8_7)/Code 2022 | **CMIG** | Boosting medical image segmentation via conditional-synergistic convolution and lesion decoupling | [Paper](https://www.sciencedirect.com/science/article/abs/pii/S0895611122000817)/[Code](https://github.com/QianChen98/CCLD-Net) 2022 | **Gastroenterology Insights** | UPolySeg: A U-Net-Based Polyp Segmentation Network Using Colonoscopy Images | [Paper](https://www.mdpi.com/2036-7422/13/3/27/pdf?version=1660117945)/Code 2022 | **Electronics** | A Segmentation Algorithm of Colonoscopy Images Based on Multi-Scale Feature Fusion | [Paper](https://www.mdpi.com/2079-9292/11/16/2501/pdf?version=1660199760)/Code 2022 | **TCSVT** | Polyp-Mixer: An Efficient Context-Aware MLP-based Paradigm for Polyp Segmentation | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9852486)/[Code](https://github.com/shijinghuihub/Polyp-Mixer) 2022 | **TETCI** | Adaptive Context Exploration Network for Polyp Segmentation in Colonoscopy Images | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9852746)/Code 2022 | **IJCAI** | ICGNet: Integration Context-Based Reverse-Contour Guidance Network for Polyp Segmentation | [Paper](https://www.ijcai.org/proceedings/2022/0123.pdf)/Code 2022 | **IJCAI** | TCCNet: Temporally Consistent Context-Free Network for Semi-supervised Video Polyp Segmentation | [Paper](https://www.ijcai.org/proceedings/2022/0155.pdf)/[Code](https://github.com/wener-yung/TCCNet) 2022 | **AIM** | An end-to-end tracking method for polyp detectors in colonoscopy videos | [Paper](https://reader.elsevier.com/reader/sd/pii/S0933365722001270?token=85406D788AF1B59597BD0BCA3456A70DD2ECE3040EBCB1C3D669584B712F58FA656224415F80070007005E3D36B81F8D&originRegion=us-east-1&originCreation=20220801152429)/Code 2022 | **MIUA** | Polyp2Seg: Improved Polyp Segmentation with Vision Transformer | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-031-12053-4_39.pdf)/Code 2022 | **CVIP** | Localization of Polyps in WCE Images Using Deep Learning Segmentation Methods: A Comparative Study | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-031-11346-8_46.pdf)/Code 2022 | **AIMD** | SARM-Net: A Spatial Attention-Based Residual M-Net for Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-981-19-0151-5_33)/Code 2022 | **BSPC** | FAPN: Feature Augmented Pyramid Network for polyp segmentation | 
[Paper](https://reader.elsevier.com/reader/sd/pii/S1746809422004074?token=FE011B0123F802F27442369ED87DD656B402B1920147F48DC6957C72D5D1859DFD0B1ADB80194C54FBB05A9220781C20&originRegion=us-east-1&originCreation=20220708075935)/Code 2022 | **BSPC** | Automated polyp segmentation in colonoscopy images via deep network with lesion-aware feature selection and refinement | [Paper](https://www.sciencedirect.com/sdfe/reader/pii/S1746809422003688/pdf)/Code 2022 | **ICFCS** | U-Shaped Xception-Residual Network for Polyps Region Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-981-19-0105-8_25)/Code 2022 | **CBM** | Colorectal polyp region extraction using saliency detection network with neutrosophic enhancement | [Paper](https://reader.elsevier.com/reader/sd/pii/S0010482522005340?token=0E8C163715B86CD6B6EC6F0D60685AA70877424A885FF787BB2C776923BB44A84AF65684FBCAC9503E28CA794B58174B&originRegion=us-east-1&originCreation=20220708080248)/Code 2022 | **IJCARS** | Examining the effect of synthetic data augmentation in polyp detection and segmentation | [Paper](https://link.springer.com/content/pdf/10.1007/s11548-022-02651-x.pdf)/Code 2022 | **IEEE Access** | Polyp Segmentation of Colonoscopy Images by Exploring the Uncertain Areas | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9775966)/Code 2022 | **JBHI** | Boundary Constraint Network with Cross Layer Feature Integration for Polyp Segmentation | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9772424)/Code 2022 | **CMIG** | Polyp Segmentation Network with Hybrid Channel-Spatial Attention and Pyramid Global Context Guided Feature Fusion | [Paper](https://reader.elsevier.com/reader/sd/pii/S0895611122000453?token=1CC9A6070522894EA16E7984593F634B2A32CCB11CB9CBFD4CAA37916A13DE4D3F6AB47DFDDB5F6ED12F23B0CAD0FE20&originRegion=us-east-1&originCreation=20220515094414)/Code 2022 | **arXiv** | Automatic Polyp Segmentation with Multiple Kernel Dilated Convolution Network | [Paper](https://arxiv.org/pdf/2206.06264.pdf)/Code 2022 | **arXiv** | PlutoNet: An Efficient Polyp Segmentation Network | [Paper](https://arxiv.org/pdf/2204.03652.pdf)/Code 2022 | **arXiv** | Automated Polyp Segmentation in Colonoscopy using MSRFNet | [Paper](https://www.researchgate.net/profile/Debesh-Jha/publication/359698512_Automated_Polyp_Segmentation_in_Colonoscopy_using_MSRFNet/links/624907bf8068956f3c6533c1/Automated-Polyp-Segmentation-in-Colonoscopy-using-MSRFNet.pdf)/Code 2022 | **arXiv** | BlazeNeo: Blazing fast polyp segmentation and neoplasm detection | [Paper](https://arxiv.org/pdf/2203.00129.pdf)/Code 2022 | **arXiv** | BDG-Net: Boundary Distribution Guided Network for Accurate Polyp Segmentation | [Paper](https://arxiv.org/pdf/2201.00767.pdf)/Code 2022 | **arXiv** | Cross-level Contrastive Learning and Consistency Constraint for Semi-supervised Medical Image Segmentation | [Paper](https://arxiv.org/pdf/2202.04074.pdf)/Code 2022 | **arXiv** | ColonFormer: An Efficient Transformer based Method for Colon Polyp Segmentation | [Paper](https://arxiv.org/pdf/2205.08473.pdf)/Code 2022 | **KBS** | MIA-Net: Multi-information aggregation network combining transformers and convolutional feature learning for polyp segmentation | [Paper](https://www.sciencedirect.com/science/article/pii/S0950705122003926)/Code 2022 | **Diagnostics** | Performance of Convolutional Neural Networks for Polyp Localization on Public Colonoscopy Image Datasets | [Paper](https://www.mdpi.com/2075-4418/12/4/898/htm)/Code 2022 | **IEEE TCyber** | PolypSeg+: A 
Lightweight Context-Aware Network for Real-Time Polyp Segmentation | [Paper](https://ieeexplore.ieee.org/abstract/document/9756512)/Code 2022 | **JCDE** | SwinE-Net: hybrid deep learning approach to novel polyp segmentation using convolutional neural network and Swin Transformer | [Paper](https://academic.oup.com/jcde/article/9/2/616/6564811?login=true)/Code 2022 | **IEEE JBHI** | Artificial Intelligence for Colonoscopy: Past, Present, and Future | [Paper](https://ieeexplore.ieee.org/document/9739863)/Code 2021 | **ICONIP** | Multi-scale Fusion Attention Network for Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-3-030-92310-5_19)/Code 2021 | **AAAI** | Precise yet Efficient Semantic Calibration and Refinement in ConvNets for Real-time Polyp Segmentation from Colonoscopy Videos | [Paper](https://www.aaai.org/AAAI21Papers/AAAI-5002.WuHS.pdf)/Code 2021 | **ICCV** | Collaborative and Adversarial Learning of Focused and Dispersive Representations for Semi-supervised Polyp Segmentation | [Paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Wu_Collaborative_and_Adversarial_Learning_of_Focused_and_Dispersive_Representations_for_ICCV_2021_paper.pdf)/Code 2021 | **ACM MM** | UACANet: Uncertainty Augmented Context Attention for Polyp Segmentation | [Paper](https://dl.acm.org/doi/pdf/10.1145/3474085.3475375)/[Code](https://github.com/plemeri/UACANet) 2021 | **Healthcare** | TMD-Unet: Triple-Unet with Multi-Scale Input Features and Dense Skip Connection for Medical Image Segmentation | [Paper](https://www.researchgate.net/publication/348283572_TMD-Unet_Triple-Unet_with_Multi-Scale_Input_Features_and_Dense_Skip_Connection_for_Medical_Image_Segmentation)/Code 2021 | **ICPR** | DDANet: Dual decoder attention network for automatic polyp segmentation | [Paper](https://arxiv.org/pdf/2012.15245.pdf)/[Code](https://github.com/nikhilroxtomar/DDANet) 2021 | **ICDSIT** | Sa-HarDNeSt: A Self-Attention Network for Polyp Segmentation | [Paper](https://dl.acm.org/doi/pdf/10.1145/3478905.3478942)/Code 2021 | **IJCAI** | Medical image segmentation using squeeze-and-expansion transformers | [Paper](https://arxiv.org/pdf/2105.09511.pdf)/[Code](https://github.com/askerlee/segtran) 2021 | **IEEE ISBI** | DivergentNets: Medical Image Segmentation by Network Ensemble | [Paper](https://arxiv.org/pdf/2107.00283.pdf)/[Code](https://github.com/vlbthambawita/divergent-nets) 2021 | **IEEE JBHI** | A comprehensive study on colorectal polyp segmentation with resunet++, conditional random field and test-time augmentation | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9314114)/[Code](https://github.com/DebeshJha/ResUNetPlusPlus-with-CRF-and-TTA) 2021 | **IEEE JBHI** | Mutual-prototype adaptation for cross-domain polyp segmentation | [Paper](https://sci-hub.se/downloads/2021-05-24/1a/yang2021.pdf?rand=61ab86a0abb28?download=true)/[Code](https://github.com/CityU-AIM-Group/MPA-DA) 2021 | **MIA** | Dynamic-weighting hierarchical segmentation network for medical images | 
[Paper](https://pdf.sciencedirectassets.com/272154/1-s2.0-S1361841521X00059/1-s2.0-S1361841521002413/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEIf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQDZ0ITNVi%2BvzuLOUKPQQajvIwWt9hsidNnvAiweckL5rgIgQv2G7DZIc6bMCiIaipFlvaKM8zJWKR%2BJ75Q6tnwBWVQq%2BgMIYBAEGgwwNTkwMDM1NDY4NjUiDMa65PEpU2yP8TutHirXA2%2FvwxG2Px%2B5huEgdCPa%2BWft55sBiAK71F3Ebz%2Fj7AJ2EHzmyFNE4%2BtBAHS%2Ft2dAE0l9X1a8DBEj2hj%2BnQfo5lfkj1bS6gxEny5IHEKozs9X%2BAt1l1rv7PIPXN6Eb6%2B5%2FQe24O%2Bu6iJyeiTbUS2Pk3kZCiyIbVNRygfDn6j8l5Ye1JqLSl8zljMiZJBZKWfE9pekhQbKPi5pyqflJmiBhZFxuI2YGjOhQ0LhY0fQKIqrAu0AWvFXBdv%2BX%2FxLHBStP911TVbrCl6zCf4m1Y%2FYmrRkEiR343YY0VuJ%2Bswg8p23Lf7nQ3tcSvKaJ5WmpINOPl3O%2BA7AdiqOkvF2%2BklOIfqpNSn1kA591KHwjYh4tSIQdlVCBYQIOFb5DGD3hfnwUwery0DdMRC0H4vgChsyEPGUiI2JVgr6WytCvKl3%2Bnm%2FlaxUopskmpT7S5QGGIYkKy56VoBL1yBP4SqlU1bWe%2FbujaO6mq6i5xjPP8W2l4OlCZIwKX79dq17AIN%2B1cZZYp84zVSjYpyn8qSXkxCnf3x8UNKws2TzALIMQl9YQKloABt%2BvLQYoN7P6MINo%2FcB%2FaC6HP3yXMuho7oorZtPgU5P3IHpaHSxod3iKD5uvh87P2h2noQt%2FTCQ9K2NBjqlAQj%2FqBYVx%2BDjQ%2BXuOFSdHTIw2tRNrxOxQsEtmxdxy7ej5ttMEQKnq3rhA1K6ETIoS5IH5RNNgGzijDwYiNGXy4tJd7k3tmoFnoqicAqyycG%2FPu2gzvYGKhVNh9bxllTdijLBfJRTMKSMFwyH1W3JqFZUDbT9qO7bwqciDPMerP6zF0KdGfjBrY%2FPWYJf%2FWOdhSBBTE62hfm%2FW573OuLkI1KwU1dWrw%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20211204T153800Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTY4BAUX3ZO%2F20211204%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=80ffde177a66c02f8f70db6588324bca239c48bf4366af906e2cf876c11dbcea&hash=854aafe64938f04f8ee0bf386f4c92d77219c66c67bad994e7d8806082bdafd1&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S1361841521002413&tid=spdf-875a5c1f-84ac-43d9-aff2-8406cae898df&sid=ceb35d509987134d700962537e5a5b60cfe0gxrqa&type=client)/[Code](https://github.com/CityU-AIM-Group/DW-HieraSeg) 2021 | **MICCAI** | Automatic Polyp Segmentation via Multi-scale Subtraction Network | [Paper](https://arxiv.org/pdf/2108.05082.pdf)/[Code](https://github.com/Xiaoqi-Zhao-DLUT/MSNet) 2021 | **MICCAI** | CCBANet: Cascading Context and Balancing Attention for Polyp Segmentation | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-87193-2.pdf)/[Code](https://github.com/ntcongvn/CCBANet) 2021 | **MICCAI** | Constrained Contrastive Distribution Learning for Unsupervised Anomaly Detection and Localisation in Medical Images | [Paper](https://arxiv.org/pdf/2103.03423.pdf)/[Code](https://arxiv.org/pdf/2103.03423.pdf) 2021 | **MICCAI** | Double Encoder-Decoder Networks for Gastrointestinal Polyp Segmentation | [Paper](https://arxiv.org/pdf/2110.01939.pdf)/Code 2021 | **MICCAI** | HRENet: A Hard Region Enhancement Network for Polyp Segmentation | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-87193-2.pdf)/[Code](https://github.com/CathySH/HRENet) 2021 | **MICCAI** | Few-Shot Domain Adaptation with Polymorphic Transformers | [Paper](https://arxiv.org/pdf/2107.04805.pdf)/[Code](https://github.com/askerlee/segtran) 2021 | **MICCAI** | Learnable Oriented-Derivative Network for Polyp Segmentation | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-87193-2.pdf)/[Code](https://github.com/midsdsy/LOD-Net) 2021 | **MICCAI** | Shallow attention network for polyp segmentation | [Paper](https://arxiv.org/pdf/2108.00882.pdf)/[Code](https://github.com/weijun88/SANet) 2021 | **MICCAI** | Transfuse: Fusing transformers and cnns for medical image segmentation | [Paper](https://arxiv.org/pdf/2102.08005.pdf)/Code 2021 | **MIDL** | 
Deep ensembles based on stochastic activation selection for polyp segmentation | [Paper](https://arxiv.org/pdf/2104.00850.pdf)/[Code](https://github.com/LorisNanni/Deep-ensembles-based-on-Stochastic-Activation-Selection-for-Polyp-Segmentation) 2021 | **NCBI** | MBFFNet: Multi-Branch Feature Fusion Network for Colonoscopy | [Paper](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8317500/pdf/fbioe-09-696251.pdf)/Code 2021 | **RIVF** | AG-CUResNeSt: A novel method for colon polyp segmentation | [Paper](https://arxiv.org/pdf/2105.00402.pdf)/Code 2021 | **Sensors** | A-DenseUNet: Adaptive Densely Connected UNet for Polyp Segmentation in Colonoscopy Images with Atrous Convolution | [Paper](https://www.mdpi.com/1424-8220/21/4/1441/pdf)/Code 2021 | **IEEE TIM** | Colon Polyp Detection and Segmentation Based on Improved MRCNN | [Paper](https://www.researchgate.net/publication/346985142_Colon_Polyp_Detection_and_Segmentation_based_on_improved_MRCNN)/Code 2021 | **IEEE TIM** | Polyp-Net A Multimodel Fusion Network for Polyp Segmentation | [Paper](https://drive.google.com/file/d/1isi_Blz9ZAK4iPH5wKcEVw4-FSYuqNxm/view)/Code 2021 | **IEEE TMI** | Graph-based Region and Boundary Aggregation for Biomedical Image Segmentation | [Paper](https://livrepository.liverpool.ac.uk/3140502/1/TMI_region_boundary2021.pdf)/[Code](https://github.com/smallmax00/Graph_Region_Boudnary) 2021 | **IEEE Access** | A Simple Generic Method for Effective Boundary Extraction in Medical Image Segmentation | [Paper*](https://ieeexplore.ieee.org/iel7/6287639/9312710/09495769.pdf)/Code 2021 | **IEEE Access** | CRF-EfficientUNet: An Improved UNet Framework for Polyp Segmentation in Colonoscopy Images With Combined Asymmetric Loss Function and CRF-RNN Layer | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9622208)/[Code](https://github.com/lethithuhong1302/CRF-EfficientUNet) 2021 | **IEEE Access** | Real-time polyp detection, localization and segmentation in colonoscopy using deep learning | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9369308)/[Code](https://github.com/DebeshJha/ColonSegNet) 2021 | **IEEE Access** | Training on Polar Image Transformations Improves Biomedical Image Segmentation | [Paper](https://ieeexplore.ieee.org/iel7/6287639/9312710/09551998.pdf)/Code 2021 | **BioMed** | Automated Classification and Segmentation in Colorectal Images Based on Self-Paced Transfer Network | [Paper](https://www.hindawi.com/journals/bmri/2021/6683931/)/Code 2021 | **CBM** | Focus U-Net: A novel dual attention-gated CNN for polyp segmentation during colonoscopy | [Paper](https://sci-hub.se/10.1016/j.compbiomed.2021.104815)/Code 2021 | **CBMS** | Nanonet: Real-time polyp segmentation in video capsule endoscopy and colonoscopy | [Paper](https://arxiv.org/pdf/2104.11138.pdf)/[Code](https://github.com/DebeshJha/NanoNet) 2021 | **CRV** | Enhanced u-net: A feature enhancement network for polyp segmentation | [Paper](https://arxiv.org/pdf/2105.00999.pdf)/[Code](https://github.com/rucv/Enhanced-U-Net) 2021 | **IEEE DDCLS** | MSB-Net: Multi-Scale Boundary Net for Polyp Segmentation | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9455514)/Code 2021 | **arXiv** | BI-GCN: Boundary-Aware Input-Dependent Graph Convolution Network for Biomedical Image Segmentation | [Paper](https://arxiv.org/pdf/2110.14775.pdf)/Code 2021 | **arXiv** | CaraNet: Context Axial Reverse Attention Network for Segmentation of Small Medical Objects | [Paper](https://arxiv.org/pdf/2108.07368.pdf)/Code 2021 | **arXiv** | 
DS-TransUNet: Dual Swin Transformer U-Net for Medical Image Segmentation | [Paper](https://arxiv.org/pdf/2106.06716.pdf)/Code 2021 | **arXiv** | Duplex contextual relation network for polyp segmentation | [Paper](https://arxiv.org/pdf/2103.06725)/[Code](https://github.com/PRIS-CV/DCRNet) 2021 | **arXiv** | Few-shot segmentation of medical images based on meta-learning with implicit gradients | [Paper](https://arxiv.org/pdf/2106.03223.pdf)/Code 2021 | **arXiv** | GMSRF-Net: An improved generalizability with global multi-scale residual fusion network for polyp segmentation | [Paper](https://arxiv.org/pdf/2111.10614.pdf)/Code 2021 | **arXiv** | Hardnet-mseg: A simple encoder-decoder polyp segmentation neural network that achieves over 0.9 mean dice and 86 fps | [Paper](https://arxiv.org/pdf/2101.07172.pdf)/[Code](https://github.com/james128333/HarDNet-MSEG) 2021 | **arXiv** | NeoUNet: Towards accurate colon polyp segmentation and neoplasm detection | [Paper](https://arxiv.org/pdf/2107.05023.pdf)/Code 2021 | **arXiv** | Polyp segmentation in colonoscopy images using u-net-mobilenetv2 | [Paper](https://arxiv.org/pdf/2103.15715.pdf)/Code 2021 | **arXiv** | Polyp-PVT: Polyp Segmentation with Pyramid Vision Transformers | [Paper](https://arxiv.org/pdf/2108.06932.pdf)/[Code](https://github.com/DengPingFan/Polyp-PVT) 2021 | **arXiv** | Self-supervised Multi-class Pre-training for Unsupervised Anomaly Detection and Segmentation in Medical Images | [Paper](https://arxiv.org/pdf/2109.01303.pdf)/Code 2020 | **IEEE Access** | Contour-Aware Polyp Segmentation in Colonoscopy Images Using Detailed Upsamling Encoder-Decoder Networks | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9096362)/Code 2020 | **arXiv** | Automatic polyp segmentation using convolution neural networks | [Paper](https://arxiv.org/pdf/2004.10792)/Code 2020 | **arXiv** | Boundary-aware Context Neural Network for Medical Image Segmentation | [Paper](https://arxiv.org/pdf/2005.00966v1.pdf)/Code 2020 | **CBMS** | DoubleU-Net A Deep Convolutional Neural Network for Medical Image Segmentation | [Paper](https://arxiv.org/pdf/2006.04868)/[Code](https://github.com/DebeshJha/2020-CBMS-DoubleU-Net) 2020 | **HYDCON** | Polyps Segmentation using Fuzzy Thresholding in HSV Color Space | [Paper](https://sci-hub.se/downloads/2020-12-21/80/mandal2020.pdf?rand=61ab7ececa47d?download=true)/Code 2020 | **ICARM** | Real-time Colonoscopy Image Segmentation Based on Ensemble Knowledge Distillation | [Paper](https://sci-hub.se/downloads/2020-11-09/c2/huang2020.pdf?rand=61ab7ea22a48f?download=true)/Code 2020 | **IEEE ISBI** | SSN A stair-shape network for real-time polyp segmentation in colonoscopy images | [Paper](https://www.researchgate.net/profile/Ruiwei-Feng/publication/339782695_SSN_A_Stair-Shape_Network_for_Real-Time_Polyp_Segmentation_in_Colonoscopy_Images/links/5e6af8dd458515e555765049/SSN-A-Stair-Shape-Network-for-Real-Time-Polyp-Segmentation-in-Colonoscopy-Images.pdf)/Code 2020 | **IEEE JBHI** | Multi-scale Context-guided Deep Network for Automated Lesion Segmentation with Endoscopy Images of Gastrointestinal Tract | [Paper](https://sci-hub.se/downloads/2020-06-18//db/wang2020.pdf?rand=61ab7f237a9b5?download=true)/Code 2020 | **MIA** | Uncertainty and interpretability in convolutional neural networks for semantic segmentation of colorectal polyps | 
[Paper](https://pdf.sciencedirectassets.com/272154/1-s2.0-S1361841519X00092/1-s2.0-S1361841519301574/main.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEKv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJGMEQCIFCBk%2FghOtJfQ62sxbB63ru7AOYC0IBJiHp%2F3HJrjaU%2BAiANHFD06HdAPYve6VbMJLdwkGojYzHjBDAL846ZP7ss%2FiqDBAiE%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAQaDDA1OTAwMzU0Njg2NSIMdDwgrrObNsP3CG8sKtcDt1QC6gBH9G9et33b26BRppTYQZooVUiDID3EvU6AAsmbNCFp5XTmqgUR5rK90BvBAFaKmWgIVzhR4Hzs2%2B1knkFJMtwbhLFSPjSZ6rx%2F4%2FNI2%2FyPeMY5xN3Qd1WdRfM4I2HF%2Bmy3aO2xX1j4IchIAPhE5FVuXDE1BqWefNjTLHxO9Or7i8FcFktna%2BIB1nwJVuXieWZNtyQhRwa%2BxRDo1TvyBZ3FXQ4UJ0wVQT6ndB6eBuKMXRJrDlBzg2MlKxIII65ufBi5GhVJLqY7lZCfFswH2DhBXPYvmnqi64jrhPq6i4iRMUu%2FU78b7ibopJgGeWbqg337XQppOTHxiGxll2dhvJ7PMb%2FtN7WTWIkEYGvPr%2FPCf9ccgo%2FJDJZi7n7u9h1%2BEqNU5YPgHmW6N%2FIK19qSQrUbKq35DMgajbzHtqOSUYww2g%2FeLn8Ymyty19TwfWC%2BWAJfT%2BgbJGt3c%2BhEudC2o%2B37%2BZ9dlAjFdGLNrV%2BEb2sTR6753%2FLynwAL2ateKKWQkTrurKfmPIdVnj0J7mLMx9QXZI0nrVzjc8RJt%2FIGuDLHJbpgNjZcRPrV5G0d%2BfeO2n%2F3uy2Bxmh%2BQoKt%2F3m9xLE5SpVjjF5U%2FwjhAWQbZZPDdc8rMJ7otY0GOqYB%2Fg32ebRx9MGzVLDd%2B6k8bZYrtUqolCXWBBNThlnmcVMKVY%2FXxcRh2K1y5BGXKlDivpzB9dYmRfT3B81mJ3DG4OlS5n1voC8yiHIn0qleoeyvgph8qbuFlPJlxq51MXcHA1VZtvJYe5wTrznNCmZqP4Bby56JSVVdlzMrS%2FPhdDZBRjzqAzGO8nNvIeCgznTGUd1hpFqC5rjRCshoaxJG5V%2B3lR5qXg%3D%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20211206T032453Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTYRZJU4SVY%2F20211206%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=c5b143cc04624e9d899f887cf83cf56cbf27185fa0286d447629ed2a306a7612&hash=7bbe140b0a4e1e4a8c73f2b402ff854818267ec8f356b261b5b3cb4e78e96de0&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S1361841519301574&tid=spdf-a7da0fef-5b1e-49c4-a75e-040ca7ca6680&sid=ec558a9b400130484b2a9f1-fbc4af95d4adgxrqa&type=client)/Code 2020 | **MICCAI** | Adaptive context selection for polyp segmentation | [Paper](https://www.sysuhcp.com/userfiles/files/2021/02/28/8323749c38f384d5.pdf)/[Code](https://github.com/ReaFly/ACSNet) 2020 | **MICCAI** | Mi2gan: Generative adversarial network for medical image domain adaptation using mutual information constraint | [Paper](https://arxiv.org/pdf/2007.11180.pdf)/Code 2020 | **MICCAI** | Pranet: Parallel reverse attention network for polyp segmentation | [Paper](https://arxiv.org/pdf/2006.11392.pdf)/[Code](https://github.com/DengPingFan/PraNet) 2020 | **MICCAI** | PolypSeg: An Efficient Context-Aware Network for Polyp Segmentation from Colonoscopy Videos | [Paper](https://link.springer.com/chapter/10.1007%2F978-3-030-59725-2_28)/Code 2020 | **MIUA** | Polyp Segmentation with Fully Convolutional Deep Dilation Neural Network | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-39343-4.pdf)/Code 2020 | **RIVF** | Polyp Segmentation in Colonoscopy Images Using Ensembles of U-Nets with EfficientNet and Asymmetric Similarity Loss Function | [Paper](http://eprints.uet.vnu.edu.vn/eprints/id/document/3713)/Code 2020 | **Sensors** | ABC-Net Area-Boundary Constraint Network with Dynamical Feature Selection for Colorectal Polyp Segmentation | [Paper](https://sci-hub.se/downloads/2020-08-26/00/fang2020.pdf?rand=61ab823b2d3f0?download=true)/Code 2020 | **IEEE TMI** | Learn to Threshold: Thresholdnet with confidence-guided manifold mixup for polyp segmentation | [Paper](https://www.researchgate.net/publication/347925189_Learn_to_Threshold_ThresholdNet_With_Confidence-Guided_Manifold_Mixup_for_Polyp_Segmentation)/[Code](https://github.com/Guo-Xiaoqing/ThresholdNet) 
2019 | **IEEE Access** | Ensemble of Instance Segmentation Models for Polyp Segmentation in Colonoscopy Images | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8648333)/Code
2019 | **CBMS** | Training Data Enhancements for Robust Polyp Segmentation in Colonoscopy Images | [Paper](https://webserver2.tecgraf.puc-rio.br/~abraposo/pubs/CBMS2019/08787526.pdf)/Code
2019 | **EMBC** | Psi-Net: Shape and boundary aware joint multi-task deep network for medical image segmentation | [Paper](https://arxiv.org/pdf/1902.04099.pdf)/[Code](https://github.com/Bala93/Multi-task-deep-network)
2019 | **EMBC** | Polyp Segmentation using Generative Adversarial Network | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8857958)/Code
2019 | **ICMLA** | Colorectal polyp segmentation by U-Net with dilation convolution | [Paper](https://arxiv.org/pdf/1912.11947.pdf)/Code
2019 | **ISM** | ResUNet++: An Advanced Architecture for Medical Image Segmentation | [Paper](https://arxiv.org/pdf/1911.07067.pdf)/[Code](https://github.com/DebeshJha/ResUNetPlusPlus-with-CRF-and-TTA)
2019 | **ISMICT** | Polyp detection and segmentation using Mask R-CNN: Does a deeper feature extractor CNN always perform better? | [Paper](https://arxiv.org/pdf/1907.09180.pdf)/Code
2019 | **MICCAI** | Selective Feature Aggregation Network with Area-Boundary Constraints for Polyp Segmentation | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-32239-7.pdf)/Code
2019 | **Nature Methods** | U-Net – Deep Learning for Cell Counting, Detection, and Morphometry | [Paper](https://lmb.informatik.uni-freiburg.de/Publications/2019/FMBCAMBBR19/paper-U-Net.pdf)/[Code](https://lmb.informatik.uni-freiburg.de/resources/opensource/unet/)
2018 | **DLMIA** | UNet++: A nested U-Net architecture for medical image segmentation | [Paper](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7329239/pdf/nihms-1600717.pdf)/[Code](https://github.com/MrGiovanni/UNetPlusPlus)
2018 | **EMBC** | Polyp segmentation in colonoscopy images using fully convolutional network | [Paper](https://arxiv.org/pdf/1802.00368.pdf)/Code
2018 | **ICMM** | Real-Time Polyps Segmentation for Colonoscopy Video Frames Using Compressed Fully Convolutional Network | [Paper](https://www.researchgate.net/profile/Can-Udomcharoenchaikit/publication/322424455_Real-Time_Polyps_Segmentation_for_Colonoscopy_Video_Frames_Using_Compressed_Fully_Convolutional_Network/links/5ecc209a299bf1c09adf5049/Real-Time-Polyps-Segmentation-for-Colonoscopy-Video-Frames-Using-Compressed-Fully-Convolutional-Network.pdf)/Code
2018 | **JMRR** | Towards a computed-aided diagnosis system in colonoscopy | [Paper](https://arxiv.org/pdf/2101.06040.pdf)/Code
2018 | **Medical Robotics Res** | Automatic polyp segmentation using convolution neural networks | [Paper](https://arxiv.org/pdf/2004.10792.pdf)/Code
2017 | **ISOP** | Fully convolutional neural networks for polyp segmentation in colonoscopy | [Paper](https://discovery.ucl.ac.uk/id/eprint/1540136/7/Rosa%20Brandao_101340F.pdf)/Code
2017 | **SPMB** | Superpixel based segmentation and classification of polyps in wireless capsule endoscopy | [Paper](https://arxiv.org/pdf/1710.07390.pdf)/Code
2016 | **IEEE TMI** | Convolutional Neural Networks for Medical Image Analysis: Full Training or Fine Tuning? | [Paper](https://arxiv.org/pdf/1706.00712.pdf)/Code
2016 | **ComNet** | Advanced Algorithm for Polyp Detection Using Depth Segmentation in Colon Endoscopy | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7824010)/Code
2015 | **MICCAI** | U-Net: Convolutional networks for biomedical image segmentation | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-319-24574-4_28.pdf)/[Code](https://lmb.informatik.uni-freiburg.de/resources/opensource/unet/)
2015 | **CMIG** | WM-DOVA Maps for Accurate Polyp Highlighting in Colonoscopy: Validation vs. Saliency Maps from Physicians | [Paper](http://158.109.8.37/files/BSF2015.pdf)/[Code](https://polyp.grand-challenge.org/CVCClinicDB/)
2014 | **IJPRAI** | A complete system for candidate polyps detection in virtual colonoscopy | [Paper](https://arxiv.org/pdf/1209.6525.pdf)/Code
2014 | **IEEE TMI** | Automated polyp detection in colon capsule endoscopy | [Paper](https://arxiv.org/pdf/1305.1912.pdf)/Code
2012 | **PR** | Towards Automatic Polyp Detection with a Polyp Appearance Model | [Paper](http://refbase.cvc.uab.es/files/BSV2012a.pdf)/Code
2010 | **IEEE ICASSP** | Polyp detection in Wireless Capsule Endoscopy videos based on image segmentation and geometric feature | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=5495103)/Code

[Back to top](#1-preview)

## 2.2 Video Polyp Segmentation

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **MICCAI** | Semi-Supervised Spatial Temporal Attention Network for Video Polyp Segmentation | [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16440-8_44)/Code
2022 | **IJCAI** | TCCNet: Temporally Consistent Context-Free Network for Semi-supervised Video Polyp Segmentation | [Paper](https://www.ijcai.org/proceedings/2022/0155.pdf)/Code
2022 | **MIR** | :fire: Video Polyp Segmentation: A Deep Learning Perspective | [Paper](https://arxiv.org/pdf/2203.14291v3.pdf)/[Code](https://github.com/GewelsJI/VPS)
2021 | **MICCAI** | Progressively Normalized Self-Attention Network for Video Polyp Segmentation | [Paper](https://arxiv.org/pdf/2105.08468.pdf)/[Code](https://github.com/GewelsJI/PNS-Net)
2020 | **MICCAI** | Endoscopic Polyp Segmentation Using a Hybrid 2D/3D CNN | [Paper](https://discovery.ucl.ac.uk/id/eprint/10114066/1/Endoscopic%20polyp%20segmentation%20using%20a%20hybrid%202D-3D%20CNN.pdf)/Code

[Back to top](#1-preview)

## 2.3 Image Polyp Detection

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **JSJU** | Improving Colonoscopy Polyp Detection Rate Using Semi-Supervised Learning | [Paper](https://link.springer.com/content/pdf/10.1007/s12204-022-2519-1.pdf?pdf=inline%20link)/Code
2022 | **IJCARS** | Positive-gradient-weighted object activation mapping: visual explanation of object detector towards precise colorectal-polyp localisation | [Paper](https://link.springer.com/content/pdf/10.1007/s11548-022-02696-y.pdf)/Code
2022 | **arXiv** | Colonoscopy polyp detection with massive endoscopic images | [Paper](https://arxiv.org/pdf/2202.08730.pdf)/Code
2021 | **arXiv** | Detecting, Localising and Classifying Polyps from Colonoscopy Videos using Deep Learning | [Paper](https://arxiv.org/pdf/2101.03285.pdf)/Code
2020 | **Scientific Data** | HyperKvasir, a comprehensive multi-class image and video dataset for gastrointestinal endoscopy | [Paper](https://www.nature.com/articles/s41597-020-00622-y.pdf)/[Project](https://datasets.simula.no/hyper-kvasir/)
2020 | **Scientific Reports** | Real-time detection of colon polyps during colonoscopy using deep learning: systematic validation with four independent datasets | [Paper](https://www.nature.com/articles/s41598-020-65387-1)/Code
2020 | **IEEE ISBI** | Reduce false-positive rate by active learning for automatic polyp detection in colonoscopy videos | [Paper](https://www.researchgate.net/profile/Zhe-Guo-12/publication/322563091_Automatic_polyp_recognition_from_colonoscopy_images_based_on_bag_of_visual_words/links/5f9b60a7299bf1b53e512f47/Automatic-polyp-recognition-from-colonoscopy-images-based-on-bag-of-visual-words.pdf)/Code
2020 | **TransAI** | Artifact Detection in Endoscopic Video with Deep Convolutional Neural Networks | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9253131)/Code
2019 | **IEEE Access** | Colonic Polyp Detection in Endoscopic Videos With Single Shot Detection Based Deep Convolutional Neural Network | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8731913)/Code
2019 | **ICTAI** | An Efficient Spatial-Temporal Polyp Detection Framework for Colonoscopy Video | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8995313)/Code
2018 | **arXiv** | Y-Net: A deep convolutional neural network for polyp detection | [Paper](https://arxiv.org/pdf/1806.01907.pdf)/Code
2017 | **IEEE TMI** | Comparative validation of polyp detection methods in video colonoscopy: results from the MICCAI 2015 endoscopic vision challenge | [Paper](http://clok.uclan.ac.uk/17023/2/17023%20Final%20Version.pdf)/Code
2015 | **IEEE TMI** | Automated Polyp Detection in Colonoscopy Videos Using Shape and Context Information | [Paper](https://sci-hub.se/10.1109/tmi.2015.2487997)/Code
2009 | **Bildverarbeitung für die Medizin** | Texture-based polyp detection in colonoscopy | [Paper](http://ftp.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-446/p346.pdf)/Code
2009 | **Proc. SPIE** | A comparison of blood vessel features and local binary patterns for colorectal polyp classification | [Paper](https://www.lfb.rwth-aachen.de/files/publications/2009/GRO09a.pdf)/Code
2007 | **IEEE ICIP** | Polyp detection in colonoscopy video using elliptical shape feature | [Paper](https://projet.liris.cnrs.fr/imagine/pub/proceedings/ICIP-2007/pdfs/0200465.pdf)/Code

[Back to top](#1-preview)

## 2.4 Video Polyp Detection

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2023 | **MICCAI** | Self-Supervised Polyp Re-Identification in Colonoscopy | [Paper](https://arxiv.org/pdf/2306.08591.pdf)/Code
2023 | **MICCAI** | YONA: You Only Need One Adjacent Reference-frame for Accurate and Fast Video Polyp Detection | [Paper](https://arxiv.org/pdf/2306.03686.pdf)/Code
2021 | **BSPC** | Real-time automatic polyp detection in colonoscopy using feature enhancement module and spatiotemporal similarity correlation unit | [Paper](https://sci-hub.se/10.1016/j.bspc.2021.102503)/Code
2021 | **EIO** | Real-time deep learning-based colorectal polyp localization on clinical video footage achievable with a wide array of hardware configurations | [Paper](https://www.thieme-connect.com/products/ejournals/pdf/10.1055/a-1388-6735.pdf)/Code
2021 | **MICCAI** | Multi-frame Collaboration for Effective Endoscopic Video Polyp Detection via Spatial-Temporal Feature Transformation | [Paper](https://arxiv.org/pdf/2107.03609.pdf)/[Code](https://github.com/lingyunwu14/STFT)
2020 | **IEEE ISBI** | Polyp detection in colonoscopy videos by bootstrapping via temporal consistency | [Paper](https://sci-hub.se/downloads/2020-06-30//41/ma2020.pdf?rand=61ab92dda2f6b?download=true)/Code
2020 | **JBHI** | Improving Automatic Polyp Detection Using CNN by Exploiting Temporal Dependency in Colonoscopy Video | [Paper](https://ntnuopen.ntnu.no/ntnu-xmlui/bitstream/handle/11250/2723541/Improving+Automatic+Polyp+Detection+Using+CNN.pdf?sequence=2)/Code
2020 | **NPJ Digital Medicine** | AI-doscopist: a real-time deep-learning-based algorithm for localising polyps in colonoscopy videos with edge computing devices | [Paper](https://www.nature.com/articles/s41746-020-0281-z.pdf)/Code
2020 | **MICCAI** | Asynchronous in Parallel Detection and Tracking (AIPDT): Real-Time Robust Polyp Detection | [Paper*](https://link.springer.com/content/pdf/10.1007%2F978-3-030-59716-0.pdf)/Code
2019 | **IEEE ISBI** | Polyp Tracking in Video Colonoscopy Using Optical Flow with an On-the-Fly Trained CNN | [Paper*](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8759180)/Code
2017 | **JBHI** | Integrating Online and Offline Three-Dimensional Deep Learning for Automated Polyp Detection in Colonoscopy Videos | [Paper](http://www.cse.cuhk.edu.hk/~qdou/papers/2017/%5B2017%5D%5BJBHI%5DIntegrating%20online%20and%20offline%20three%20dimensional%20deep%20learning%20for%20automated%20polyp%20detection%20in%20colonoscopy%20videos.pdf)/Code
2015 | **IPMI** | A Comprehensive Computer-Aided Polyp Detection System for Colonoscopy Videos | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-319-19992-4_25.pdf?pdf=inline%20link)/Code
2015 | **IEEE ISBI** | Automatic polyp detection in colonoscopy videos using an ensemble of convolutional neural networks | [Paper](https://www.researchgate.net/profile/Nima-Tajbakhsh/publication/283464973_Automatic_polyp_detection_in_colonoscopy_videos_using_an_ensemble_of_convolutional_neural_networks/links/5718b4a708aed43f63221b27/Automatic-polyp-detection-in-colonoscopy-videos-using-an-ensemble-of-convolutional-neural-networks.pdf)/Code

[Back to top](#1-preview)

## 2.5 Image Polyp Classification

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **MICCAI** | FFCNet: Fourier Transform-Based Frequency Learning and Complex Convolutional Network for Colon Disease Classification | [Paper](https://link.springer.com/chapter/10.1007/978-3-031-16437-8_8)/[Code](https://github.com/soleilssss/FFCNet)
2022 | **MICCAI** | Toward Clinically Assisted Colorectal Polyp Recognition via Structured Cross-Modal Representation Consistency | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-031-16437-8_14.pdf)/[Code](https://github.com/WeijieMax/CPC-Trans)
2020 | **IEEE ISBI** | Photoshopping Colonoscopy Video Frames | [Paper](https://arxiv.org/pdf/1910.10345v1.pdf)/Code
2020 | **MICCAI** | Few-Shot Anomaly Detection for Polyp Frames from Colonoscopy | [Paper](https://arxiv.org/pdf/2006.14811.pdf)/[Code](https://github.com/tianyu0207/FSAD-Net)
2020 | **MICCAI** | Two-Stream Deep Feature Modelling for Automated Video Endoscopy Data Analysis | [Paper](https://arxiv.org/pdf/2007.05914.pdf)/Code
2014 | **IJCARS** | Towards embedded detection of polyps in WCE images for early diagnosis of colorectal cancer | [Paper](https://hal.archives-ouvertes.fr/hal-00843459/document)/[Code](https://polyp.grand-challenge.org/EtisLarib/)

[Back to top](#1-preview)

## 2.6 Video Polyp Classification

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **MICCAI** | Contrastive Transformer-based Multiple Instance Learning for Weakly Supervised Polyp Frame Detection | [Paper](https://link.springer.com/content/pdf/10.1007/978-3-031-16437-8_9.pdf)/[Code](https://github.com/tianyu0207/weakly-polyp)

[Back to top](#1-preview)

## 2.7 Colonoscopy Depth Estimation

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **arXiv** | Task-Aware Active Learning for Endoscopic Image Analysis | [Paper](https://arxiv.org/pdf/2204.03440.pdf)/[Code](https://github.com/thetna/endo-active-learn)
2019 | **IJCARS** | Implicit domain adaptation with conditional generative adversarial networks for depth prediction in endoscopy | [Paper](https://link.springer.com/content/pdf/10.1007/s11548-019-01962-w.pdf?pdf=button%20sticky)/Code/[Project](http://cmic.cs.ucl.ac.uk/ColonoscopyDepth)

[Back to top](#1-preview)

## 2.8 Colonoscopy Deficient Coverage Detection

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2020 | **IEEE TMI** | Detecting Deficient Coverage in Colonoscopies | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9097918)/Code

[Back to top](#1-preview)

## 2.9 Colon Polyp Image Synthesis

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2018 | **IEEE Access** | Abnormal Colon Polyp Image Synthesis Using Conditional Adversarial Networks for Improved Detection Performance | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8478237)/Code

[Back to top](#1-preview)
# 3. Colonoscopy Resources

## 3.1 Image Segmentation Datasets

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2022 | **arXiv** | ERS: a novel comprehensive endoscopy image dataset for machine learning, compliant with the MST 3.0 specification | [Paper](https://arxiv.org/abs/2201.08746)/[Project](https://cvlab.eti.pg.gda.pl/publications/endoscopy-dataset)
2022 | **arXiv** | Synthetic data for unsupervised polyp segmentation | [Paper](https://arxiv.org/pdf/2202.08680.pdf)/[Code](https://github.com/enric1994/synth-colon)/[Project](https://enric1994.github.io/synth-colon)
2020 | **ICMM** | **Kvasir-SEG** - Kvasir-SEG: A segmented polyp dataset | [Paper](https://arxiv.org/pdf/1911.07069.pdf)/[Code](https://datasets.simula.no/kvasir-seg/)
2017 | **JHE** | **CVC-EndoSceneStill** - A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images | [Paper](https://downloads.hindawi.com/journals/jhe/2017/4037190.pdf)/[Project](http://www.cvc.uab.es/CVC-Colon/index.php/databases/cvc-endoscenestill/)
2015 | **CMIG** | **CVC-ClinicDB/CVC-612** - WM-DOVA Maps for Accurate Polyp Highlighting in Colonoscopy: Validation vs. Saliency Maps from Physicians | [Paper](http://158.109.8.37/files/BSF2015.pdf)/[Project](https://polyp.grand-challenge.org/CVCClinicDB/)
2014 | **IJCARS** | **ETIS-Larib Polyp DB** - Towards embedded detection of polyps in WCE images for early diagnosis of colorectal cancer | [Paper](https://hal.archives-ouvertes.fr/hal-00843459/document)/[Project](https://polyp.grand-challenge.org/EtisLarib/)
2012 | **PR** | **CVC-ColonDB/CVC-300** - Towards Automatic Polyp Detection with a Polyp Appearance Model | [Paper](http://refbase.cvc.uab.es/files/BSV2012a.pdf)/[Project](http://mv.cvc.uab.es/projects/colon-qa/cvccolondb)
_ | _ | **PICCOLO** - PICCOLO RGB/NBI (Widefield) Image Collection | [Project](https://www.biobancovasco.org/en/Sample-and-data-catalog/Databases/PD178-PICCOLO-EN.html)

[Back to top](#1-preview)

## 3.2 Video Segmentation Datasets

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2017 | **EIO** | **KID Project** - KID Project: an internet-based digital video atlas of capsule endoscopy for research purposes | [Paper](https://www.thieme-connect.de/products/ejournals/pdf/10.1055/s-0043-105488.pdf)/[Project](https://mdss.uth.gr/datasets/endoscopy/kid/)
2015 | **IEEE TMI** | **ASU-Mayo** - Automated Polyp Detection in Colonoscopy Videos Using Shape and Context Information | [Paper](https://sci-hub.se/10.1109/tmi.2015.2487997)/[Project](https://polyp.grand-challenge.org/AsuMayo/)

[Back to top](#1-preview)

## 3.3 Video Detection Datasets

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2021 | **GE** | **SUN Dataset** - Development of a computer-aided detection system for colonoscopy and a publicly accessible large colonoscopy video database (with video) | [Paper](https://www.sciencedirect.com/science/article/pii/S0016510720346551)/[Project](http://amed8k.sundatabase.org)

[Back to top](#1-preview)

## 3.4 Video Classification Datasets

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2020 | **Scientific Data** | **HyperKvasir** - HyperKvasir, a comprehensive multi-class image and video dataset for gastrointestinal endoscopy | [Paper](https://www.nature.com/articles/s41597-020-00622-y.pdf)/[Project](https://datasets.simula.no/hyper-kvasir/)
2017 | **ACM MMSys** | **Kvasir** - KVASIR: A Multi-Class Image Dataset for Computer Aided Gastrointestinal Disease Detection | [Paper](https://dl.acm.org/doi/pdf/10.1145/3083187.3083212)/[Project](https://datasets.simula.no/kvasir/)
2017 | **IEEE TMI** | **Colonoscopic Dataset** - Computer-Aided Classification of Gastrointestinal Lesions in Regular Colonoscopy | [Paper](https://hal.archives-ouvertes.fr/hal-01291797/document)/[Project](http://www.depeca.uah.es/colonoscopy_dataset/)

[Back to top](#1-preview)

## 3.5 Colonoscopy Depth Datasets

**Yr.** | **Pub.** | **Title** | **Links**
:-: | :-: | :- | :-:
2020 | **IEEE TMI** | Detecting Deficient Coverage in Colonoscopies | [Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9097918)/Code/[Project](https://dl.google.com/datasets/CC20/Google-CC20-dataset.tar.gz)
2019 | **IJCARS** | Implicit domain adaptation with conditional generative adversarial networks for depth prediction in endoscopy | [Paper](https://link.springer.com/content/pdf/10.1007/s11548-019-01962-w.pdf)/[Project](http://cmic.cs.ucl.ac.uk/ColonoscopyDepth/)

[Back to top](#1-preview)
# 4. Useful Resources

## 4.1 Colonoscopy Related

- [Deep Learning for Colonoscopy](https://github.com/GewelsJI/deep-learning-colonoscopy)

## 4.2 AI Conference Deadlines

- [Acceptance Rate for AI Conferences](https://github.com/lixin4ever/Conference-Acceptance-Rate)
- [AI Conference Deadlines](https://aideadlin.es/?sub=ML,CV,NLP,RO,SP,DM)

[Back to top](#1-preview)

================================================
FILE: docs/DATA_DESCRIPTION.md
================================================

# The Descriptions of SUN-SEG Dataset


We first introduce a high-quality per-frame annotated VPS dataset, named SUN-SEG, which includes 158,690 frames from the famous SUN dataset. We extend the labels with diverse types, i.e., object mask, boundary, scribble, polygon, and visual attribute. We also include the pathological information from the original [SUN dataset](http://sundatabase.org/), i.e., pathological classification labels, location information, and shape information. Notably, the original SUN dataset has 113 colonoscopy videos, including 100 positive cases with 49,136 polyp frames and 13 negative cases with 109,554 non-polyp frames. We manually trim them into 378 positive and 728 negative short clips while maintaining their intrinsic consecutive relationship. This pre-processing ensures that each clip lasts around 3~11 seconds at a real-time frame rate (i.e., 30 fps), which enlarges the fault-tolerant margin for various algorithms and devices. The re-organized SUN-SEG thus contains 1,106 short video clips with 158,690 video frames in total, offering a solid foundation for building a representative benchmark. This yields the final version of our SUN-SEG dataset, which includes 49,136 polyp frames (i.e., the positive part) and 109,554 non-polyp frames (i.e., the negative part) taken from 285 and 728 different colonoscopy video clips, respectively, as well as the corresponding annotations. The following sections describe our SUN-SEG point by point.

- [The Descriptions of SUN-SEG Dataset](#the-descriptions-of-sun-seg-dataset)
  - [File Tree Organization](#file-tree-organization)
  - [Dataset Statistics](#dataset-statistics)
    - [Positive Part](#positive-part)
    - [Negative Part](#negative-part)
  - [Label Description](#label-description)
    - [Label-I: Category Classification Annotation](#label-i-category-classification-annotation)
    - [Label-II: Object Mask](#label-ii-object-mask)
    - [Label-III: Bounding Box](#label-iii-bounding-box)
    - [Label-IV: Boundary](#label-iv-boundary)
    - [Label-V: Two Weak Labels (Scribble & Polygon)](#label-v-two-weak-labels-scribble--polygon)
    - [Label-VI: Attributes Description](#label-vi-attributes-description)
  - [Rejected Labels](#rejected-labels)
  - [Citations](#citations)
  - [Reference](#reference)

# File Tree Organization

The `Frame` folder contains the frames, and the remaining folders contain the corresponding ground truth. As for the `bbox_annotation.json` and `classification.txt` text files, we follow the same formats as COCO and ImageNet, respectively, for generality.

```
├──data
    ├──SUN-SEG
        ├──TrainDataset
            ├──Frame  # The images from SUN dataset
                ├──case1_1
                    ├──image_name_00001.jpg
                    |...
                ├──case1_3
                |...
            ├──GT  # Object-level segmentation mask
                ├──case1_1
                    ├──image_name_00001.png
                    |...
                ├──case1_3
                |...
            ├──Edge  # Weak label with edge
            |...
            ├──Scribble  # Weak label with scribble
            |...
            ├──Polygon  # Weak label with polygon
            |...
            ├──Classification  # Category classification annotation
                ├──classification.txt
            ├──Detection  # Bounding box
                ├──bbox_annotation.json
        ├──TestEasyDataset
            ├──Seen
                ├──Frame
                    ├──case2_3
                    |...
                ├──GT
                    ├──case2_3
                    |...
            ├──Unseen
                ├──Frame
                    ├──case3_1
                    |...
                ├──GT
                    ├──case3_1
                    |...
        ├──TestHardDataset
            ├──Seen
                ├──Frame
                    ├──case1_2
                    |...
                ├──GT
                    ├──case1_2
                    |...
            ├──Unseen
                ├──Frame
                    ├──case10_1
                    |...
                ├──GT
                    ├──case10_1
                    |...
```
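As a quick sanity check of the layout above, the sketch below walks one split and pairs every frame with its object mask by swapping the `.jpg` suffix for `.png`. This is an illustrative helper only, not part of this repository's code; the `pair_frames_with_masks` name and the split path are our own assumptions.

```python
from pathlib import Path

def pair_frames_with_masks(split_dir):
    """Yield (frame, mask) path pairs for one split, e.g. ./data/SUN-SEG/TrainDataset."""
    split = Path(split_dir)
    for frame in sorted((split / "Frame").rglob("*.jpg")):
        # GT mirrors Frame case-by-case; masks share the image stem with a .png suffix.
        mask = split / "GT" / frame.parent.name / (frame.stem + ".png")
        if mask.exists():  # only polyp-existing frames carry masks
            yield frame, mask

if __name__ == "__main__":
    pairs = list(pair_frames_with_masks("./data/SUN-SEG/TrainDataset"))
    print(len(pairs), "frame/mask pairs found")
```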
# Dataset Statistics

Figure 1 (left) shows the statistical distribution of pathological patterns, excluding non-polyp (NP). We find that well-differentiated or low-grade adenoma dominates but is difficult to locate due to the low-intensity contrast between the lesion and the mucosal surface. Figure 1 (right) shows the multi-dependencies among the pathological pattern, shape, and location of colon polyps.


Figure 1: (Left) Distribution over pathological patterns. (Right) Multi-dependencies among pathological pattern, shape, and location.

## Positive Part

- The positive part of SUN-SEG contains 285 video clips (30 fps) with 49,136 frames in total.
- For more details on each polyp video clip, refer to [`INFO_POSITIVE_CASES.md`](https://github.com/GewelsJI/VPS/blob/main/docs/INFO_POSITIVE_CASES.md).

## Negative Part

- The negative part of SUN-SEG contains 728 video clips (30 fps) with 109,554 frames in total.
- For more details on each non-polyp video clip, refer to [`INFO_NEGATIVE_CASES.md`](https://github.com/GewelsJI/VPS/blob/main/docs/INFO_NEGATIVE_CASES.md).

# Label Description

## Label-I: Category Classification Annotation


There are seven classes of pathological diagnosis:

- Low-grade adenoma (229 videos, 39,834 frames)
- High-grade adenoma (26 videos, 4,111 frames)
- Hyperplastic polyp (10 videos, 1,644 frames)
- Traditional serrated adenoma (9 videos, 1,627 frames)
- Sessile serrated lesion (8 videos, 1,288 frames)
- Invasive carcinoma (3 videos, 632 frames)
- Non-Polyp (728 videos, 109,554 frames)

The annotation is in `./data/DATASET/classification.txt`. In the text file, each row holds an image name and its class of pathological diagnosis. Here is an example:

```
image_dir_00001.jpg low_grade_adenoma
image_dir_00002.jpg hyperplastic_polyp
image_dir_00003.jpg sessile_serrated_lesion
...
```
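Given that simple two-column format, loading the labels takes only a few lines. The snippet below is a minimal sketch under that assumption; the `load_classification` helper and the default path are illustrative, not part of the repository.

```python
def load_classification(path="./data/DATASET/classification.txt"):
    """Parse 'image_name diagnosis' rows into an {image_name: diagnosis} dict."""
    labels = {}
    with open(path) as f:
        for line in f:
            if line.strip():                # skip blank lines
                name, label = line.split()  # two whitespace-separated columns
                labels[name] = label
    return labels
```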


## Label-II: Object Mask

In polyp-existing frames, each polyp is annotated with a segmentation mask, as shown above. The annotations are in `./data/DATASET/GT/`. Each image name corresponds directly to its annotation file name: for example, the segmentation mask for `image_dir_00001.jpg` is `image_dir_00001.png`.


## Label-III: Bounding Box

We provide a bounding box annotation for each polyp-existing frame. In the `./data/DATASET/bbox_annotation.json` file, we follow the same format as the COCO dataset. Here is an example of the COCO-style annotation:

```
{
  'info': {
      'year': 2021,
      'version': 'v1.0',
      'description': 'SUN Colonoscopy Video Database. Hayato et al, 2020.',
      'contributor': '',
      'url': '',
      'date_created': ''},
  'images': [{
      'id': 'case1_1-a2-image0001',
      'width': 1158,
      'height': 1008,
      'case_name': 'case1_1',  # case_name is the name of the case folder
      'file_name': 'case_M_20181001100941_0U62372100109341_1_005_001-1_a2_ayy_image0001'},  # file_name corresponds to the image name in the folder
      ...],
  'annotation': [{
      'id': 'case1_1-a2-image0001',
      'bbox': [72, 262, 68, 81]},  # [min_x, min_y, width, height]; (min_x, min_y) is the upper-left corner of the box
      ...]
}
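A minimal sketch for reading these boxes is shown below, assuming the file is plain JSON laid out exactly as above (note the singular `annotation` key in the example); the helper names are our own and purely illustrative.

```python
import json

def load_bboxes(path="./data/DATASET/bbox_annotation.json"):
    """Map each image id to its [min_x, min_y, width, height] box."""
    with open(path) as f:
        coco = json.load(f)
    return {ann["id"]: ann["bbox"] for ann in coco["annotation"]}

def to_corners(bbox):
    """Convert [min_x, min_y, width, height] to (x1, y1, x2, y2) corner format."""
    x, y, w, h = bbox
    return x, y, x + w, y + h
```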


## Label-IV: Boundary

The annotations are stored in `./data/DATASET/Edge/`. Each image name corresponds directly to its annotation file name.

## Label-V: Two Weak Labels (Scribble & Polygon)

The annotations are in `./data/DATASET/Scribble/` and `./data/DATASET/Polygon/`, respectively. Each image name corresponds directly to its annotation file name.


## Label-VI: Attributes Description

Next, we provide the complete attributes of our SUN-SEG dataset.

- **Pathological Patterns**

| ID | Name | Description |
| ---- | ---- | ---- |
| LA | Low-grade adenoma | The polyp with low-grade dysplasia often shows nuclear changes, such as palisading and darkening of the nucleus. |
| HA | High-grade adenoma | The polyp with high-grade dysplasia, which has more severe cellular and nuclear changes. |
| HP | Hyperplastic polyp | The polyp has small vessels or sparse networks, with unrecognizable patterns, and is lighter than or similar to the surroundings. |
| TSA | Traditional serrated adenoma | A neoplastic polyp characterised by eosinophilic cells, ectopic crypt formations and slit-like epithelial serrations. |
| SSL | Sessile serrated lesion | A neoplastic polyp characterised by serrated architectural features and a lack of cytological dysplasia. |
| IC | Invasive cancer (T1b) | Its colour is darker than the surroundings, brownish, sometimes with lighter patches. Vessels in some areas are interrupted or absent. The surface is amorphous, with no surface pattern. |
| SI | Surgical Instruments | The endoscopic surgical procedures involve the positioning of instruments, such as snares, forceps, knives and electrodes. |

- **Shape**

> We follow the Narrow Band Imaging International Colorectal Endoscopic (NICE) classification criteria. It uses staining, vascular patterns, and surface patterns to distinguish between hyperplastic and adenomatous colon polyps. For more details, refer to [link-1](https://www.endoscopy-campus.com/en/classifications/polyp-classification-nice/) and [link-2](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5369434/).


| ID | Name | Description |
| ---- | ---- | ---- |
| Ip | Pedunculated | The base is narrower than the top of the lesion. |
| Isp | Subpedunculated | Intermediate and broad-based. Same management as (0-Is) sessile polyps. |
| Is | Sessile | The base and the top of the lesion have the same diameter. |
| IIa | Slightly elevated | The lesion is slightly higher than the adjacent mucosa. |

- **Location**

| ID | Name | Description |
| ---- | ---- | ---- |
| C | Cecum | Lesion is located in the cecum. |
| A | Ascending colon | Lesion is located in the ascending colon. |
| T | Transverse colon | Lesion is located in the transverse colon. |
| D | Descending colon | Lesion is located in the descending colon. |
| S | Sigmoid colon | Lesion is located in the sigmoid colon. |
| R | Rectum | Lesion is located in the rectum. |

- **Visual Attributes**

| ID | Name | Description |
| ---- | ---- | ---- |
| IB | Indefinable Boundaries | The foreground and background areas around the object have a similar colour. |
| HO | Heterogeneous Object | Object regions have distinct colours. |
| GH | Ghosting | Object has an anomalous RGB-coloured boundary due to fast motion or an insufficient refresh rate. |
| FM | Fast-Motion | The average per-frame object motion, computed as the Euclidean distance of polyp centroids between consecutive frames, is larger than 20 pixels. |
| SO | Small-Object | The average ratio between the object size and the image area is smaller than 0.05. |
| LO | Large-Object | The average ratio between the object bounding-box area and the image area is larger than $t_{lr} = 0.15$. |
| OCC | Occlusion | Object becomes partially or fully occluded. |
| OV | Out-of-view | Object is partially clipped by the image boundaries. |
| SV | Scale-Variation | The average area ratio among any pair of bounding boxes enclosing the target object is smaller than $0.5$. |
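Several of these attributes are computable directly from the masks. The sketch below shows, under our own simplifying assumptions (binary masks loaded as NumPy arrays, one polyp per frame, and illustrative helper names), how the FM and SO flags defined above could be derived; it is not the labeling code used for SUN-SEG.

```python
import numpy as np

def centroid(mask):
    """Centroid (x, y) of a binary mask, or None if the mask is empty."""
    ys, xs = np.nonzero(mask)
    return (xs.mean(), ys.mean()) if xs.size else None

def is_fast_motion(masks, thresh=20.0):
    """FM: average centroid displacement between consecutive frames > 20 px."""
    cents = [c for c in map(centroid, masks) if c is not None]
    steps = [np.hypot(x2 - x1, y2 - y1)
             for (x1, y1), (x2, y2) in zip(cents, cents[1:])]
    return bool(steps) and float(np.mean(steps)) > thresh

def is_small_object(masks, thresh=0.05):
    """SO: average (object area / image area) ratio < 0.05."""
    return float(np.mean([m.sum() / m.size for m in masks])) < thresh
```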
# Rejected Labels

To support learning strategies such as multi-rater agreement modeling, we follow the same file organization and release the rejected labels from our labeling process. We release 49,136 + 17,422 labels in total; you can download the annotation file from [OneDrive](https://anu365-my.sharepoint.com/:u:/g/personal/u7248002_anu_edu_au/EQJgEgN7RLZMgOxIEjHOIUMBakeE3BUU6grmCG-J0r0IBQ?e=tk1XDz) / [Baidu Drive](https://pan.baidu.com/s/14v5LB7QrrhFm3JSwmoA2bQ) (Password: inqb, Size: 120MB). A label with the `IMAGE_NAME_*.png` naming format is a rejected label, and vice versa. There are 33,331 images with 1 label, 14,211 images with 2 labels, 1,571 images with 3 labels, and 23 images with 4 labels.

# Citations

If you have found our work useful, please use the following references to cite this project:

```
@article{ji2022vps,
  title={Video Polyp Segmentation: A Deep Learning Perspective},
  author={Ji, Ge-Peng and Xiao, Guobao and Chou, Yu-Cheng and Fan, Deng-Ping and Zhao, Kai and Chen, Geng and Fu, Huazhu and Van Gool, Luc},
  journal={Machine Intelligence Research},
  year={2022}
}

@inproceedings{ji2021pnsnet,
  title={Progressively Normalized Self-Attention Network for Video Polyp Segmentation},
  author={Ji, Ge-Peng and Chou, Yu-Cheng and Fan, Deng-Ping and Chen, Geng and Jha, Debesh and Fu, Huazhu and Shao, Ling},
  booktitle={MICCAI},
  pages={142--152},
  year={2021}
}

@article{misawa2021development,
  title={Development of a computer-aided detection system for colonoscopy and a publicly accessible large colonoscopy video database (with video)},
  author={Misawa, Masashi and Kudo, Shin-ei and Mori, Yuichi and Hotta, Kinichi and Ohtsuka, Kazuo and Matsuda, Takahisa and Saito, Shoichi and Kudo, Toyoki and Baba, Toshiyuki and Ishida, Fumio and others},
  journal={Gastrointestinal Endoscopy},
  volume={93},
  number={4},
  pages={960--967},
  year={2021},
  publisher={Elsevier}
}
```

## Reference

- SUN dataset: http://sundatabase.org
- COCO dataset: https://cocodataset.org

================================================
FILE: docs/DATA_PREPARATION.md
================================================

# Dataset Preparation

We introduce a high-quality per-frame annotated VPS dataset, named SUN-SEG, which includes 158,690 frames selected from the famous [SUN dataset](http://amed8k.sundatabase.org). We then extend the labels with diverse types, i.e., object mask, boundary, scribble, and polygon. To get access to our whole dataset, follow the steps below.

# Contents

- [Step-1: Request the raw SUN-database](#step-1-request-the-raw-sun-database)
- [Step-2: Download our annotations](#-step-2-download-our-annotations)
- [Step-3: Unzip SUN dataset](#step-3-unzip-sun-dataset)
- [Step-4: Re-organize the file structure](#step-4-re-organize-the-file-structure)

# Step-1: Request the raw SUN-database

The original colonoscopy video frames in our SUN-SEG dataset are selected from the [SUN dataset](http://amed8k.sundatabase.org), and we cannot redistribute the video data due to its strict license. So first, you need to request the original colonoscopy video frames from them. In this step, you should download the polyp samples of 100 cases and the non-polyp samples of 13 cases from the links provided by the SUN dataset.

- **Request for video frames from SUN:** Please follow the instructions on the [SUN dataset](http://amed8k.sundatabase.org) page to request and download the dataset yourself. **Please use your educational email to apply for it and confirm that you have no commercial purpose.** Thank you for your understanding! Let me know (gepengai.ji@gmail.com) if you have any questions.

# ⭐ Step-2: Download our annotations

Next, download the complete annotations provided by our SUN-SEG from the following link:

- (July 17, 2024) Updated link: [annotation.v2 - google drive](https://drive.google.com/file/d/1ytmhpg0YaW0XZBAEfSFkMhaI6jnGLSg3/view?usp=sharing)

# Step-3: Unzip SUN dataset

The video frames in the SUN dataset come in two groups of samples, each split across multiple zip archives. To decompress each downloaded zip file, enter the password provided by the original authors of the SUN dataset (i.e., the same password that you used for login).
- **Unzip positive cases in SUN**
    - Create the directory: `mkdir ./data/SUN-Positive/`
    - Unzip the positive cases: `unzip -P sun_password -d ./SUN-Positive sundatabase_positive_part\*`, which takes up 11.5 + 9.7 GB of storage space. Please ensure your server has enough space to store them; otherwise, unzipping will fail. Replace `sun_password` with the password you received from the SUN authors.
    - Check for correctness: `find ./SUN-Positive -type f -name "*.jpg" | wc -l`, which should output 49136 in your terminal.
- **Unzip negative cases in SUN (optional)**
    - Create the directory: `mkdir ./data/SUN-Negative/`
    - Unzip the negative cases: `unzip -P sun_password -d ./SUN-Negative sundatabase_negative_part\*`, which takes up 11.6 + 10.7 + 11.5 + 10.5 GB of storage space. (This data partition is optional if you have no need for it.)
    - Check for correctness: `find ./SUN-Negative -type f -name "*.jpg" | wc -l`, which should output 109554 in your terminal.

As for the annotations from our SUN-SEG:

- Unpack them via `unzip SUN-SEG-Annotation-v2.zip`
- Put them at the path `./data/SUN-SEG-Annotation/`

After preparing all the files, your file structure will look like this:

```
├──data
    ├──SUN-Positive
        ├──case1
            ├──image_name.jpg
            |...
        ├──case2
        |...
    ├──SUN-SEG-Annotation
        ├──TrainDataset
            ├──GT
                ├──case1_1
                    ├──image_name.png
                    |...
            ├──Edge
            |...
            ├──Scribble
            |...
            ├──Polygon
            |...
            ├──Classification
                ├──classification.txt
            ├──Detection
                ├──bbox_annotation.json
        ├──TestEasyDataset
            ├──Seen
                ├──GT
                |...
            ├──Unseen
                ├──GT
                |...
        ├──TestHardDataset
            ├──Seen
                ├──GT
                |...
            ├──Unseen
                ├──GT
                |...
```

You will notice that the file structure of the images in `SUN-Positive` differs from that of the annotations in `SUN-SEG-Annotation`. To reconcile the two, follow the next step to finish the data preparation.

# Step-4: Re-organize the file structure

By running `sh ./utils/reorganize.sh`, the original file structure of the SUN dataset is reorganised to match SUN-SEG for better length balance. Finally, the folder `Frame`, which originates from `SUN-Positive`, as well as `GT` and the other annotation folders, will share the same file structure as shown below:

```
├──data
    ├──SUN-SEG
        ├──TrainDataset
            ├──Frame  # The images from the SUN dataset
                ├──case1_1
                    ├──image_name_00001.jpg
                    |...
                ├──case1_3
                |...
            ├──GT  # Object-level segmentation mask
                ├──case1_1
                    ├──image_name_00001.png
                    |...
                ├──case1_3
                |...
            ├──Edge  # Weak label with edge
            |...
            ├──Scribble  # Weak label with scribble
            |...
            ├──Polygon  # Weak label with polygon
            |...
            ├──Classification  # Category classification annotation
                ├──classification.txt
            ├──Detection  # Bounding box
                ├──bbox_annotation.json
        ├──TestEasyDataset
            ├──Seen
                ├──Frame
                    ├──case2_3
                    |...
                ├──GT
                    ├──case2_3
                    |...
            ├──Unseen
                ├──Frame
                    ├──case3_1
                    |...
                ├──GT
                    ├──case3_1
                    |...
        ├──TestHardDataset
            ├──Seen
                ├──Frame
                    ├──case1_2
                    |...
                ├──GT
                    ├──case1_2
                    |...
            ├──Unseen
                ├──Frame
                    ├──case10_1
                    |...
                ├──GT
                    ├──case10_1
                    |...
``` ================================================ FILE: docs/INFO_NEGATIVE_CASES.md ================================================ # Information of Negative Cases in our SUN-SEG | Clip-ID | Number of Frames | Duration (seconds) | |----|------------------|-------------------------------| | 1_1 | 150 | 5.00 | | 1_2 | 150 | 5.00 | | 1_3 | 150 | 5.00 | | 1_4 | 150 | 5.00 | | 1_5 | 150 | 5.00 | | 1_6 | 150 | 5.00 | | 1_7 | 150 | 5.00 | | 1_8 | 150 | 5.00 | | 1_9 | 150 | 5.00 | | 1_10 | 150 | 5.00 | | 1_11 | 150 | 5.00 | | 1_12 | 150 | 5.00 | | 1_13 | 150 | 5.00 | | 1_14 | 150 | 5.00 | | 1_15 | 150 | 5.00 | | 1_16 | 150 | 5.00 | | 1_17 | 150 | 5.00 | | 1_18 | 150 | 5.00 | | 1_19 | 150 | 5.00 | | 1_20 | 150 | 5.00 | | 1_21 | 150 | 5.00 | | 1_22 | 150 | 5.00 | | 1_23 | 150 | 5.00 | | 1_24 | 150 | 5.00 | | 1_25 | 150 | 5.00 | | 1_26 | 150 | 5.00 | | 1_27 | 150 | 5.00 | | 1_28 | 150 | 5.00 | | 1_29 | 150 | 5.00 | | 1_30 | 150 | 5.00 | | 1_31 | 150 | 5.00 | | 1_32 | 150 | 5.00 | | 1_33 | 150 | 5.00 | | 1_34 | 150 | 5.00 | | 1_35 | 150 | 5.00 | | 1_36 | 150 | 5.00 | | 1_37 | 150 | 5.00 | | 1_38 | 150 | 5.00 | | 1_39 | 150 | 5.00 | | 1_40 | 150 | 5.00 | | 1_41 | 150 | 5.00 | | 1_42 | 150 | 5.00 | | 1_43 | 150 | 5.00 | | 1_44 | 150 | 5.00 | | 1_45 | 150 | 5.00 | | 1_46 | 150 | 5.00 | | 1_47 | 150 | 5.00 | | 1_48 | 150 | 5.00 | | 1_49 | 150 | 5.00 | | 1_50 | 150 | 5.00 | | 1_51 | 150 | 5.00 | | 1_52 | 150 | 5.00 | | 1_53 | 150 | 5.00 | | 1_54 | 150 | 5.00 | | 1_55 | 150 | 5.00 | | 1_56 | 150 | 5.00 | | 1_57 | 150 | 5.00 | | 1_58 | 150 | 5.00 | | 1_59 | 150 | 5.00 | | 1_60 | 150 | 5.00 | | 1_61 | 150 | 5.00 | | 1_62 | 150 | 5.00 | | 1_63 | 150 | 5.00 | | 1_64 | 150 | 5.00 | | 1_65 | 150 | 5.00 | | 1_66 | 211 | 7.03 | | 2_1 | 150 | 5.00 | | 2_2 | 150 | 5.00 | | 2_3 | 150 | 5.00 | | 2_4 | 150 | 5.00 | | 2_5 | 150 | 5.00 | | 2_6 | 150 | 5.00 | | 2_7 | 150 | 5.00 | | 2_8 | 150 | 5.00 | | 2_9 | 150 | 5.00 | | 2_10 | 150 | 5.00 | | 2_11 | 150 | 5.00 | | 2_12 | 150 | 5.00 | | 2_13 | 150 | 5.00 | | 2_14 | 150 | 5.00 | | 2_15 | 150 | 5.00 | | 2_16 | 150 | 5.00 | | 2_17 | 150 | 5.00 | | 2_18 | 150 | 5.00 | | 2_19 | 150 | 5.00 | | 2_20 | 150 | 5.00 | | 2_21 | 150 | 5.00 | | 2_22 | 150 | 5.00 | | 2_23 | 150 | 5.00 | | 2_24 | 150 | 5.00 | | 2_25 | 150 | 5.00 | | 2_26 | 150 | 5.00 | | 2_27 | 150 | 5.00 | | 2_28 | 150 | 5.00 | | 2_29 | 150 | 5.00 | | 2_30 | 150 | 5.00 | | 2_31 | 150 | 5.00 | | 2_32 | 150 | 5.00 | | 2_33 | 150 | 5.00 | | 2_34 | 150 | 5.00 | | 2_35 | 150 | 5.00 | | 2_36 | 150 | 5.00 | | 2_37 | 150 | 5.00 | | 2_38 | 150 | 5.00 | | 2_39 | 150 | 5.00 | | 2_40 | 150 | 5.00 | | 2_41 | 150 | 5.00 | | 2_42 | 150 | 5.00 | | 2_43 | 150 | 5.00 | | 2_44 | 150 | 5.00 | | 2_45 | 150 | 5.00 | | 2_46 | 150 | 5.00 | | 2_47 | 150 | 5.00 | | 2_48 | 150 | 5.00 | | 2_49 | 150 | 5.00 | | 2_50 | 150 | 5.00 | | 2_51 | 150 | 5.00 | | 2_52 | 150 | 5.00 | | 2_53 | 150 | 5.00 | | 2_54 | 150 | 5.00 | | 2_55 | 150 | 5.00 | | 2_56 | 150 | 5.00 | | 2_57 | 150 | 5.00 | | 2_58 | 150 | 5.00 | | 2_59 | 150 | 5.00 | | 2_60 | 150 | 5.00 | | 2_61 | 150 | 5.00 | | 2_62 | 150 | 5.00 | | 2_63 | 150 | 5.00 | | 2_64 | 150 | 5.00 | | 2_65 | 150 | 5.00 | | 2_66 | 150 | 5.00 | | 2_67 | 173 | 5.77 | | 3_1 | 150 | 5.00 | | 3_2 | 150 | 5.00 | | 3_3 | 150 | 5.00 | | 3_4 | 150 | 5.00 | | 3_5 | 150 | 5.00 | | 3_6 | 150 | 5.00 | | 3_7 | 150 | 5.00 | | 3_8 | 150 | 5.00 | | 3_9 | 150 | 5.00 | | 3_10 | 150 | 5.00 | | 3_11 | 150 | 5.00 | | 3_12 | 150 | 5.00 | | 3_13 | 150 | 5.00 | | 3_14 | 150 | 5.00 | | 3_15 | 150 | 5.00 | | 3_16 | 150 | 5.00 | | 3_17 | 
150 | 5.00 | | 3_18 | 150 | 5.00 | | 3_19 | 150 | 5.00 | | 3_20 | 150 | 5.00 | | 3_21 | 150 | 5.00 | | 3_22 | 150 | 5.00 | | 3_23 | 150 | 5.00 | | 3_24 | 150 | 5.00 | | 3_25 | 150 | 5.00 | | 3_26 | 150 | 5.00 | | 3_27 | 150 | 5.00 | | 3_28 | 150 | 5.00 | | 3_29 | 150 | 5.00 | | 3_30 | 150 | 5.00 | | 3_31 | 150 | 5.00 | | 3_32 | 150 | 5.00 | | 3_33 | 150 | 5.00 | | 3_34 | 150 | 5.00 | | 3_35 | 150 | 5.00 | | 3_36 | 150 | 5.00 | | 3_37 | 150 | 5.00 | | 3_38 | 150 | 5.00 | | 3_39 | 150 | 5.00 | | 3_40 | 150 | 5.00 | | 3_41 | 150 | 5.00 | | 3_42 | 150 | 5.00 | | 3_43 | 150 | 5.00 | | 3_44 | 150 | 5.00 | | 3_45 | 150 | 5.00 | | 3_46 | 150 | 5.00 | | 3_47 | 150 | 5.00 | | 3_48 | 102 | 3.40 | | 4_1 | 150 | 5.00 | | 4_2 | 150 | 5.00 | | 4_3 | 150 | 5.00 | | 4_4 | 150 | 5.00 | | 4_5 | 150 | 5.00 | | 4_6 | 150 | 5.00 | | 4_7 | 150 | 5.00 | | 4_8 | 150 | 5.00 | | 4_9 | 150 | 5.00 | | 4_10 | 150 | 5.00 | | 4_11 | 150 | 5.00 | | 4_12 | 150 | 5.00 | | 4_13 | 150 | 5.00 | | 4_14 | 150 | 5.00 | | 4_15 | 150 | 5.00 | | 4_16 | 150 | 5.00 | | 4_17 | 150 | 5.00 | | 4_18 | 150 | 5.00 | | 4_19 | 150 | 5.00 | | 4_20 | 150 | 5.00 | | 4_21 | 150 | 5.00 | | 4_22 | 150 | 5.00 | | 4_23 | 150 | 5.00 | | 4_24 | 150 | 5.00 | | 4_25 | 150 | 5.00 | | 4_26 | 150 | 5.00 | | 4_27 | 150 | 5.00 | | 4_28 | 150 | 5.00 | | 4_29 | 150 | 5.00 | | 4_30 | 150 | 5.00 | | 4_31 | 150 | 5.00 | | 4_32 | 150 | 5.00 | | 4_33 | 150 | 5.00 | | 4_34 | 150 | 5.00 | | 4_35 | 150 | 5.00 | | 4_36 | 150 | 5.00 | | 4_37 | 150 | 5.00 | | 4_38 | 150 | 5.00 | | 4_39 | 150 | 5.00 | | 4_40 | 150 | 5.00 | | 4_41 | 150 | 5.00 | | 4_42 | 150 | 5.00 | | 4_43 | 150 | 5.00 | | 4_44 | 150 | 5.00 | | 4_45 | 150 | 5.00 | | 4_46 | 150 | 5.00 | | 4_47 | 150 | 5.00 | | 4_48 | 150 | 5.00 | | 4_49 | 150 | 5.00 | | 4_50 | 150 | 5.00 | | 4_51 | 150 | 5.00 | | 4_52 | 150 | 5.00 | | 4_53 | 150 | 5.00 | | 4_54 | 150 | 5.00 | | 4_55 | 150 | 5.00 | | 4_56 | 150 | 5.00 | | 4_57 | 150 | 5.00 | | 4_58 | 150 | 5.00 | | 4_59 | 150 | 5.00 | | 4_60 | 150 | 5.00 | | 4_61 | 150 | 5.00 | | 4_62 | 150 | 5.00 | | 4_63 | 150 | 5.00 | | 4_64 | 150 | 5.00 | | 4_65 | 150 | 5.00 | | 4_66 | 150 | 5.00 | | 4_67 | 150 | 5.00 | | 4_68 | 150 | 5.00 | | 4_69 | 150 | 5.00 | | 4_70 | 150 | 5.00 | | 4_71 | 150 | 5.00 | | 4_72 | 150 | 5.00 | | 4_73 | 150 | 5.00 | | 4_74 | 150 | 5.00 | | 4_75 | 150 | 5.00 | | 4_76 | 150 | 5.00 | | 4_77 | 150 | 5.00 | | 4_78 | 150 | 5.00 | | 4_79 | 150 | 5.00 | | 4_80 | 150 | 5.00 | | 4_81 | 150 | 5.00 | | 4_82 | 150 | 5.00 | | 4_83 | 150 | 5.00 | | 4_84 | 150 | 5.00 | | 4_85 | 150 | 5.00 | | 4_86 | 150 | 5.00 | | 4_87 | 150 | 5.00 | | 4_88 | 150 | 5.00 | | 4_89 | 150 | 5.00 | | 4_90 | 150 | 5.00 | | 4_91 | 150 | 5.00 | | 4_92 | 150 | 5.00 | | 4_93 | 150 | 5.00 | | 4_94 | 150 | 5.00 | | 4_95 | 150 | 5.00 | | 4_96 | 150 | 5.00 | | 4_97 | 235 | 7.83 | | 5_1 | 150 | 5.00 | | 5_2 | 150 | 5.00 | | 5_3 | 150 | 5.00 | | 5_4 | 150 | 5.00 | | 5_5 | 150 | 5.00 | | 5_6 | 150 | 5.00 | | 5_7 | 150 | 5.00 | | 5_8 | 150 | 5.00 | | 5_9 | 150 | 5.00 | | 5_10 | 150 | 5.00 | | 5_11 | 150 | 5.00 | | 5_12 | 150 | 5.00 | | 5_13 | 150 | 5.00 | | 5_14 | 150 | 5.00 | | 5_15 | 150 | 5.00 | | 5_16 | 150 | 5.00 | | 5_17 | 150 | 5.00 | | 5_18 | 150 | 5.00 | | 5_19 | 150 | 5.00 | | 5_20 | 150 | 5.00 | | 5_21 | 150 | 5.00 | | 5_22 | 150 | 5.00 | | 5_23 | 150 | 5.00 | | 5_24 | 150 | 5.00 | | 5_25 | 150 | 5.00 | | 5_26 | 150 | 5.00 | | 5_27 | 150 | 5.00 | | 5_28 | 150 | 5.00 | | 5_29 | 150 | 5.00 | | 5_30 | 150 | 5.00 | | 5_31 | 150 | 5.00 | | 5_32 | 150 | 5.00 | | 5_33 | 150 | 5.00 | | 5_34 | 150 | 
5.00 | | 5_35 | 150 | 5.00 | | 5_36 | 150 | 5.00 | | 5_37 | 150 | 5.00 | | 5_38 | 150 | 5.00 | | 5_39 | 150 | 5.00 | | 5_40 | 150 | 5.00 | | 5_41 | 150 | 5.00 | | 5_42 | 150 | 5.00 | | 5_43 | 150 | 5.00 | | 5_44 | 150 | 5.00 | | 5_45 | 150 | 5.00 | | 5_46 | 150 | 5.00 | | 5_47 | 150 | 5.00 | | 5_48 | 150 | 5.00 | | 5_49 | 150 | 5.00 | | 5_50 | 150 | 5.00 | | 5_51 | 150 | 5.00 | | 5_52 | 150 | 5.00 | | 5_53 | 116 | 3.87 | | 6_1 | 150 | 5.00 | | 6_2 | 150 | 5.00 | | 6_3 | 150 | 5.00 | | 6_4 | 150 | 5.00 | | 6_5 | 150 | 5.00 | | 6_6 | 150 | 5.00 | | 6_7 | 150 | 5.00 | | 6_8 | 150 | 5.00 | | 6_9 | 150 | 5.00 | | 6_10 | 150 | 5.00 | | 6_11 | 150 | 5.00 | | 6_12 | 150 | 5.00 | | 6_13 | 150 | 5.00 | | 6_14 | 150 | 5.00 | | 6_15 | 150 | 5.00 | | 6_16 | 150 | 5.00 | | 6_17 | 150 | 5.00 | | 6_18 | 150 | 5.00 | | 6_19 | 150 | 5.00 | | 6_20 | 150 | 5.00 | | 6_21 | 150 | 5.00 | | 6_22 | 150 | 5.00 | | 6_23 | 150 | 5.00 | | 6_24 | 150 | 5.00 | | 6_25 | 150 | 5.00 | | 6_26 | 150 | 5.00 | | 6_27 | 150 | 5.00 | | 6_28 | 150 | 5.00 | | 6_29 | 150 | 5.00 | | 6_30 | 150 | 5.00 | | 6_31 | 150 | 5.00 | | 6_32 | 150 | 5.00 | | 6_33 | 150 | 5.00 | | 6_34 | 150 | 5.00 | | 6_35 | 150 | 5.00 | | 6_36 | 150 | 5.00 | | 6_37 | 150 | 5.00 | | 6_38 | 150 | 5.00 | | 6_39 | 150 | 5.00 | | 6_40 | 150 | 5.00 | | 6_41 | 150 | 5.00 | | 6_42 | 150 | 5.00 | | 6_43 | 150 | 5.00 | | 6_44 | 150 | 5.00 | | 6_45 | 150 | 5.00 | | 6_46 | 150 | 5.00 | | 6_47 | 150 | 5.00 | | 6_48 | 150 | 5.00 | | 6_49 | 150 | 5.00 | | 6_50 | 150 | 5.00 | | 6_51 | 150 | 5.00 | | 6_52 | 150 | 5.00 | | 6_53 | 150 | 5.00 | | 6_54 | 150 | 5.00 | | 6_55 | 150 | 5.00 | | 6_56 | 150 | 5.00 | | 6_57 | 150 | 5.00 | | 6_58 | 150 | 5.00 | | 6_59 | 150 | 5.00 | | 6_60 | 150 | 5.00 | | 6_61 | 150 | 5.00 | | 6_62 | 150 | 5.00 | | 6_63 | 150 | 5.00 | | 6_64 | 150 | 5.00 | | 6_65 | 150 | 5.00 | | 6_66 | 150 | 5.00 | | 6_67 | 150 | 5.00 | | 6_68 | 150 | 5.00 | | 6_69 | 150 | 5.00 | | 6_70 | 150 | 5.00 | | 6_71 | 150 | 5.00 | | 6_72 | 150 | 5.00 | | 6_73 | 150 | 5.00 | | 6_74 | 150 | 5.00 | | 6_75 | 150 | 5.00 | | 6_76 | 150 | 5.00 | | 6_77 | 150 | 5.00 | | 6_78 | 150 | 5.00 | | 6_79 | 150 | 5.00 | | 6_80 | 150 | 5.00 | | 6_81 | 150 | 5.00 | | 6_82 | 150 | 5.00 | | 6_83 | 150 | 5.00 | | 6_84 | 150 | 5.00 | | 6_85 | 150 | 5.00 | | 6_86 | 150 | 5.00 | | 6_87 | 150 | 5.00 | | 6_88 | 150 | 5.00 | | 6_89 | 150 | 5.00 | | 6_90 | 150 | 5.00 | | 6_91 | 150 | 5.00 | | 6_92 | 150 | 5.00 | | 6_93 | 150 | 5.00 | | 6_94 | 150 | 5.00 | | 6_95 | 150 | 5.00 | | 6_96 | 150 | 5.00 | | 6_97 | 150 | 5.00 | | 6_98 | 150 | 5.00 | | 6_99 | 150 | 5.00 | | 6_100 | 150 | 5.00 | | 6_101 | 150 | 5.00 | | 6_102 | 150 | 5.00 | | 6_103 | 150 | 5.00 | | 6_104 | 150 | 5.00 | | 6_105 | 150 | 5.00 | | 6_106 | 150 | 5.00 | | 6_107 | 150 | 5.00 | | 6_108 | 150 | 5.00 | | 6_109 | 150 | 5.00 | | 6_110 | 150 | 5.00 | | 6_111 | 150 | 5.00 | | 6_112 | 150 | 5.00 | | 6_113 | 150 | 5.00 | | 6_114 | 96 | 3.20 | | 7_1 | 150 | 5.00 | | 7_2 | 150 | 5.00 | | 7_3 | 150 | 5.00 | | 7_4 | 150 | 5.00 | | 7_5 | 150 | 5.00 | | 7_6 | 150 | 5.00 | | 7_7 | 150 | 5.00 | | 7_8 | 150 | 5.00 | | 7_9 | 150 | 5.00 | | 7_10 | 150 | 5.00 | | 7_11 | 150 | 5.00 | | 7_12 | 150 | 5.00 | | 7_13 | 150 | 5.00 | | 7_14 | 150 | 5.00 | | 7_15 | 150 | 5.00 | | 7_16 | 150 | 5.00 | | 7_17 | 150 | 5.00 | | 7_18 | 150 | 5.00 | | 7_19 | 150 | 5.00 | | 7_20 | 150 | 5.00 | | 7_21 | 150 | 5.00 | | 7_22 | 150 | 5.00 | | 7_23 | 150 | 5.00 | | 7_24 | 150 | 5.00 | | 7_25 | 150 | 5.00 | | 7_26 | 150 | 5.00 | | 7_27 | 150 | 5.00 | | 7_28 | 150 | 5.00 | | 7_29 | 
150 | 5.00 | | 7_30 | 150 | 5.00 | | 7_31 | 150 | 5.00 | | 7_32 | 150 | 5.00 | | 7_33 | 150 | 5.00 | | 7_34 | 150 | 5.00 | | 7_35 | 150 | 5.00 | | 7_36 | 150 | 5.00 | | 7_37 | 236 | 7.87 | | 8_1 | 150 | 5.00 | | 8_2 | 150 | 5.00 | | 8_3 | 150 | 5.00 | | 8_4 | 150 | 5.00 | | 8_5 | 150 | 5.00 | | 8_6 | 150 | 5.00 | | 8_7 | 150 | 5.00 | | 8_8 | 150 | 5.00 | | 8_9 | 150 | 5.00 | | 8_10 | 150 | 5.00 | | 8_11 | 150 | 5.00 | | 8_12 | 150 | 5.00 | | 8_13 | 150 | 5.00 | | 8_14 | 150 | 5.00 | | 8_15 | 150 | 5.00 | | 8_16 | 150 | 5.00 | | 8_17 | 168 | 5.60 | | 9_1 | 150 | 5.00 | | 9_2 | 150 | 5.00 | | 9_3 | 150 | 5.00 | | 9_4 | 150 | 5.00 | | 9_5 | 150 | 5.00 | | 9_6 | 150 | 5.00 | | 9_7 | 150 | 5.00 | | 9_8 | 150 | 5.00 | | 9_9 | 150 | 5.00 | | 9_10 | 150 | 5.00 | | 9_11 | 150 | 5.00 | | 9_12 | 150 | 5.00 | | 9_13 | 150 | 5.00 | | 9_14 | 150 | 5.00 | | 9_15 | 150 | 5.00 | | 9_16 | 150 | 5.00 | | 9_17 | 150 | 5.00 | | 9_18 | 150 | 5.00 | | 9_19 | 150 | 5.00 | | 9_20 | 150 | 5.00 | | 9_21 | 150 | 5.00 | | 9_22 | 150 | 5.00 | | 9_23 | 150 | 5.00 | | 9_24 | 150 | 5.00 | | 9_25 | 150 | 5.00 | | 9_26 | 150 | 5.00 | | 9_27 | 150 | 5.00 | | 9_28 | 150 | 5.00 | | 9_29 | 150 | 5.00 | | 9_30 | 150 | 5.00 | | 9_31 | 150 | 5.00 | | 9_32 | 150 | 5.00 | | 9_33 | 150 | 5.00 | | 9_34 | 150 | 5.00 | | 9_35 | 150 | 5.00 | | 9_36 | 150 | 5.00 | | 9_37 | 150 | 5.00 | | 9_38 | 150 | 5.00 | | 9_39 | 150 | 5.00 | | 9_40 | 150 | 5.00 | | 9_41 | 150 | 5.00 | | 9_42 | 150 | 5.00 | | 9_43 | 150 | 5.00 | | 9_44 | 150 | 5.00 | | 9_45 | 150 | 5.00 | | 9_46 | 150 | 5.00 | | 9_47 | 150 | 5.00 | | 9_48 | 150 | 5.00 | | 9_49 | 150 | 5.00 | | 9_50 | 150 | 5.00 | | 9_51 | 150 | 5.00 | | 9_52 | 150 | 5.00 | | 9_53 | 150 | 5.00 | | 9_54 | 150 | 5.00 | | 9_55 | 150 | 5.00 | | 9_56 | 150 | 5.00 | | 9_57 | 150 | 5.00 | | 9_58 | 150 | 5.00 | | 9_59 | 150 | 5.00 | | 9_60 | 150 | 5.00 | | 9_61 | 150 | 5.00 | | 9_62 | 150 | 5.00 | | 9_63 | 222 | 7.40 | | 10_1 | 150 | 5.00 | | 10_2 | 150 | 5.00 | | 10_3 | 150 | 5.00 | | 10_4 | 150 | 5.00 | | 10_5 | 150 | 5.00 | | 10_6 | 150 | 5.00 | | 10_7 | 150 | 5.00 | | 10_8 | 150 | 5.00 | | 10_9 | 150 | 5.00 | | 10_10 | 150 | 5.00 | | 10_11 | 150 | 5.00 | | 10_12 | 150 | 5.00 | | 10_13 | 150 | 5.00 | | 10_14 | 150 | 5.00 | | 10_15 | 150 | 5.00 | | 10_16 | 150 | 5.00 | | 10_17 | 150 | 5.00 | | 10_18 | 150 | 5.00 | | 10_19 | 150 | 5.00 | | 10_20 | 150 | 5.00 | | 10_21 | 150 | 5.00 | | 10_22 | 150 | 5.00 | | 10_23 | 150 | 5.00 | | 10_24 | 150 | 5.00 | | 10_25 | 150 | 5.00 | | 10_26 | 150 | 5.00 | | 10_27 | 150 | 5.00 | | 10_28 | 150 | 5.00 | | 10_29 | 150 | 5.00 | | 10_30 | 150 | 5.00 | | 10_31 | 150 | 5.00 | | 10_32 | 150 | 5.00 | | 10_33 | 150 | 5.00 | | 10_34 | 150 | 5.00 | | 10_35 | 150 | 5.00 | | 10_36 | 150 | 5.00 | | 10_37 | 150 | 5.00 | | 10_38 | 150 | 5.00 | | 10_39 | 150 | 5.00 | | 10_40 | 150 | 5.00 | | 10_41 | 150 | 5.00 | | 10_42 | 150 | 5.00 | | 10_43 | 150 | 5.00 | | 10_44 | 150 | 5.00 | | 10_45 | 150 | 5.00 | | 10_46 | 150 | 5.00 | | 10_47 | 186 | 6.20 | | 11_1 | 150 | 5.00 | | 11_2 | 150 | 5.00 | | 11_3 | 150 | 5.00 | | 11_4 | 150 | 5.00 | | 11_5 | 150 | 5.00 | | 11_6 | 150 | 5.00 | | 11_7 | 150 | 5.00 | | 11_8 | 150 | 5.00 | | 11_9 | 150 | 5.00 | | 11_10 | 150 | 5.00 | | 11_11 | 150 | 5.00 | | 11_12 | 150 | 5.00 | | 11_13 | 150 | 5.00 | | 11_14 | 150 | 5.00 | | 11_15 | 150 | 5.00 | | 11_16 | 150 | 5.00 | | 11_17 | 150 | 5.00 | | 11_18 | 150 | 5.00 | | 11_19 | 150 | 5.00 | | 11_20 | 150 | 5.00 | | 11_21 | 150 | 5.00 | | 11_22 | 150 | 5.00 | | 11_23 | 150 | 5.00 | | 11_24 | 150 | 5.00 | | 11_25 
| 150 | 5.00 |
| 11_26 | 150 | 5.00 |
| 11_27 | 150 | 5.00 |
| 11_28 | 150 | 5.00 |
| 11_29 | 150 | 5.00 |
| 11_30 | 150 | 5.00 |
| 11_31 | 150 | 5.00 |
| 11_32 | 182 | 6.07 |
| 12_1 | 150 | 5.00 |
| 12_2 | 150 | 5.00 |
| 12_3 | 150 | 5.00 |
| 12_4 | 150 | 5.00 |
| 12_5 | 150 | 5.00 |
| 12_6 | 150 | 5.00 |
| 12_7 | 150 | 5.00 |
| 12_8 | 150 | 5.00 |
| 12_9 | 150 | 5.00 |
| 12_10 | 150 | 5.00 |
| 12_11 | 150 | 5.00 |
| 12_12 | 150 | 5.00 |
| 12_13 | 150 | 5.00 |
| 12_14 | 150 | 5.00 |
| 12_15 | 150 | 5.00 |
| 12_16 | 150 | 5.00 |
| 12_17 | 150 | 5.00 |
| 12_18 | 150 | 5.00 |
| 12_19 | 150 | 5.00 |
| 12_20 | 150 | 5.00 |
| 12_21 | 150 | 5.00 |
| 12_22 | 150 | 5.00 |
| 12_23 | 150 | 5.00 |
| 12_24 | 150 | 5.00 |
| 12_25 | 150 | 5.00 |
| 12_26 | 150 | 5.00 |
| 12_27 | 150 | 5.00 |
| 12_28 | 150 | 5.00 |
| 12_29 | 150 | 5.00 |
| 12_30 | 150 | 5.00 |
| 12_31 | 150 | 5.00 |
| 12_32 | 150 | 5.00 |
| 12_33 | 150 | 5.00 |
| 12_34 | 150 | 5.00 |
| 12_35 | 150 | 5.00 |
| 12_36 | 150 | 5.00 |
| 12_37 | 150 | 5.00 |
| 12_38 | 150 | 5.00 |
| 12_39 | 150 | 5.00 |
| 12_40 | 150 | 5.00 |
| 12_41 | 150 | 5.00 |
| 12_42 | 150 | 5.00 |
| 12_43 | 150 | 5.00 |
| 12_44 | 150 | 5.00 |
| 12_45 | 199 | 6.63 |
| 13_1 | 150 | 5.00 |
| 13_2 | 150 | 5.00 |
| 13_3 | 150 | 5.00 |
| 13_4 | 150 | 5.00 |
| 13_5 | 150 | 5.00 |
| 13_6 | 150 | 5.00 |
| 13_7 | 150 | 5.00 |
| 13_8 | 150 | 5.00 |
| 13_9 | 150 | 5.00 |
| 13_10 | 150 | 5.00 |
| 13_11 | 150 | 5.00 |
| 13_12 | 150 | 5.00 |
| 13_13 | 150 | 5.00 |
| 13_14 | 150 | 5.00 |
| 13_15 | 150 | 5.00 |
| 13_16 | 150 | 5.00 |
| 13_17 | 150 | 5.00 |
| 13_18 | 150 | 5.00 |
| 13_19 | 150 | 5.00 |
| 13_20 | 150 | 5.00 |
| 13_21 | 150 | 5.00 |
| 13_22 | 150 | 5.00 |
| 13_23 | 150 | 5.00 |
| 13_24 | 150 | 5.00 |
| 13_25 | 150 | 5.00 |
| 13_26 | 150 | 5.00 |
| 13_27 | 150 | 5.00 |
| 13_28 | 150 | 5.00 |
| 13_29 | 150 | 5.00 |
| 13_30 | 150 | 5.00 |
| 13_31 | 150 | 5.00 |
| 13_32 | 150 | 5.00 |
| 13_33 | 150 | 5.00 |
| 13_34 | 150 | 5.00 |
| 13_35 | 150 | 5.00 |
| 13_36 | 150 | 5.00 |
| 13_37 | 150 | 5.00 |
| 13_38 | 150 | 5.00 |
| 13_39 | 150 | 5.00 |
| 13_40 | 150 | 5.00 |
| 13_41 | 150 | 5.00 |
| 13_42 | 178 | 5.93 |

================================================
FILE: docs/INFO_POSITIVE_CASES.md
================================================
# Information of Positive Cases in our SUN-SEG

## Abbreviation Illustration

- Shape
  - Ip - Pedunculated
  - Isp - Semipedunculated
  - Is - Sessile
  - IIa - Slightly elevated
- Location
  - C - Cecum
  - A - Ascending colon
  - T - Transverse colon
  - D - Descending colon
  - S - Sigmoid colon
  - R - Rectum
- Pathological diagnosis
  - LA - Low-grade adenoma
  - HA - High-grade adenoma
  - HP - Hyperplastic polyp
  - TSA - Traditional serrated adenoma
  - SSL - Sessile serrated lesion
  - IC - Invasive cancer (T1b)

## Statistic Table

| ID | Frames | Duration (s) | Shape | Size | Location | Pathological diagnosis |
|-------|-----|-------|-------------|-------|---|--------------------------------|
| 1_1 | 150 | 5.00 | Is | 6mm | C | Low-grade adenoma |
| 1_2 | 216 | 7.20 | Is | 6mm | C | Low-grade adenoma |
| 1_3 | 161 | 5.37 | Is | 6mm | C | Low-grade adenoma |
| 2_1 | 150 | 5.00 | Is | 18mm | R | High-grade adenoma |
| 2_2 | 150 | 5.00 | Is | 18mm | R | High-grade adenoma |
| 2_3 | 150 | 5.00 | Is | 18mm | R | High-grade adenoma |
| 2_4 | 150 | 5.00 | Is | 18mm | R | High-grade adenoma |
| 2_5 | 171 | 5.70 | Is | 18mm | R | High-grade adenoma |
| 2_6 | 99 | 3.30 | Is | 18mm | R | High-grade adenoma |
| 2_7 | 150 | 5.00 | Is | 18mm | R | High-grade adenoma |
| 2_8 | 181 | 6.03 | Is | 18mm | R | High-grade adenoma |
| 2_9 | 112 | 3.73 | Is | 18mm | R | High-grade adenoma |
| 3_1 | 198 | 6.60 | IIa | 3mm | A | Low-grade adenoma |
| 3_2 | 94 | 3.13 | IIa | 3mm | A | Low-grade adenoma |
| 4 | 80 | 2.67 | Is | 4mm | S | Low-grade adenoma |
| 5_1 | 187 | 6.23 | IIa | 3mm | T | Low-grade adenoma |
| 5_2 | 150 | 5.00 | IIa | 3mm | T | Low-grade adenoma |
| 5_3 | 150 | 5.00 | IIa | 3mm | T | Low-grade adenoma |
| 5_4 | 150 | 5.00 | IIa | 3mm | T | Low-grade adenoma |
| 5_5 | 118 | 3.93 | IIa | 3mm | T | Low-grade adenoma |
| 5_6 | 175 | 5.83 | IIa | 3mm | T | Low-grade adenoma |
| 6_1 | 131 | 4.37 | IIa | 3mm | S | Low-grade adenoma |
| 6_2 | 150 | 5.00 | IIa | 3mm | S | Low-grade adenoma |
| 6_3 | 210 | 7.00 | IIa | 3mm | S | Low-grade adenoma |
| 7_1 | 184 | 6.13 | IIa | 6mm | D | Low-grade adenoma |
| 7_2 | 131 | 4.37 | IIa | 6mm | D | Low-grade adenoma |
| 8_1 | 115 | 3.83 | Isp | 12mm | S | Low-grade adenoma |
| 8_2 | 141 | 4.70 | Isp | 12mm | S | Low-grade adenoma |
| 9 | 136 | 4.53 | Is | 4mm | S | Low-grade adenoma |
| 10_1 | 213 | 7.10 | IIa | 3mm | T | Low-grade adenoma |
| 10_2 | 223 | 7.43 | IIa | 3mm | T | Low-grade adenoma |
| 11 | 113 | 3.77 | IIa | 5mm | D | Low-grade adenoma |
| 12_1 | 184 | 6.13 | Is | 5mm | R | Low-grade adenoma |
| 12_2 | 248 | 8.27 | Is | 5mm | R | Low-grade adenoma |
| 12_3 | 106 | 3.53 | Is | 5mm | R | Low-grade adenoma |
| 13_1 | 98 | 3.27 | Is | 5mm | T | Low-grade adenoma |
| 13_2 | 205 | 6.83 | Is | 5mm | T | Low-grade adenoma |
| 13_3 | 176 | 5.87 | Is | 5mm | T | Low-grade adenoma |
| 14_1 | 150 | 5.00 | IIa | 3mm | S | Low-grade adenoma |
| 14_2 | 150 | 5.00 | IIa | 3mm | S | Low-grade adenoma |
| 14_3 | 378 | 12.60 | IIa | 3mm | S | Low-grade adenoma |
| 14_4 | 145 | 4.83 | IIa | 3mm | S | Low-grade adenoma |
| 14_5 | 160 | 5.33 | IIa | 3mm | S | Low-grade adenoma |
| 14_6 | 200 | 6.67 | IIa | 3mm | S | Low-grade adenoma |
| 15_1 | 235 | 7.83 | Is | 5mm | T | Low-grade adenoma |
| 15_2 | 150 | 5.00 | Is | 5mm | T | Low-grade adenoma |
| 15_3 | 102 | 3.40 | Is | 5mm | T | Low-grade adenoma |
| 16 | 199 | 6.63 | Is | 4mm | T | Low-grade adenoma |
| 17_1 | 126 | 4.20 | Is | 4mm | S | Low-grade adenoma |
| 17_2 | 178 | 5.93 | Is | 4mm | S | Low-grade adenoma |
| 18_1 | 91 | 3.03 | Is | 2mm | S | Hyperplastic polyp |
| 18_2 | 152 | 5.07 | Is | 2mm | S | Hyperplastic polyp |
| 19 | 96 | 3.20 | IIa | 3mm | T | Low-grade adenoma |
| 20_1 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_2 | 205 | 6.83 | IIa | 3mm | A | Low-grade adenoma |
| 20_3 | 148 | 4.93 | IIa | 3mm | A | Low-grade adenoma |
| 20_4 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_5 | 191 | 6.37 | IIa | 3mm | A | Low-grade adenoma |
| 20_6 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_7 | 170 | 5.67 | IIa | 3mm | A | Low-grade adenoma |
| 20_8 | 233 | 7.77 | IIa | 3mm | A | Low-grade adenoma |
| 20_9 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_10 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_11 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_12 | 156 | 5.20 | IIa | 3mm | A | Low-grade adenoma |
| 20_13 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_14 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_15 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_16 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_17 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_18 | 150 | 5.00 | IIa | 3mm | A | Low-grade adenoma |
| 20_19 | 118 | 3.93 | IIa | 3mm | A | Low-grade adenoma |
| 20_20 | 138 | 4.60 | IIa | 3mm | A | Low-grade adenoma |
| 21 | 100 | 3.33 | IIa | 3mm | S | Low-grade adenoma |
| 22 | 314 | 10.47 | IIa | 2mm | A | Low-grade adenoma |
| 23 | 182 | 6.07 | Ip | 12mm | A | Low-grade adenoma |
| 24_1 | 150 | 5.00 | Ip | 15mm- | S | Low-grade adenoma |
| 24_2 | 150 | 5.00 | Ip | 15mm- | S | Low-grade adenoma |
| 24_3 | 152 | 5.07 | Ip | 15mm- | S | Low-grade adenoma |
| 24_4 | 150 | 5.00 | Ip | 15mm- | S | Low-grade adenoma |
| 24_5 | 150 | 5.00 | Ip | 15mm- | S | Low-grade adenoma |
| 24_6 | 221 | 7.37 | Ip | 15mm- | S | Low-grade adenoma |
| 25_1 | 202 | 6.73 | Is | 7mm | S | Low-grade adenoma |
| 25_2 | 136 | 4.53 | Is | 7mm | S | Low-grade adenoma |
| 26_1 | 151 | 5.03 | Is | 5mm | D | Low-grade adenoma |
| 26_2 | 219 | 7.30 | Is | 5mm | D | Low-grade adenoma |
| 27 | 249 | 8.30 | Is | 5mm | A | Hyperplastic polyp |
| 28 | 195 | 6.50 | Is | 2mm | T | Low-grade adenoma |
| 29_1 | 214 | 7.13 | Isp | 13mm | S | Low-grade adenoma |
| 29_2 | 163 | 5.43 | Isp | 13mm | S | Low-grade adenoma |
| 30 | 224 | 7.47 | IIa | 4mm | S | Low-grade adenoma |
| 31 | 183 | 6.10 | Ip | 12mm | D | Low-grade adenoma |
| 32_1 | 210 | 7.00 | Ip | 15mm- | A | Traditional serrated adenoma |
| 32_2 | 100 | 3.33 | Ip | 15mm- | A | Traditional serrated adenoma |
| 32_3 | 165 | 5.50 | Ip | 15mm- | A | Traditional serrated adenoma |
| 32_4 | 185 | 6.17 | Ip | 15mm- | A | Traditional serrated adenoma |
| 32_5 | 150 | 5.00 | Ip | 15mm- | A | Traditional serrated adenoma |
| 32_6 | 171 | 5.70 | Ip | 15mm- | A | Traditional serrated adenoma |
| 33_1 | 207 | 6.90 | Is | 5mm | S | Low-grade adenoma |
| 33_2 | 186 | 6.20 | Is | 5mm | S | Low-grade adenoma |
| 33_3 | 201 | 6.70 | Is | 5mm | S | Low-grade adenoma |
| 34_1 | 121 | 4.03 | Is | 3mm | A | Low-grade adenoma |
| 34_2 | 124 | 4.13 | Is | 3mm | A | Low-grade adenoma |
| 35_1 | 185 | 6.17 | Ip | 15mm- | S | High-grade adenoma |
| 35_2 | 90 | 3.00 | Ip | 15mm- | S | High-grade adenoma |
| 35_3 | 193 | 6.43 | Ip | 15mm- | S | High-grade adenoma |
| 35_4 | 223 | 7.43 | Ip | 15mm- | S | High-grade adenoma |
| 35_5 | 150 | 5.00 | Ip | 15mm- | S | High-grade adenoma |
| 35_6 | 131 | 4.37 | Ip | 15mm- | S | High-grade adenoma |
| 35_7 | 124 | 4.13 | Ip | 15mm- | S | High-grade adenoma |
| 35_8 | 116 | 3.87 | Ip | 15mm- | S | High-grade adenoma |
| 36_1 | 334 | 11.13 | IIa | 7mm | S | Low-grade adenoma |
| 36_2 | 150 | 5.00 | IIa | 7mm | S | Low-grade adenoma |
| 36_3 | 203 | 6.77 | IIa | 7mm | S | Low-grade adenoma |
| 36_4 | 128 | 4.27 | IIa | 7mm | S | Low-grade adenoma |
| 37_1 | 187 | 6.23 | Is | 7mm | T | Low-grade adenoma |
| 37_2 | 159 | 5.30 | Is | 7mm | T | Low-grade adenoma |
| 37_3 | 102 | 3.40 | Is | 7mm | T | Low-grade adenoma |
| 38_1 | 255 | 8.50 | Is | 5mm | A | Low-grade adenoma |
| 38_2 | 254 | 8.47 | Is | 5mm | A | Low-grade adenoma |
| 39_1 | 294 | 9.80 | IIa | 13mm | A | Low-grade adenoma |
| 39_2 | 270 | 9.00 | IIa | 13mm | A | Low-grade adenoma |
| 39_3 | 149 | 4.97 | IIa | 13mm | A | Low-grade adenoma |
| 40 | 159 | 5.30 | IIa | 5mm | T | Low-grade adenoma |
| 41 | 108 | 3.60 | IIa | 3mm | R | Low-grade adenoma |
| 42 | 268 | 8.93 | Is | 7mm | T | Low-grade adenoma |
| 43 | 260 | 8.67 | Isp | 10mm | A | Low-grade adenoma |
| 44_1 | 150 | 5.00 | IIa | 5mm | S | Low-grade adenoma |
| 44_2 | 150 | 5.00 | IIa | 5mm | S | Low-grade adenoma |
| 44_3 | 217 | 7.23 | IIa | 5mm | S | Low-grade adenoma |
| 44_4 | 228 | 7.60 | IIa | 5mm | S | Low-grade adenoma |
| 45_1 | 118 | 3.93 | Is | 3mm | A | Low-grade adenoma |
| 45_2 | 134 | 4.47 | Is | 3mm | A | Low-grade adenoma |
| 45_3 | 131 | 4.37 | Is | 3mm | A | Low-grade adenoma |
| 46 | 170 | 5.67 | IIa | 2mm | T | Hyperplastic polyp |
| 47_1 | 150 | 5.00 | Is | 5mm | T | Low-grade adenoma |
| 47_2 | 150 | 5.00 | Is | 5mm | T | Low-grade adenoma |
| 47_3 | 150 | 5.00 | Is | 5mm | T | Low-grade adenoma |
| 47_4 | 133 | 4.43 | Is | 5mm | T | Low-grade adenoma |
| 47_5 | 122 | 4.07 | Is | 5mm | T | Low-grade adenoma |
| 48 | 176 | 5.87 | Is | 3mm | T | Low-grade adenoma |
| 49 | 181 | 6.03 | IIa | 3mm | T | Low-grade adenoma |
| 50_1 | 203 | 6.77 | Ip | 10mm | S | Low-grade adenoma |
| 50_2 | 150 | 5.00 | Ip | 10mm | S | Low-grade adenoma |
| 50_3 | 150 | 5.00 | Ip | 10mm | S | Low-grade adenoma |
| 50_4 | 237 | 7.90 | Ip | 10mm | S | Low-grade adenoma |
| 51_1 | 175 | 5.83 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_2 | 250 | 8.33 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_3 | 210 | 7.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_4 | 150 | 5.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_5 | 150 | 5.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_6 | 150 | 5.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_7 | 134 | 4.47 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_8 | 150 | 5.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_9 | 150 | 5.00 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 51_10 | 218 | 7.27 | IIa(LST-NG) | 15mm- | C | Low-grade adenoma |
| 52 | 207 | 6.90 | IIa | 6mm | S | Low-grade adenoma |
| 53 | 245 | 8.17 | Is | 4mm | R | Hyperplastic polyp |
| 54_1 | 211 | 7.03 | Is | 4mm | S | Low-grade adenoma |
| 54_2 | 134 | 4.47 | Is | 4mm | S | Low-grade adenoma |
| 55_1 | 237 | 7.90 | Is | 3mm | A | Low-grade adenoma |
| 55_2 | 336 | 11.20 | Is | 3mm | A | Low-grade adenoma |
| 55_3 | 127 | 4.23 | Is | 3mm | A | Low-grade adenoma |
| 56_1 | 150 | 5.00 | Is | 4mm | S | Hyperplastic polyp |
| 56_2 | 98 | 3.27 | Is | 4mm | S | Hyperplastic polyp |
| 57 | 326 | 10.87 | Is | 5mm | T | Low-grade adenoma |
| 58 | 267 | 8.90 | IIa | 6mm | T | Sessile serrated lesion |
| 59_1 | 281 | 9.37 | Isp | 8mm | S | Traditional serrated adenoma |
| 59_2 | 150 | 5.00 | Isp | 8mm | S | Traditional serrated adenoma |
| 59_3 | 215 | 7.17 | Isp | 8mm | S | Traditional serrated adenoma |
| 60 | 146 | 4.87 | IIa | 8mm | T | Low-grade adenoma |
| 61_1 | 259 | 8.63 | Isp | 6mm | A | Low-grade adenoma |
| 61_2 | 199 | 6.63 | Isp | 6mm | A | Low-grade adenoma |
| 61_3 | 221 | 7.37 | Isp | 6mm | A | Low-grade adenoma |
| 62_1 | 190 | 6.33 | Is | 7mm | A | Low-grade adenoma |
| 62_2 | 161 | 5.37 | Is | 7mm | A | Low-grade adenoma |
| 63_1 | 326 | 10.87 | Is | 7mm | R | Invasive cancer (T1b) |
| 63_2 | 133 | 4.43 | Is | 7mm | R | Invasive cancer (T1b) |
| 63_3 | 173 | 5.77 | Is | 7mm | R | Invasive cancer (T1b) |
| 64 | 81 | 2.70 | IIa | 3mm | S | Low-grade adenoma |
| 65 | 222 | 7.40 | IIa | 3mm | C | Low-grade adenoma |
| 66_1 | 165 | 5.50 | Is | 6mm | S | Low-grade adenoma |
| 66_2 | 323 | 10.77 | Is | 6mm | S | Low-grade adenoma |
| 66_3 | 150 | 5.00 | Is | 6mm | S | Low-grade adenoma |
| 66_4 | 236 | 7.87 | Is | 6mm | S | Low-grade adenoma |
| 66_5 | 150 | 5.00 | Is | 6mm | S | Low-grade adenoma |
| 66_6 | 134 | 4.47 | Is | 6mm | S | Low-grade adenoma |
| 66_7 | 177 | 5.90 | Is | 6mm | S | Low-grade adenoma |
| 66_8 | 150 | 5.00 | Is | 6mm | S | Low-grade adenoma |
| 66_9 | 200 | 6.67 | Is | 6mm | S | Low-grade adenoma |
| 67 | 191 | 6.37 | IIa | 5mm | T | Low-grade adenoma |
| 68_1 | 164 | 5.47 | Is | 15mm- | R | High-grade adenoma |
| 68_2 | 180 | 6.00 | Is | 15mm- | R | High-grade adenoma |
| 68_3 | 163 | 5.43 | Is | 15mm- | R | High-grade adenoma |
| 68_4 | 228 | 7.60 | Is | 15mm- | R | High-grade adenoma |
| 68_5 | 150 | 5.00 | Is | 15mm- | R | High-grade adenoma |
| 68_6 | 204 | 6.80 | Is | 15mm- | R | High-grade adenoma |
| 68_7 | 230 | 7.67 | Is | 15mm- | R | High-grade adenoma |
| 69 | 130 | 4.33 | IIa | 3mm | D | Low-grade adenoma |
| 70 | 264 | 8.80 | Ip | 15mm- | S | Low-grade adenoma |
| 71_1 | 128 | 4.27 | Is | 4mm | A | Low-grade adenoma |
| 71_2 | 150 | 5.00 | Is | 4mm | A | Low-grade adenoma |
| 71_3 | 150 | 5.00 | Is | 4mm | A | Low-grade adenoma |
| 71_4 | 144 | 4.80 | Is | 4mm | A | Low-grade adenoma |
| 71_5 | 226 | 7.53 | Is | 4mm | A | Low-grade adenoma |
| 71_6 | 223 | 7.43 | Is | 4mm | A | Low-grade adenoma |
| 72_1 | 221 | 7.37 | Is | 5mm | A | Low-grade adenoma |
| 72_2 | 234 | 7.80 | Is | 5mm | A | Low-grade adenoma |
| 72_3 | 104 | 3.47 | Is | 5mm | A | Low-grade adenoma |
| 72_4 | 215 | 7.17 | Is | 5mm | A | Low-grade adenoma |
| 73_1 | 161 | 5.37 | Is | 3mm | C | Low-grade adenoma |
| 73_2 | 111 | 3.70 | Is | 3mm | C | Low-grade adenoma |
| 73_3 | 164 | 5.47 | Is | 3mm | C | Low-grade adenoma |
| 73_4 | 128 | 4.27 | Is | 3mm | C | Low-grade adenoma |
| 73_5 | 150 | 5.00 | Is | 3mm | C | Low-grade adenoma |
| 73_6 | 169 | 5.63 | Is | 3mm | C | Low-grade adenoma |
| 73_7 | 150 | 5.00 | Is | 3mm | C | Low-grade adenoma |
| 73_8 | 150 | 5.00 | Is | 3mm | C | Low-grade adenoma |
| 73_9 | 102 | 3.40 | Is | 3mm | C | Low-grade adenoma |
| 74_1 | 157 | 5.23 | Isp | 5mm | S | Low-grade adenoma |
| 74_2 | 119 | 3.97 | Isp | 5mm | S | Low-grade adenoma |
| 75_1 | 150 | 5.00 | Is | 3mm | T | Low-grade adenoma |
| 75_2 | 193 | 6.43 | Is | 3mm | T | Low-grade adenoma |
| 76_1 | 162 | 5.40 | Is | 3mm | C | Low-grade adenoma |
| 76_2 | 181 | 6.03 | Is | 3mm | C | Low-grade adenoma |
| 77 | 215 | 7.17 | Is | 4mm | A | Low-grade adenoma |
| 78_1 | 170 | 5.67 | Isp | 12mm | S | High-grade adenoma |
| 78_2 | 97 | 3.23 | Isp | 12mm | S | High-grade adenoma |
| 79 | 76 | 2.53 | Is | 4mm | D | Low-grade adenoma |
| 80_1 | 160 | 5.33 | Is | 10mm | S | Low-grade adenoma |
| 80_2 | 150 | 5.00 | Is | 10mm | S | Low-grade adenoma |
| 80_3 | 150 | 5.00 | Is | 10mm | S | Low-grade adenoma |
| 80_4 | 150 | 5.00 | Is | 10mm | S | Low-grade adenoma |
| 80_5 | 137 | 4.57 | Is | 10mm | S | Low-grade adenoma |
| 80_6 | 150 | 5.00 | Is | 10mm | S | Low-grade adenoma |
| 80_7 | 150 | 5.00 | Is | 10mm | S | Low-grade adenoma |
| 80_8 | 145 | 4.83 | Is | 10mm | S | Low-grade adenoma |
| 81_1 | 153 | 5.10 | Is | 6mm | S | Low-grade adenoma |
| 81_2 | 274 | 9.13 | Is | 6mm | S | Low-grade adenoma |
| 82 | 111 | 3.70 | IIa | 3mm | S | Sessile serrated lesion |
| 83_1 | 283 | 9.43 | Isp | 13mm | R | Low-grade adenoma |
| 83_2 | 378 | 12.60 | Isp | 13mm | R | Low-grade adenoma |
| 83_3 | 134 | 4.47 | Isp | 13mm | R | Low-grade adenoma |
| 84 | 218 | 7.27 | Is | 5mm | D | Low-grade adenoma |
| 85_1 | 150 | 5.00 | IIa | 8mm | A | Low-grade adenoma |
| 85_2 | 90 | 3.00 | IIa | 8mm | A | Low-grade adenoma |
| 85_3 | 197 | 6.57 | IIa | 8mm | A | Low-grade adenoma |
| 85_4 | 197 | 6.57 | IIa | 8mm | A | Low-grade adenoma |
| 85_5 | 239 | 7.97 | IIa | 8mm | A | Low-grade adenoma |
| 85_6 | 150 | 5.00 | IIa | 8mm | A | Low-grade adenoma |
| 85_7 | 150 | 5.00 | IIa | 8mm | A | Low-grade adenoma |
| 85_8 | 220 | 7.33 | IIa | 8mm | A | Low-grade adenoma |
| 86_1 | 150 | 5.00 | IIa | 4mm | S | Low-grade adenoma |
| 86_2 | 107 | 3.57 | IIa | 4mm | S | Low-grade adenoma |
| 87_1 | 250 | 8.33 | Is | 3mm | C | Low-grade adenoma |
| 87_2 | 204 | 6.80 | Is | 3mm | C | Low-grade adenoma |
| 88_1 | 249 | 8.30 | Is | 4mm | A | Low-grade adenoma |
| 89 | 149 | 4.97 | Ip | 5mm | D | Low-grade adenoma |
| 90_1 | 150 | 5.00 | Is | 10mm | A | Sessile serrated lesion |
| 90_2 | 150 | 5.00 | Is | 10mm | A | Sessile serrated lesion |
| 90_3 | 179 | 5.97 | Is | 10mm | A | Sessile serrated lesion |
| 91_1 | 150 | 5.00 | IIa | 13mm | A | Low-grade adenoma |
| 91_2 | 150 | 5.00 | IIa | 13mm | A | Low-grade adenoma |
| 91_3 | 150 | 5.00 | IIa | 13mm | A | Low-grade adenoma |
| 91_4 | 109 | 3.63 | IIa | 13mm | A | Low-grade adenoma |
| 91_5 | 150 | 5.00 | IIa | 13mm | A | Low-grade adenoma |
| 91_6 | 118 | 3.93 | IIa | 13mm | A | Low-grade adenoma |
| 91_7 | 234 | 7.80 | IIa | 13mm | A | Low-grade adenoma |
| 92_1 | 201 | 6.70 | Is | 7mm | D | Low-grade adenoma |
| 92_2 | 190 | 6.33 | Is | 7mm | D | Low-grade adenoma |
| 93_1 | 217 | 7.23 | Is | 7mm | D | Low-grade adenoma |
| 93_2 | 235 | 7.83 | Is | 7mm | D | Low-grade adenoma |
| 94 | 136 | 4.53 | Is | 6mm | S | Low-grade adenoma |
| 95_1 | 259 | 8.63 | Isp | 8mm | S | Low-grade adenoma |
| 95_2 | 102 | 3.40 | Isp | 8mm | S | Low-grade adenoma |
| 95_3 | 150 | 5.00 | Isp | 8mm | S | Low-grade adenoma |
| 95_4 | 95 | 3.17 | Isp | 8mm | S | Low-grade adenoma |
| 96_1 | 115 | 3.83 | Is | 5mm | S | Hyperplastic polyp |
| 96_2 | 186 | 6.20 | Is | 5mm | S | Hyperplastic polyp |
| 97_1 | 150 | 5.00 | IIa | 15mm- | C | Sessile serrated lesion |
| 97_2 | 154 | 5.13 | IIa | 15mm- | C | Sessile serrated lesion |
| 97_3 | 127 | 4.23 | IIa | 15mm- | C | Sessile serrated lesion |
| 98 | 170 | 5.67 | IIa | 4mm | T | Low-grade adenoma |
| 99 | 161 | 5.37 | Is | 5mm | S | Low-grade adenoma |
| 100 | 188 | 6.27 | IIa | 3mm | R | Hyperplastic polyp |

================================================
FILE: docs/RELEASE_NOTES.md
================================================
- We greatly appreciate [@Yuli Zhou](https://github.com/zhoustan) for the feedback and for fixing the sorting bug in our test and evaluation processes. This bug fix has a very minor impact on performance. Below, we showcase the results before and after the fix:

| Dataset | Method | Status | Smeasure | meanEm | wFmeasure | maxDice |
|------------------------|------------------|------------|----------|--------|-----------|---------|
| TestEasyDataset-Seen | 2022-MIR-PNSPlus | before-fix | 0.917 | 0.924 | 0.848 | 0.888 |
| TestEasyDataset-Seen | 2022-MIR-PNSPlus | after-fix | 0.917 | 0.924 | 0.848 | 0.888 |
| TestHardDataset-Seen | 2022-MIR-PNSPlus | before-fix | 0.887 | 0.929 | 0.806 | 0.855 |
| TestHardDataset-Seen | 2022-MIR-PNSPlus | after-fix | 0.887 | 0.902 | 0.806 | 0.855 |

| Dataset | Method | Status | Smeasure | meanEm | wFmeasure | maxDice | meanFm | meanSen |
|------------------------|------------------|------------|----------|--------|-----------|---------|--------|---------|
| TestEasyDataset-Unseen | 2022-MIR-PNSPlus | before-fix | 0.806 | 0.798 | 0.676 | 0.756 | 0.730 | 0.630 |
| TestEasyDataset-Unseen | 2022-MIR-PNSPlus | after-fix | 0.806 | 0.798 | 0.676 | 0.756 | 0.730 | 0.630 |
| TestHardDataset-Unseen | 2022-MIR-PNSPlus | before-fix | 0.797 | 0.793 | 0.653 | 0.737 | 0.709 | 0.623 |
| TestHardDataset-Unseen | 2022-MIR-PNSPlus | after-fix | 0.798 | 0.793 | 0.654 | 0.737 | 0.709 | 0.624 |

Reminder: To ensure the correctness of results, we strongly recommend running the sorting strategy on a Linux system.
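The reminder above exists because `os.listdir` returns entries in an arbitrary, platform-dependent order, so frame sorting must be made explicit. A minimal, platform-independent sketch of a numeric sort (the helper name `natural_key` is ours, not part of the toolbox):

```python
import os
import re

def natural_key(path):
    # Split the basename into digit/non-digit runs so that the numeric parts
    # compare as integers: "image9" < "image10" < "image100" on any OS.
    name = os.path.basename(path)
    return [int(tok) if tok.isdigit() else tok for tok in re.split(r"(\d+)", name)]

frames = ["case1_image10.png", "case1_image9.png", "case1_image100.png"]
print(sorted(frames))                   # lexicographic: 10 < 100 < 9
print(sorted(frames, key=natural_key))  # numeric: 9 < 10 < 100
```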
================================================
FILE: eval/README.md
================================================
# VPS Evaluation Toolbox

This toolbox is used to evaluate the performance of the video polyp segmentation task.

# Usage

- Prerequisites of environment:

```bash
python -m pip install opencv-python tqdm prettytable scikit-learn
```

- Running the evaluation:

```bash
sh ./eval.sh
```

Running the script evaluates the results of all models on the SUN-SEG dataset simultaneously. If you want to evaluate specific models, please modify the `$MODEL_NAMES` variable in `eval.sh`, which corresponds to the argument `--model_lst`. Note that each model name must match the folder name under `./data/Pred/`.

In `vps_evaluator.py`, you can specify `--metric_list` to decide which metrics to apply. `--txt_name` denotes the folder name for the evaluation results. `--data_lst` selects the dataset split to evaluate, and `--check_integrity` toggles the integrity examination of result maps against the ground truth.

# Citation

If you have found our work useful, please use the following reference to cite this project:

    @article{ji2022video,
      title={Video polyp segmentation: A deep learning perspective},
      author={Ji, Ge-Peng and Xiao, Guobao and Chou, Yu-Cheng and Fan, Deng-Ping and Zhao, Kai and Chen, Geng and Van Gool, Luc},
      journal={Machine Intelligence Research},
      volume={19},
      number={6},
      pages={531--549},
      year={2022},
      publisher={Springer}
    }

    @inproceedings{ji2021progressively,
      title={Progressively normalized self-attention network for video polyp segmentation},
      author={Ji, Ge-Peng and Chou, Yu-Cheng and Fan, Deng-Ping and Chen, Geng and Fu, Huazhu and Jha, Debesh and Shao, Ling},
      booktitle={International Conference on Medical Image Computing and Computer-Assisted Intervention},
      pages={142--152},
      year={2021},
      organization={Springer}
    }

    @inproceedings{fan2020pranet,
      title={Pranet: Parallel reverse attention network for polyp segmentation},
      author={Fan, Deng-Ping and Ji, Ge-Peng and Zhou, Tao and Chen, Geng and Fu, Huazhu and Shen, Jianbing and Shao, Ling},
      booktitle={International conference on medical image computing and computer-assisted intervention},
      pages={263--273},
      year={2020},
      organization={Springer}
    }
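The per-split invocations that `eval.sh` launches can also be driven from Python; a minimal hypothetical driver (ours, not part of the toolbox; the flags and model name mirror those documented above):

```python
import subprocess

model = "2022-MIR-PNSPlus"  # must match a folder under ./data/Pred/
for split in ("TestEasyDataset/Seen", "TestEasyDataset/Unseen",
              "TestHardDataset/Seen", "TestHardDataset/Unseen"):
    # one evaluator run per dataset split, mirroring eval.sh
    subprocess.run(
        ["python", "vps_evaluator.py",
         "--data_lst", split,
         "--model_lst", model,
         "--txt_name", model],
        check=True,
    )
```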
================================================
FILE: eval/eval-result/2015-MICCAI-UNet/TestEasyDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2015-MICCAI-UNet | 0.720 | 0.810 | 0.543 | 0.625 | 0.606 | 0.508 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2015-MICCAI-UNet/TestHardDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2015-MICCAI-UNet | 0.710 | 0.801 | 0.524 | 0.619 | 0.602 | 0.493 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2018-TMI-UNet++/TestEasyDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2018-TMI-UNet++ | 0.731 | 0.803 | 0.560 | 0.633 | 0.620 | 0.524 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2018-TMI-UNet++/TestHardDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2018-TMI-UNet++ | 0.714 | 0.795 | 0.526 | 0.605 | 0.595 | 0.488 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2019-TPAMI-COSNet/TestEasyDataset_eval.txt
================================================
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2019-TPAMI-COSNet | 0.707 | 0.851 | 0.513 | 0.667 | 0.649 | 0.548 |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2019-TPAMI-COSNet/TestHardDataset_eval.txt
================================================
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2019-TPAMI-COSNet | 0.706 | 0.845 | 0.501 | 0.651 | 0.639 | 0.531 |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-AAAI-PCSA/TestEasyDataset_eval.txt
================================================
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2020-AAAI-PCSA | 0.728 | 0.845 | 0.515 | 0.649 | 0.631 | 0.518 |
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-AAAI-PCSA/TestHardDataset_eval.txt
================================================
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2020-AAAI-PCSA | 0.711 | 0.823 | 0.481 | 0.624 | 0.609 | 0.489 |
+-----------------+----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-23DCNN/TestEasyDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+---------+--------+
| TestEasyDataset | 2020-MICCAI-23DCNN | 0.817 | 0.872 | 0.698 | 0.755 | 0.668 |
+-----------------+--------------------+----------+-------+-----------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-23DCNN/TestHardDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+---------+--------+
| TestHardDataset | 2020-MICCAI-23DCNN | 0.806 | 0.863 | 0.671 | 0.737 | 0.643 |
+-----------------+--------------------+----------+-------+-----------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-ACSNet/TestEasyDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2020-MICCAI-ACSNet | 0.820 | 0.889 | 0.702 | 0.773 | 0.760 | 0.682 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-ACSNet/TestHardDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2020-MICCAI-ACSNet | 0.818 | 0.885 | 0.696 | 0.769 | 0.758 | 0.673 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-PraNet/TestEasyDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2020-MICCAI-PraNet | 0.780 | 0.877 | 0.649 | 0.710 | 0.689 | 0.608 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-MICCAI-PraNet/TestHardDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2020-MICCAI-PraNet | 0.759 | 0.846 | 0.615 | 0.683 | 0.660 | 0.569 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-TIP-MATNet/TestEasyDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2020-TIP-MATNet | 0.800 | 0.889 | 0.618 | 0.757 | 0.739 | 0.632 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2020-TIP-MATNet/TestHardDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2020-TIP-MATNet | 0.802 | 0.881 | 0.601 | 0.745 | 0.728 | 0.616 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-ICCV-DCFNet/TestEasyDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2021-ICCV-DCFNet | 0.536 | 0.560 | 0.294 | 0.353 | 0.344 | 0.274 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-ICCV-DCFNet/TestHardDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2021-ICCV-DCFNet | 0.542 | 0.575 | 0.301 | 0.363 | 0.357 | 0.284 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-ICCV-FSNet/TestEasyDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2021-ICCV-FSNet | 0.771 | 0.842 | 0.625 | 0.769 | 0.747 | 0.658 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-ICCV-FSNet/TestHardDataset_eval.txt
================================================
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2021-ICCV-FSNet | 0.763 | 0.849 | 0.608 | 0.752 | 0.738 | 0.641 |
+-----------------+-----------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-MICCAI-PNSNet/TestEasyDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2021-MICCAI-PNSNet | 0.805 | 0.843 | 0.677 | 0.738 | 0.724 | 0.649 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-MICCAI-PNSNet/TestHardDataset_eval.txt
================================================
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2021-MICCAI-PNSNet | 0.799 | 0.849 | 0.665 | 0.728 | 0.719 | 0.637 |
+-----------------+--------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-MICCAI-SANet/TestEasyDataset_eval.txt
================================================
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2021-MICCAI-SANet | 0.755 | 0.870 | 0.621 | 0.707 | 0.693 | 0.595 |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-MICCAI-SANet/TestHardDataset_eval.txt
================================================
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2021-MICCAI-SANet | 0.736 | 0.849 | 0.577 | 0.647 | 0.640 | 0.543 |
+-----------------+-------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2021-NIPS-AMD/TestEasyDataset_eval.txt
================================================
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2021-NIPS-AMD | 0.473 | 0.602 | 0.127 | 0.219 | 0.260 | 0.165 |
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+
================================================
FILE: eval/eval-result/2021-NIPS-AMD/TestHardDataset_eval.txt
================================================
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2021-NIPS-AMD | 0.474 | 0.585 | 0.124 | 0.209 | 0.244 | 0.155 |
+-----------------+---------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2022-TMI-PNSPlus/TestEasyDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestEasyDataset | 2022-MIR-PNSPlus | 0.837 | 0.910 | 0.723 | 0.803 | 0.787 | 0.704 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval-result/2022-TMI-PNSPlus/TestHardDataset_eval.txt
================================================
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| Dataset | Method | Smeasure | maxEm | wFmeasure | maxFm | maxDice | maxIoU |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+
| TestHardDataset | 2022-MIR-PNSPlus | 0.826 | 0.891 | 0.701 | 0.785 | 0.770 | 0.679 |
+-----------------+------------------+----------+-------+-----------+-------+---------+--------+

================================================
FILE: eval/eval.sh
================================================
# The candidate competitors are listed here:
# MODEL_NAMES=('2022-MIR-PNSPlus' '2021-NIPS-AMD' '2021-MICCAI-PNSNet' '2021-ICCV-FSNet' '2021-ICCV-DCFNet' '2020-TIP-MATNet' '2020-MICCAI-23DCNN' '2020-AAAI-PCSA' '2019-TPAMI-COSNet' '2021-TPAMI-SINetV2' '2021-MICCAI-SANet' '2020-MICCAI-PraNet' '2020-MICCAI-ACSNet' '2018-TMI-UNet++' '2015-MICCAI-UNet')
MODEL_NAMES=('2022-MIR-PNSPlus')

# make sure the logging directory exists before redirecting into it
mkdir -p ./loggings

for MODEL_NAME in ${MODEL_NAMES[*]}
do
    # one background run per dataset split, each with its own log file
    nohup python -u vps_evaluator.py --data_lst TestEasyDataset/Unseen --model_lst $MODEL_NAME --txt_name $MODEL_NAME >> ./loggings/$MODEL_NAME-1.log &
    nohup python -u vps_evaluator.py --data_lst TestEasyDataset/Seen --model_lst $MODEL_NAME --txt_name $MODEL_NAME >> ./loggings/$MODEL_NAME-2.log &
    nohup python -u vps_evaluator.py --data_lst TestHardDataset/Unseen --model_lst $MODEL_NAME --txt_name $MODEL_NAME >> ./loggings/$MODEL_NAME-3.log &
    nohup python -u vps_evaluator.py --data_lst TestHardDataset/Seen --model_lst $MODEL_NAME --txt_name $MODEL_NAME >> ./loggings/$MODEL_NAME-4.log &
done
================================================
FILE: eval/metrics.py
================================================
# -*- coding: utf-8 -*-
# @Time    : 2021/09/13
# @Author  : Johnson-Chou
# @Email   : johnson111788@gmail.com
# @FileName: metrics.py
# @Reference: https://github.com/mczhuge/SOCToolbox

import numpy as np
from scipy.ndimage import convolve, distance_transform_edt as bwdist

_EPS = np.spacing(1)
_TYPE = np.float64


def _prepare_data(pred: np.ndarray, gt: np.ndarray) -> tuple:
    # binarize the ground truth and rescale the prediction into [0, 1]
    gt = gt > 128
    pred = pred / 255
    if pred.max() != pred.min():
        pred = (pred - pred.min()) / (pred.max() - pred.min())
    return pred, gt


def _get_adaptive_threshold(matrix: np.ndarray, max_value: float = 1) -> float:
    return min(2 * matrix.mean(), max_value)


class Fmeasure(object):
    def __init__(self, length, beta: float = 0.3):
        self.beta = beta
        self.precisions = []
        self.recalls = []
        self.adaptive_fms = []
        self.changeable_fms = []

    def step(self, pred: np.ndarray, gt: np.ndarray, idx):
        pred, gt = _prepare_data(pred, gt)

        adaptive_fm = self.cal_adaptive_fm(pred=pred, gt=gt)
        self.adaptive_fms.append(adaptive_fm)

        precisions, recalls, changeable_fms = self.cal_pr(pred=pred, gt=gt)
        self.precisions.append(precisions)
        self.recalls.append(recalls)
        self.changeable_fms.append(changeable_fms)

    def cal_adaptive_fm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        adaptive_threshold = _get_adaptive_threshold(pred, max_value=1)
        binary_prediction = pred >= adaptive_threshold
        area_intersection = binary_prediction[gt].sum()
        if area_intersection == 0:
            adaptive_fm = 0
        else:
            pre = area_intersection / np.count_nonzero(binary_prediction)
            rec = area_intersection / np.count_nonzero(gt)
            # F_beta measure
            adaptive_fm = (1 + self.beta) * pre * rec / (self.beta * pre + rec)
        return adaptive_fm

    def cal_pr(self, pred: np.ndarray, gt: np.ndarray) -> tuple:
        # histogram-based precision/recall over all 256 thresholds at once
        pred = (pred * 255).astype(np.uint8)
        bins = np.linspace(0, 256, 257)
        fg_hist, _ = np.histogram(pred[gt], bins=bins)
        bg_hist, _ = np.histogram(pred[~gt], bins=bins)
        fg_w_thrs = np.cumsum(np.flip(fg_hist), axis=0)
        bg_w_thrs = np.cumsum(np.flip(bg_hist), axis=0)
        TPs = fg_w_thrs
        Ps = fg_w_thrs + bg_w_thrs
        Ps[Ps == 0] = 1
        T = max(np.count_nonzero(gt), 1)
        precisions = TPs / Ps
        recalls = TPs / T

        numerator = (1 + self.beta) * precisions * recalls
        denominator = np.where(numerator == 0, 1, self.beta * precisions + recalls)
        changeable_fms = numerator / denominator
        return precisions, recalls, changeable_fms

    def get_results(self):
        adaptive_fm = np.mean(np.array(self.adaptive_fms, _TYPE))
        # precision = np.mean(np.array(self.precisions, dtype=_TYPE), axis=0)  # N, 256
        # recall = np.mean(np.array(self.recalls, dtype=_TYPE), axis=0)  # N, 256
        changeable_fm = np.mean(np.array(self.changeable_fms, dtype=_TYPE), axis=0)
        # return dict(fm=dict(adp=adaptive_fm, curve=changeable_fm),
        #             pr=dict(p=precision, r=recall))
        # meanFm/maxFm share the same 256-point curve; the evaluator reduces it
        # with mean()/max() downstream
        return dict(adpFm=adaptive_fm, meanFm=changeable_fm, maxFm=changeable_fm)


class MAE(object):
    def __init__(self, length):
        self.maes = []

    def step(self, pred: np.ndarray, gt: np.ndarray, idx):
        pred, gt = _prepare_data(pred, gt)
        mae = self.cal_mae(pred, gt)
        self.maes.append(mae)

    def cal_mae(self, pred: np.ndarray, gt: np.ndarray) -> float:
        mae = np.mean(np.abs(pred - gt))
        return mae

    def get_results(self):
        mae = np.mean(np.array(self.maes, _TYPE))
        return dict(MAE=mae)


class Smeasure(object):
    def __init__(self, length, alpha: float = 0.5):
        self.sms = []
        self.alpha = alpha

    def step(self, pred: np.ndarray, gt: np.ndarray, idx):
        pred, gt = _prepare_data(pred=pred, gt=gt)
        sm = self.cal_sm(pred, gt)
        self.sms.append(sm)

    def cal_sm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        y = np.mean(gt)
        if y == 0:
            sm = 1 - np.mean(pred)
        elif y == 1:
            sm = np.mean(pred)
        else:
            sm = self.alpha * self.object(pred, gt) + (1 - self.alpha) * self.region(pred, gt)
            sm = max(0, sm)
        return sm

    def object(self, pred: np.ndarray, gt: np.ndarray) -> float:
        fg = pred * gt
        bg = (1 - pred) * (1 - gt)
        u = np.mean(gt)
        object_score = u * self.s_object(fg, gt) + (1 - u) * self.s_object(bg, 1 - gt)
        return object_score

    def s_object(self, pred: np.ndarray, gt: np.ndarray) -> float:
        x = np.mean(pred[gt == 1])
        sigma_x = np.std(pred[gt == 1], ddof=1)
        score = 2 * x / (np.power(x, 2) + 1 + sigma_x + _EPS)
        return score

    def region(self, pred: np.ndarray, gt: np.ndarray) -> float:
        x, y = self.centroid(gt)
        part_info = self.divide_with_xy(pred, gt, x, y)
        w1, w2, w3, w4 = part_info['weight']
        pred1, pred2, pred3, pred4 = part_info['pred']
        gt1, gt2, gt3, gt4 = part_info['gt']
        score1 = self.ssim(pred1, gt1)
        score2 = self.ssim(pred2, gt2)
        score3 = self.ssim(pred3, gt3)
        score4 = self.ssim(pred4, gt4)
        return w1 * score1 + w2 * score2 + w3 * score3 + w4 * score4

    def centroid(self, matrix: np.ndarray) -> tuple:
        """
        To ensure consistency with the matlab code, one is added to the centroid coordinate,
        so there is no need to use the redundant addition operation when dividing the region later,
        because the sequence generated by ``1:X`` in matlab will contain ``X``.

        :param matrix: a bool data array
        :return: the centroid coordinate
        """
        h, w = matrix.shape
        area_object = np.count_nonzero(matrix)
        if area_object == 0:
            x = np.round(w / 2)
            y = np.round(h / 2)
        else:
            # More details can be found at: https://www.yuque.com/lart/blog/gpbigm
            y, x = np.argwhere(matrix).mean(axis=0).round()
        return int(x) + 1, int(y) + 1

    def divide_with_xy(self, pred: np.ndarray, gt: np.ndarray, x, y) -> dict:
        h, w = gt.shape
        area = h * w

        gt_LT = gt[0:y, 0:x]
        gt_RT = gt[0:y, x:w]
        gt_LB = gt[y:h, 0:x]
        gt_RB = gt[y:h, x:w]

        pred_LT = pred[0:y, 0:x]
        pred_RT = pred[0:y, x:w]
        pred_LB = pred[y:h, 0:x]
        pred_RB = pred[y:h, x:w]

        w1 = x * y / area
        w2 = y * (w - x) / area
        w3 = (h - y) * x / area
        w4 = 1 - w1 - w2 - w3

        return dict(gt=(gt_LT, gt_RT, gt_LB, gt_RB),
                    pred=(pred_LT, pred_RT, pred_LB, pred_RB),
                    weight=(w1, w2, w3, w4))

    def ssim(self, pred: np.ndarray, gt: np.ndarray) -> float:
        h, w = pred.shape
        N = h * w

        x = np.mean(pred)
        y = np.mean(gt)

        sigma_x = np.sum((pred - x) ** 2) / (N - 1)
        sigma_y = np.sum((gt - y) ** 2) / (N - 1)
        sigma_xy = np.sum((pred - x) * (gt - y)) / (N - 1)

        alpha = 4 * x * y * sigma_xy
        beta = (x ** 2 + y ** 2) * (sigma_x + sigma_y)

        if alpha != 0:
            score = alpha / (beta + _EPS)
        elif alpha == 0 and beta == 0:
            score = 1
        else:
            score = 0
        return score

    def get_results(self):
        sm = np.mean(np.array(self.sms, dtype=_TYPE))
        return dict(Smeasure=sm)


class Emeasure(object):
    def __init__(self, length):
        self.adaptive_ems = []
        self.changeable_ems = []

    def step(self, pred: np.ndarray, gt: np.ndarray, idx):
        pred, gt = _prepare_data(pred=pred, gt=gt)
        self.gt_fg_numel = np.count_nonzero(gt)
        self.gt_size = gt.shape[0] * gt.shape[1]

        changeable_ems = self.cal_changeable_em(pred, gt)
        self.changeable_ems.append(changeable_ems)
        adaptive_em = self.cal_adaptive_em(pred, gt)
        self.adaptive_ems.append(adaptive_em)

    def cal_adaptive_em(self, pred: np.ndarray, gt: np.ndarray) -> float:
        adaptive_threshold = _get_adaptive_threshold(pred, max_value=1)
        adaptive_em = self.cal_em_with_threshold(pred, gt, threshold=adaptive_threshold)
        return adaptive_em

    def cal_changeable_em(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
        changeable_ems = self.cal_em_with_cumsumhistogram(pred, gt)
        return changeable_ems

    def cal_em_with_threshold(self, pred: np.ndarray, gt: np.ndarray, threshold: float) -> float:
        binarized_pred = pred >= threshold
        fg_fg_numel = np.count_nonzero(binarized_pred & gt)
        fg_bg_numel = np.count_nonzero(binarized_pred & ~gt)
        fg___numel = fg_fg_numel + fg_bg_numel
        bg___numel = self.gt_size - fg___numel

        if self.gt_fg_numel == 0:
            enhanced_matrix_sum = bg___numel
        elif self.gt_fg_numel == self.gt_size:
            enhanced_matrix_sum = fg___numel
        else:
            parts_numel, combinations = self.generate_parts_numel_combinations(
                fg_fg_numel=fg_fg_numel, fg_bg_numel=fg_bg_numel,
                pred_fg_numel=fg___numel, pred_bg_numel=bg___numel,
            )
            results_parts = []
            for i, (part_numel, combination) in enumerate(zip(parts_numel, combinations)):
                align_matrix_value = 2 * (combination[0] * combination[1]) / \
                                     (combination[0] ** 2 + combination[1] ** 2 + _EPS)
                enhanced_matrix_value = (align_matrix_value + 1) ** 2 / 4
                results_parts.append(enhanced_matrix_value * part_numel)
            enhanced_matrix_sum = sum(results_parts)

        em = enhanced_matrix_sum / (self.gt_size - 1 + _EPS)
        return em

    def cal_em_with_cumsumhistogram(self, pred: np.ndarray, gt: np.ndarray) -> np.ndarray:
        pred = (pred * 255).astype(np.uint8)
        bins = np.linspace(0, 256, 257)
        fg_fg_hist, _ = np.histogram(pred[gt], bins=bins)
        fg_bg_hist, _ = np.histogram(pred[~gt], bins=bins)
        fg_fg_numel_w_thrs = np.cumsum(np.flip(fg_fg_hist), axis=0)
        fg_bg_numel_w_thrs = np.cumsum(np.flip(fg_bg_hist), axis=0)
        fg___numel_w_thrs = fg_fg_numel_w_thrs + fg_bg_numel_w_thrs
        bg___numel_w_thrs = self.gt_size - fg___numel_w_thrs

        if self.gt_fg_numel == 0:
            enhanced_matrix_sum = bg___numel_w_thrs
        elif self.gt_fg_numel == self.gt_size:
            enhanced_matrix_sum = fg___numel_w_thrs
        else:
            parts_numel_w_thrs, combinations = self.generate_parts_numel_combinations(
                fg_fg_numel=fg_fg_numel_w_thrs, fg_bg_numel=fg_bg_numel_w_thrs,
                pred_fg_numel=fg___numel_w_thrs, pred_bg_numel=bg___numel_w_thrs,
            )
            results_parts = np.empty(shape=(4, 256), dtype=np.float64)
            for i, (part_numel, combination) in enumerate(zip(parts_numel_w_thrs, combinations)):
                align_matrix_value = 2 * (combination[0] * combination[1]) / \
                                     (combination[0] ** 2 + combination[1] ** 2 + _EPS)
                enhanced_matrix_value = (align_matrix_value + 1) ** 2 / 4
                results_parts[i] = enhanced_matrix_value * part_numel
            enhanced_matrix_sum = results_parts.sum(axis=0)

        em = enhanced_matrix_sum / (self.gt_size - 1 + _EPS)
        return em

    def generate_parts_numel_combinations(self, fg_fg_numel, fg_bg_numel, pred_fg_numel, pred_bg_numel):
        bg_fg_numel = self.gt_fg_numel - fg_fg_numel
        bg_bg_numel = pred_bg_numel - bg_fg_numel

        parts_numel = [fg_fg_numel, fg_bg_numel, bg_fg_numel, bg_bg_numel]

        mean_pred_value = pred_fg_numel / self.gt_size
        mean_gt_value = self.gt_fg_numel / self.gt_size

        demeaned_pred_fg_value = 1 - mean_pred_value
        demeaned_pred_bg_value = 0 - mean_pred_value
        demeaned_gt_fg_value = 1 - mean_gt_value
        demeaned_gt_bg_value = 0 - mean_gt_value

        combinations = [
            (demeaned_pred_fg_value, demeaned_gt_fg_value),
            (demeaned_pred_fg_value, demeaned_gt_bg_value),
            (demeaned_pred_bg_value, demeaned_gt_fg_value),
            (demeaned_pred_bg_value, demeaned_gt_bg_value)
        ]
        return parts_numel, combinations

    def get_results(self):
        adaptive_em = np.mean(np.array(self.adaptive_ems, dtype=_TYPE))
        changeable_em = np.mean(np.array(self.changeable_ems, dtype=_TYPE), axis=0)
        return dict(adpEm=adaptive_em, meanEm=changeable_em, maxEm=changeable_em)


class WeightedFmeasure(object):
    def __init__(self, length, beta: float = 1):
        self.beta = beta
        self.weighted_fms = []

    def step(self, pred: np.ndarray, gt: np.ndarray, idx):
        pred, gt = _prepare_data(pred=pred, gt=gt)
        if np.all(~gt):
            wfm = 0
        else:
            wfm = self.cal_wfm(pred, gt)
        self.weighted_fms.append(wfm)

    def cal_wfm(self, pred: np.ndarray, gt: np.ndarray) -> float:
        # [Dst,IDXT] = bwdist(dGT);
        Dst, Idxt = bwdist(gt == 0, return_indices=True)

        # %Pixel dependency
        # E = abs(FG-dGT);
        E = np.abs(pred - gt)
        Et = np.copy(E)
        Et[gt == 0] = Et[Idxt[0][gt == 0], Idxt[1][gt == 0]]

        # K = fspecial('gaussian',7,5);
        # EA = imfilter(Et,K);
        K = self.matlab_style_gauss2D((7, 7), sigma=5)
        EA = convolve(Et, weights=K, mode="constant", cval=0)

        # NOTE: the body between the two MATLAB comments below was lost in
        # extraction and is restored following the referenced SOCToolbox code.
        # MIN_E_EA = E;
        # MIN_E_EA(GT & EA<E) = EA(GT & EA<E);
        MIN_E_EA = np.where(gt & (EA < E), EA, E)

        # %Pixel importance
        # B = ones(size(GT));
        # B(~GT) = 2-1*exp(log(1-0.5)/5.*Dst(~GT));
        # Ew = MIN_E_EA.*B;
        B = np.where(gt == 0, 2 - np.exp(np.log(0.5) / 5 * Dst), np.ones_like(gt))
        Ew = MIN_E_EA * B

        TPw = np.sum(gt) - np.sum(Ew[gt == 1])
        FPw = np.sum(Ew[gt == 0])
        R = 1 - np.mean(Ew[gt == 1])
        P = TPw / (_EPS + TPw + FPw)

        # Q = (1+Beta^2)*(R*P)./(eps+R+(Beta.*P));
        wfm = (1 + self.beta) * R * P / (_EPS + R + self.beta * P)
        return wfm

    def matlab_style_gauss2D(self, shape: tuple = (7, 7), sigma: int = 5) -> np.ndarray:
        """
        2D gaussian mask - should give the same result as MATLAB's
        fspecial('gaussian',[shape],[sigma])
        """
        m, n = [(ss - 1) / 2 for ss in shape]
        y, x = np.ogrid[-m: m + 1, -n: n + 1]
        h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
        h[h < np.finfo(h.dtype).eps * h.max()] = 0
        sumh = h.sum()
        if sumh != 0:
            h /= sumh
        return h

    def get_results(self):
        weighted_fm = np.mean(np.array(self.weighted_fms, dtype=_TYPE))
        return dict(wFmeasure=weighted_fm)


class Medical(object):
    def __init__(self, length):
        self.Thresholds = np.linspace(1, 0, 256)
        self.threshold_Sensitivity = np.zeros((length, len(self.Thresholds)))
        self.threshold_Specificity = np.zeros((length, len(self.Thresholds)))
        self.threshold_Dice = np.zeros((length, len(self.Thresholds)))
        self.threshold_IoU = np.zeros((length, len(self.Thresholds)))

    def Fmeasure_calu(self, pred, gt, threshold):
        if threshold > 1:
            threshold = 1

        Label3 = np.zeros_like(gt)
        Label3[pred >= threshold] = 1

        NumRec = np.sum(Label3 == 1)
        NumNoRec = np.sum(Label3 == 0)
        LabelAnd = (Label3 == 1) & (gt == 1)
        NumAnd = np.sum(LabelAnd == 1)
        num_obj = np.sum(gt)
        num_pred = np.sum(Label3)

        FN = num_obj - NumAnd
        FP = NumRec - NumAnd
        TN = NumNoRec - FN

        if NumAnd == 0:
            RecallFtem = 0
            Dice = 0
            SpecifTem = 0
            IoU = 0
        else:
            IoU = NumAnd / (FN + NumRec)
            RecallFtem = NumAnd / num_obj
            SpecifTem = TN / (TN + FP)
            Dice = 2 * NumAnd / (num_obj + num_pred)

        return RecallFtem, SpecifTem, Dice, IoU

    def step(self, pred, gt, idx):
        pred, gt = _prepare_data(pred=pred, gt=gt)

        threshold_Rec = np.zeros(len(self.Thresholds))
        threshold_Iou = np.zeros(len(self.Thresholds))
        threshold_Spe = np.zeros(len(self.Thresholds))
        threshold_Dic = np.zeros(len(self.Thresholds))
        # sweep all 256 thresholds for this frame
        for j, threshold in enumerate(self.Thresholds):
            threshold_Rec[j], threshold_Spe[j], threshold_Dic[j], \
                threshold_Iou[j] = self.Fmeasure_calu(pred, gt, threshold)

        self.threshold_Sensitivity[idx, :] = threshold_Rec
        self.threshold_Specificity[idx, :] = threshold_Spe
        self.threshold_Dice[idx, :] = threshold_Dic
        self.threshold_IoU[idx, :] = threshold_Iou

    def get_results(self):
        column_Sen = np.mean(self.threshold_Sensitivity, axis=0)
        column_Spe = np.mean(self.threshold_Specificity, axis=0)
        column_Dic = np.mean(self.threshold_Dice, axis=0)
        column_IoU = np.mean(self.threshold_IoU, axis=0)
        # each entry is a 256-point curve; the evaluator applies mean()/max()
        return dict(meanSen=column_Sen, meanSpe=column_Spe, meanDice=column_Dic, meanIoU=column_IoU,
                    maxSen=column_Sen, maxSpe=column_Spe, maxDice=column_Dic, maxIoU=column_IoU)
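A toy round-trip through the metric classes above (synthetic masks, run from within `eval/`; purely illustrative, not a regression test):

```python
import numpy as np
from metrics import Smeasure, MAE, Medical

gt = np.zeros((64, 64), dtype=np.uint8)
gt[16:48, 16:48] = 255                      # square ground-truth "polyp"
pred = np.zeros((64, 64), dtype=np.uint8)
pred[20:48, 16:48] = 200                    # slightly shifted prediction

sm, mae, med = Smeasure(length=1), MAE(length=1), Medical(length=1)
for m in (sm, mae, med):
    m.step(pred=pred, gt=gt, idx=0)

print(sm.get_results()["Smeasure"])         # scalar in [0, 1]
print(mae.get_results()["MAE"])             # scalar in [0, 1]
print(med.get_results()["maxDice"].max())   # best Dice over 256 thresholds
```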
"Medical"} res, metric_module = {}, {} metric_module_list = [module_map_name[metric] for metric in metrics] metric_module_list = list(set(metric_module_list)) # define measures for metric_module_name in metric_module_list: metric_module[metric_module_name] = getattr(__import__("metrics", fromlist=[metric_module_name]), metric_module_name)(length=len(gt_pth_lst)) assert len(gt_pth_lst) == len(pred_pth_lst) # evaluator for idx in tqdm(range(len(gt_pth_lst))): gt_pth = gt_pth_lst[idx] pred_pth = pred_pth_lst[idx] # print(gt_pth, pred_pth) assert os.path.isfile(gt_pth) and os.path.isfile(pred_pth) pred_ary = cv2.imread(pred_pth, cv2.IMREAD_GRAYSCALE) gt_ary = cv2.imread(gt_pth, cv2.IMREAD_GRAYSCALE) # ensure the shape of prediction is matched to gt if not gt_ary.shape == pred_ary.shape: pred_ary = cv2.resize(pred_ary, (gt_ary.shape[1], gt_ary.shape[0])) for module in metric_module.values(): module.step(pred=pred_ary, gt=gt_ary, idx=idx) for metric in metrics: module = metric_module[module_map_name[metric]] res[metric] = module.get_results()[metric] return res def eval_engine_vps(opt, txt_save_path): # evaluation for whole dataset for _data_name in opt.data_lst[0]: print('#' * 20, 'Current Dataset:', _data_name, '#' * 20) filename = os.path.join(txt_save_path, '{}_eval.txt'.format(_data_name.replace('/', '-'))) with open(filename, 'w+') as file_to_write: # initial settings for PrettyTable tb = pt.PrettyTable() names = ["Dataset", "Method"] names.extend(opt.metric_list) tb.field_names = names # iter each method for current dataset for _model_name in opt.model_lst[0]: print('#' * 10, 'Current Method:', _model_name, '#' * 10) gt_src = os.path.join(opt.gt_root, _data_name, 'GT') pred_src = os.path.join(opt.pred_root, _model_name, _data_name) # get the sequence list for current dataset case_list = os.listdir(gt_src) mean_case_score_list, max_case_score_list = [], [] # iter each video frame for current method-dataset for case in case_list: case_gt_name_list = glob.glob(gt_src + '/{}/*.png'.format(case)) try: case_gt_name_list.sort( key=lambda name: (int(name.split('/')[-2]), int(name.split('/')[-1].rstrip('.png'))) ) except: case_gt_name_list.sort( key=lambda name: (int(name.split("/")[-2].split('case')[1].split('_')[0]), 0 if not len(name.split('/')[-2].split('_')) > 1 else int( name.split('/')[-2].split('_')[1]), int(name.split('/')[-1].split('-')[0].split('_')[-1]), int(name.split('/')[-1].split('_a')[1].split('_')[0]), int(name.split('/')[-1].split('_image')[1].split('.png')[ 0]))) # for fair comparison, we remove the first frame and last frame in the video suggested by reference: Shifting More Attention to Video Salient Object Detection # https://github.com/DengPingFan/DAVSOD/blob/master/EvaluateTool/main.m case_gt_name_list = case_gt_name_list[1:-1] case_pred_name_list = [gt.replace(gt_src, pred_src) for gt in case_gt_name_list] result = evaluator( gt_pth_lst=case_gt_name_list, pred_pth_lst=case_pred_name_list, metrics=opt.metric_list ) mean_score_ind, max_score_ind = [], [] mean_score_list, max_score_list = [], [] for i, (name, value) in enumerate(result.items()): if 'max' in name or 'mean' in name: if 'max' in name: max_score_list.append(value) max_score_ind.append(i) else: mean_score_list.append(value) mean_score_ind.append(i) else: mean_score_list.append([value]*256) mean_score_ind.append(i) # calculate all the metrics at frame-level max_case_score_list.append(max_score_list) mean_case_score_list.append(mean_score_list) # calculate all the metrics at sequence-level max_case_score_list = 
np.mean(np.array(max_case_score_list), axis=0) mean_case_score_list = np.mean(np.array(mean_case_score_list), axis=0) case_score_list = [] for index in range(len(opt.metric_list)): real_max_index = np.where(np.array(max_score_ind) == index) real_mean_index = np.where(np.array(mean_score_ind) == index) if len(real_max_index[0]) > 0: case_score_list.append(max_case_score_list[real_max_index[0]].max().round(3)) else: case_score_list.append(mean_case_score_list[real_mean_index[0]].mean().round(3)) final_score_list = ['{:.3f}'.format(case) for case in case_score_list] tb.add_row([_data_name.replace('/', '-'), _model_name] + list(final_score_list)) print(tb) file_to_write.write(str(tb)) file_to_write.close() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( '--gt_root', type=str, help='custom your ground-truth root', default='../data/SUN-SEG-Annotation/') parser.add_argument( '--pred_root', type=str, help='custom your prediction root', default='../data/Pred/') parser.add_argument( '--metric_list', type=list, help='set the evaluation metrics', default=['Smeasure', 'maxEm', 'wFmeasure', 'maxDice', 'maxIoU'], choices=["Smeasure", "wFmeasure", "MAE", "adpEm", "meanEm", "maxEm", "adpFm", "meanFm", "maxFm", "meanSen", "maxSen", "meanSpe", "maxSpe", "meanDice", "maxDice", "meanIoU", "maxIoU"]) parser.add_argument( '--data_lst', type=str, help='set the dataset what you wanna to test', nargs='+', action='append', choices=['TestEasyDataset/Seen', 'TestHardDataset/Seen', 'TestEasyDataset/Unseen', 'TestHardDataset/Unseen']) parser.add_argument( '--model_lst', type=str, help='candidate competitors', nargs='+', action='append', choices=['2015-MICCAI-UNet', '2018-TMI-UNet++', '2020-MICCAI-ACSNet', '2020-MICCAI-PraNet', '2021-MICCAI-SANet', '2019-TPAMI-COSNet', '2020-AAAI-PCSA', '2020-MICCAI-23DCNN', '2020-TIP-MATNet', '2021-ICCV-DCFNet', '2021-ICCV-FSNet', '2021-MICCAI-PNSNet', '2021-NIPS-AMD', '2022-MIR-PNSPlus']) parser.add_argument( '--txt_name', type=str, help='logging root', default='Benchmark') parser.add_argument( '--check_integrity', type=bool, help='whether to check the file integrity', default=True) opt = parser.parse_args() txt_save_path = './eval-result/{}/'.format(opt.txt_name) os.makedirs(txt_save_path, exist_ok=True) # TODO: check the integrity of each candidates @Johnson-Chou if opt.check_integrity: for _data_name in opt.data_lst[0]: for _model_name in opt.model_lst[0]: gt_pth = os.path.join(opt.gt_root, _data_name, 'GT') pred_pth = os.path.join(opt.pred_root, _model_name, _data_name) if not sorted(os.listdir(gt_pth)) == sorted(os.listdir(pred_pth)): print(len(sorted(os.listdir(gt_pth))), len(sorted(os.listdir(pred_pth)))) print('The {} Dataset of {} Model is not matching to the ground-truth'.format(_data_name, _model_name)) # raise Exception('check done') else: print('>>> Skip check the integrity of each candidates ...') # start eval engine eval_engine_vps(opt, txt_save_path) ================================================ FILE: lib/__init__.py ================================================ ================================================ FILE: lib/dataloader/__init__.py ================================================ ================================================ FILE: lib/dataloader/dataloader.py ================================================ import os import torch from torch.utils.data import Dataset from scripts.config import config from lib.dataloader.preprocess import * class VideoDataset(Dataset): def __init__(self, video_dataset, transform=None, 
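For reference, the prediction tree the evaluator expects mirrors the ground-truth tree, since frame pairs are formed by a simple `gt_src`-to-`pred_src` path replacement. A hypothetical illustration with the default roots above (case name is a placeholder):

```python
import os

gt_root, pred_root = "../data/SUN-SEG-Annotation", "../data/Pred"
split, model, case = "TestEasyDataset/Seen", "2022-MIR-PNSPlus", "case1_1"

gt_dir = os.path.join(gt_root, split, "GT", case)
pred_dir = os.path.join(pred_root, model, split, case)
# every GT mask <name>.png must exist as the same <name>.png under pred_dir
print(gt_dir)
print(pred_dir)
```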
time_interval=1): super(VideoDataset, self).__init__() self.time_clips = config.video_time_clips self.video_train_list = [] video_root = os.path.join(config.dataset_root, video_dataset) img_root = os.path.join(video_root, 'Frame') gt_root = os.path.join(video_root, 'GT') cls_list = os.listdir(img_root) self.video_filelist = {} for cls in cls_list: self.video_filelist[cls] = [] cls_img_path = os.path.join(img_root, cls) cls_label_path = os.path.join(gt_root, cls) tmp_list = os.listdir(cls_img_path) tmp_list.sort(key=lambda name: ( int(name.split('-')[0].split('_')[-1]), int(name.split('_a')[1].split('_')[0]), int(name.split('_image')[1].split('.jpg')[0]))) for filename in tmp_list: self.video_filelist[cls].append(( os.path.join(cls_img_path, filename), os.path.join(cls_label_path, filename.replace(".jpg", ".png")) )) # ensemble for cls in cls_list: li = self.video_filelist[cls] for begin in range(1, len(li) - (self.time_clips - 1) * time_interval - 1): batch_clips = [] batch_clips.append(li[0]) for t in range(self.time_clips): batch_clips.append(li[begin + time_interval * t]) self.video_train_list.append(batch_clips) self.img_label_transform = transform def __getitem__(self, idx): img_label_li = self.video_train_list[idx] IMG = None LABEL = None img_li = [] label_li = [] for idx, (img_path, label_path) in enumerate(img_label_li): img = Image.open(img_path).convert('RGB') label = Image.open(label_path).convert('L') img_li.append(img) label_li.append(label) img_li, label_li = self.img_label_transform(img_li, label_li) for idx, (img, label) in enumerate(zip(img_li, label_li)): if idx == 0: IMG = torch.zeros(len(img_li), *(img.shape)) LABEL = torch.zeros(len(img_li) - 1, *(label.shape)) IMG[idx, :, :, :] = img else: IMG[idx, :, :, :] = img LABEL[idx - 1, :, :, :] = label return IMG, LABEL def __len__(self): return len(self.video_train_list) def get_video_dataset(): statistics = torch.load(config.data_statistics) trsf_main = Compose_imglabel([ Resize_video(config.size[0], config.size[1]), Random_crop_Resize_Video(7), Random_horizontal_flip_video(0.5), toTensor_video(), Normalize_video(statistics["mean"], statistics["std"]) ]) train_loader = VideoDataset(config.dataset, transform=trsf_main, time_interval=1) return train_loader if __name__ == "__main__": statistics = torch.load(config.data_statistics) trsf_main = Compose_imglabel([ Resize_video(config.size[0], config.size[1]), Random_crop_Resize_Video(7), Random_horizontal_flip_video(0.5), toTensor_video(), Normalize_video(statistics["mean"], statistics["std"]) ]) train_loader = VideoDataset(config.dataset, transform=trsf_main, time_interval=1) ================================================ FILE: lib/dataloader/preprocess.py ================================================ import random from PIL import Image from torchvision.transforms import ToTensor as torchtotensor class Compose_imglabel(object): def __init__(self, transforms): self.transforms = transforms def __call__(self, img, label): for t in self.transforms: img, label = t(img, label) return img, label class Random_crop_Resize_Video(object): def _randomCrop(self, img, label, x, y): width, height = img.size region = [x, y, width - x, height - y] img, label = img.crop(region), label.crop(region) img = img.resize((width, height), Image.BILINEAR) label = label.resize((width, height), Image.NEAREST) return img, label def __init__(self, crop_size): self.crop_size = crop_size def __call__(self, imgs, labels): res_img = [] res_label = [] x, y = random.randint(0, self.crop_size), 
random.randint(0, self.crop_size) for img, label in zip(imgs, labels): img, label = self._randomCrop(img, label, x, y) res_img.append(img) res_label.append(label) return res_img, res_label class Random_horizontal_flip_video(object): def _horizontal_flip(self, img, label): return img.transpose(Image.FLIP_LEFT_RIGHT), label.transpose(Image.FLIP_LEFT_RIGHT) def __init__(self, prob): ''' :param prob: should be (0,1) ''' assert prob >= 0 and prob <= 1, "prob should be [0,1]" self.prob = prob def __call__(self, imgs, labels): ''' flip img and label simultaneously :param img:should be PIL image :param label:should be PIL image :return: ''' if random.random() < self.prob: res_img = [] res_label = [] for img, label in zip(imgs, labels): img, label = self._horizontal_flip(img, label) res_img.append(img) res_label.append(label) return res_img, res_label else: return imgs, labels class Resize_video(object): def __init__(self, height, width): self.height = height self.width = width def __call__(self, imgs, labels): res_img = [] res_label = [] for img, label in zip(imgs, labels): res_img.append(img.resize((self.width, self.height), Image.BILINEAR)) res_label.append(label.resize((self.width, self.height), Image.NEAREST)) return res_img, res_label class Normalize_video(object): def __init__(self, mean, std): self.mean, self.std = mean, std def __call__(self, imgs, labels): res_img = [] for img in imgs: for i in range(3): img[:, :, i] -= float(self.mean[i]) for i in range(3): img[:, :, i] /= float(self.std[i]) res_img.append(img) return res_img, labels class toTensor_video(object): def __init__(self): self.totensor = torchtotensor() def __call__(self, imgs, labels): res_img = [] res_label = [] for img, label in zip(imgs, labels): img, label = self.totensor(img), self.totensor(label).long() res_img.append(img) res_label.append(label) return res_img, res_label ================================================ FILE: lib/module/LightRFB.py ================================================ import torch import torch.nn as nn class h_sigmoid(nn.Module): def __init__(self, inplace=True): super(h_sigmoid, self).__init__() self.relu = nn.ReLU6(inplace=inplace) def forward(self, x): return self.relu(x + 3) / 6 class SELayer(nn.Module): def __init__(self, channel, reduction=4): super(SELayer, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Sequential( nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), nn.Linear(channel // reduction, channel), h_sigmoid() ) def forward(self, x): b, c, _, _ = x.size() y = self.avg_pool(x).view(b, c) y = self.fc(y).view(b, c, 1, 1) return x * y class BasicConv(nn.Module): def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False): super(BasicConv, self).__init__() self.out_channels = out_planes self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None self.relu = nn.PReLU() if relu else None def forward(self, x): x = self.conv(x) if self.bn is not None: x = self.bn(x) if self.relu is not None: x = self.relu(x) return x class LightRFB(nn.Module): def __init__(self, channels_in=1024, channels_mid=128, channels_out=32): super(LightRFB, self).__init__() self.global_se = SELayer(channels_in) self.reduce = nn.Sequential(nn.Conv2d(channels_in, channels_mid, kernel_size=1, bias=False), nn.BatchNorm2d(channels_mid), 
nn.PReLU(channels_mid)) self.br0 = nn.Sequential( BasicConv(channels_mid, channels_mid, kernel_size=1, bias=False, bn=True, relu=True), BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=1, padding=1, groups=channels_mid, bias=False, relu=False), ) self.br1 = nn.Sequential( BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=1, padding=1, groups=channels_mid, bias=False, bn=True, relu=False), BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False, bn=True, relu=True), BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=3, padding=3, groups=channels_mid, bias=False, relu=False), ) self.br2 = nn.Sequential( BasicConv(channels_mid, channels_mid, kernel_size=5, dilation=1, padding=2, groups=channels_mid, bias=False, bn=True, relu=False), BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False, bn=True, relu=True), BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=5, padding=5, groups=channels_mid, bias=False, relu=False), ) self.br3 = nn.Sequential( BasicConv(channels_mid, channels_mid, kernel_size=7, dilation=1, padding=3, groups=channels_mid, bias=False, bn=True, relu=False), BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False, bn=True, relu=True), BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=7, padding=7, groups=channels_mid, bias=False, relu=False), ) self.point_global = BasicConv(channels_mid * 4 + channels_in, channels_out, kernel_size=1, bias=False, bn=True, relu=True) def forward(self, x): x_reduce = self.reduce(self.global_se(x)) x0 = self.br0(x_reduce) x1 = self.br1(x_reduce) x2 = self.br2(x_reduce) x3 = self.br3(x_reduce) out = self.point_global(torch.cat([x, x0, x1, x2, x3], dim=1)) return out if __name__ == "__main__": m = LightRFB(196, 128, 32) t = torch.zeros(1, 196, 14, 14) print(m(t).shape) ================================================ FILE: lib/module/PNS/PNS_Module/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.0 FATAL_ERROR) project(SA) find_package(Torch REQUIRED) find_package(CUDA REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") if(CUDA_FOUND) # add -Wextra compiler flag for gcc compilations if (UNIX) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0") endif (UNIX) # add debugging to CUDA NVCC flags. For NVidia's NSight tools. 
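# NOTE: the lines below add device-side debug info for NSight, gather the
# CUDA/C++ sources, and build a standalone `SA` executable for testing the
# kernels; the Python extension (`self_cuda_backend`) imported by
# PNSPlusModule.py is instead compiled via lib/module/PNS/setup.py.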
set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG} "-G") file( GLOB cu *.cu) file( GLOB hdr *.hpp *.h ) SET (CPP_FILES sa_ext.cpp) CUDA_ADD_EXECUTABLE(SA ${CPP_FILES} ${cu} ${hdr}) target_link_libraries(SA "${TORCH_LIBRARIES}") set_property(TARGET SA PROPERTY CXX_STANDARD 11) else(CUDA_FOUND) message("CUDA is not found!") endif() ================================================ FILE: lib/module/PNS/PNS_Module/reference.cpp ================================================ #include //#include #include #include //串行比对 void sa_weight_forward_Ref(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation){ int diameter=2*radius+1; for(int batch=0;batch=0&&dw+w>=0){ for(int c=0;c=0&&h+dh=0&&w+dw=0&&h+dh=0&&w+dw=0&&h+dh=0&&w+dw=0&&dw+w>=0){ for(int c=0;c=0&&h+dh=0&&w+dw #include #include #include #include #define TensorAccessor5D torch::PackedTensorAccessor /* #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 #else static __inline__ __device__ double atomicAdd(double *address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; if (val==0.0) return __longlong_as_double(old); do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); } #endif */ template __global__ void sa_weight_forward_kernel( const TensorAccessor5D query, const TensorAccessor5D key, TensorAccessor5D weight,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int diameter=2*radius+1; //query B*T*C*H*W //key B*T*C*H*W //weight B*T*9T*H*W if(w=0&&w+dw=0){ for(int c=0;c __global__ void sa_map_forward_kernel( const TensorAccessor5D weight, const TensorAccessor5D proj, TensorAccessor5D out,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int diameter=2*radius+1; //weight B*T*9T*H*W //proj B*T*C*H*W //out B*T*C*H*W if(w=0&&w+dw=0){ scalar_t weight_temp=weight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; scalar_t proj_value=proj[batch][cal_time][c][h+dh][w+dw]; sum+=weight_temp*proj_value; } } } } out[batch][time][c][h][w]=sum; } } } } template __global__ void sa_weight_backward_kernel_query( const TensorAccessor5D dweight, const TensorAccessor5D key, TensorAccessor5D dquery,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int diameter=2*radius+1; //weight B*T*9T*H*W //proj B*T*C*H*W //out B*T*C*H*W if(w=0&&w+dw=0){ scalar_t _dweight=dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; scalar_t _key=key[batch][cal_time][c][h+dh][w+dw]; sum+=_dweight*_key; } } } } dquery[batch][time][c][h][w]=sum; } } } } template __global__ void sa_weight_backward_kernel_key( const TensorAccessor5D dweight, const TensorAccessor5D query, TensorAccessor5D dkey,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int 
diameter=2*radius+1; //weight B*T*9T*H*W //proj B*T*C*H*W //out B*T*C*H*W if(w=0&&w+dw=0){ scalar_t _dweight=dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; scalar_t _query=query[batch][time][c][h][w]; atomicAdd(&dkey[batch][cal_time][c][h+dh][w+dw],_dweight*_query); } } } } } } } } template __global__ void sa_map_backward_kernel_weight( const TensorAccessor5D dout, const TensorAccessor5D proj, TensorAccessor5D dweight,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int diameter=2*radius+1; //weight B*T*9T*H*W //proj B*T*C*H*W //out B*T*C*H*W if(w=0&&w+dw=0){ scalar_t _proj=proj[batch][cal_time][c][h+dh][w+dw]; scalar_t _dout=dout[batch][time][c][h][w]; sum+=_dout*_proj; } } dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]=sum; } } } } } } template __global__ void sa_map_backward_kernel_proj( const TensorAccessor5D dout, const TensorAccessor5D weight, TensorAccessor5D dproj,int B,int T,int C,int H,int W,int radius,int dilation){ int w = blockIdx.x * blockDim.x + threadIdx.x;//col int h = blockIdx.y * blockDim.y + threadIdx.y;//row int time = blockIdx.z;//time int diameter=2*radius+1; //weight B*T*9T*H*W //proj B*T*C*H*W //out B*T*C*H*W if(w=0&&w+dw=0){ scalar_t weight_temp=weight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; scalar_t _dout=dout[batch][time][c][h][w]; atomicAdd(&dproj[batch][cal_time][c][h+dh][w+dw],_dout*weight_temp); } } } } } } } } void _sa_weight_forward_cuda(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation){ dim3 threads(16,16); dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); AT_DISPATCH_FLOATING_TYPES(weight.scalar_type(), "sa_weight_forward_cuda", ([&] { sa_weight_forward_kernel<<>>( query.packed_accessor(), key.packed_accessor(), weight.packed_accessor(),B,T,C,H,W,radius,dilation); })); } void _sa_map_forward_cuda(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int B,int T,int C,int H,int W,int radius,int dilation){ dim3 threads(16,16); dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); AT_DISPATCH_FLOATING_TYPES(weight.scalar_type(), "sa_map_forward_cuda", ([&] { sa_map_forward_kernel<<>>( weight.packed_accessor(), proj.packed_accessor(), out.packed_accessor(),B,T,C,H,W,radius,dilation); })); } void _sa_weight_backward_cuda(const torch::Tensor& dw,const torch::Tensor& query, const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey, int B,int T,int C,int H,int W,int radius,int dilation){ dim3 threads(16,16); dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); AT_DISPATCH_FLOATING_TYPES(dw.scalar_type(), "sa_weight_backward_cuda", ([&] { const TensorAccessor5D dw_acc=dw.packed_accessor(); const TensorAccessor5D query_acc=query.packed_accessor(); const TensorAccessor5D key_acc=key.packed_accessor(); TensorAccessor5D dquery_acc=dquery.packed_accessor(); TensorAccessor5D dkey_acc=dkey.packed_accessor(); sa_weight_backward_kernel_query<<>>(dw_acc,key_acc,dquery_acc,B,T,C,H,W,radius,dilation); sa_weight_backward_kernel_key<<>>(dw_acc,query_acc,dkey_acc,B,T,C,H,W,radius,dilation); })); } void _sa_map_backward_cuda(const torch::Tensor& dout, const torch::Tensor& weight, const torch::Tensor& 
proj,torch::Tensor& dweight,torch::Tensor& dproj, int B,int T,int C,int H,int W,int radius,int dilation){ dim3 threads(16,16); dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); AT_DISPATCH_FLOATING_TYPES(dout.scalar_type(), "sa_map_backward_cuda", ([&] { const TensorAccessor5D dout_acc=dout.packed_accessor(); const TensorAccessor5D weight_acc=weight.packed_accessor(); const TensorAccessor5D proj_acc=proj.packed_accessor(); TensorAccessor5D dweight_acc=dweight.packed_accessor(); TensorAccessor5D dproj_acc=dproj.packed_accessor(); sa_map_backward_kernel_weight<<>>(dout_acc,proj_acc,dweight_acc,B,T,C,H,W,radius,dilation); sa_map_backward_kernel_proj<<>>(dout_acc,weight_acc,dproj_acc,B,T,C,H,W,radius,dilation); })); } ================================================ FILE: lib/module/PNS/PNS_Module/sa_ext.cpp ================================================ //#include #include #include"utils.h" #include"timer.h" #include"reference.h" void get_sizes(const torch::Tensor& t,int *B,int *T,int *C,int *H,int *W){ *B=t.size(0); *T=t.size(1); *C=t.size(2); *H=t.size(3); *W=t.size(4); } void _sa_weight_forward_cuda(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation); void _sa_map_forward_cuda(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int B,int T,int C,int H,int W,int radius,int dilation); void _sa_weight_backward_cuda(const torch::Tensor& dw,const torch::Tensor& query, const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey, int B,int T,int C,int H,int W,int radius,int dilation); void _sa_map_backward_cuda(const torch::Tensor& dout,const torch::Tensor& weight, const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj, int B,int T,int C,int H,int W,int radius,int dilaiton); //forward declarations-------python pass information here void sa_weight_forward(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int radius,int dilation){ int B,T,C,H,W; get_sizes(query,&B,&T,&C,&H,&W); //GpuTimer timer; //timer.Start(); _sa_weight_forward_cuda(query,key,weight,B,T,C,H,W,radius,dilation); //timer.Stop(); //cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); } void sa_map_forward(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int radius,int dilation){ int B,T,C,H,W; get_sizes(proj,&B,&T,&C,&H,&W); //GpuTimer timer; //timer.Start(); _sa_map_forward_cuda(weight,proj,out,B,T,C,H,W,radius,dilation); //timer.Stop(); //cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); } void sa_weight_backward(const torch::Tensor& dw,const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey,int radius,int dilation){ int B,T,C,H,W; get_sizes(query,&B,&T,&C,&H,&W); //GpuTimer timer; //timer.Start(); _sa_weight_backward_cuda(dw,query,key,dquery,dkey,B,T,C,H,W,radius,dilation); //timer.Stop(); //cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); } void sa_map_backward(const torch::Tensor& dout,const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj,int radius,int dilation){ int B,T,C,H,W; get_sizes(proj,&B,&T,&C,&H,&W); //GpuTimer timer; //timer.Start(); _sa_map_backward_cuda(dout,weight,proj,dweight,dproj,B,T,C,H,W,radius,dilation); //timer.Stop(); 
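// NOTE: the commented-out GpuTimer/cudaDeviceSynchronize calls in these
// wrappers are leftover profiling scaffolding; each wrapper derives
// (B, T, C, H, W) from an input tensor via get_sizes() and dispatches to
// the corresponding *_cuda implementation.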
//cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); } void sa_weight_forward_ref(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int radius,int dilation){ int B,T,C,H,W; get_sizes(query,&B,&T,&C,&H,&W); sa_weight_forward_Ref(query,key,weight,B,T,C,H,W,radius,dilation); } void sa_weight_backward_ref(const torch::Tensor& dw,const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey,int radius,int dilation){ int B,T,C,H,W; get_sizes(query,&B,&T,&C,&H,&W); sa_weight_backward_query_Ref(dw,query,key,dquery,B,T,C,H,W,radius,dilation); sa_weight_backward_key_Ref(dw,query,key,dkey,B,T,C,H,W,radius,dilation); } void sa_map_forward_ref(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int radius,int dilation){ int B,T,C,H,W; get_sizes(proj,&B,&T,&C,&H,&W); sa_map_forward_Ref(weight,proj,out,B,T,C,H,W,radius,dilation); } void sa_map_backward_ref(const torch::Tensor& dout,const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj,int radius,int dilation){ int B,T,C,H,W; get_sizes(proj,&B,&T,&C,&H,&W); sa_map_backward_weight_Ref(dout,weight,proj,dweight,B,T,C,H,W,radius,dilation); sa_map_backward_proj_Ref(dout,weight,proj,dproj,B,T,C,H,W,radius,dilation); } PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("weight_forward", &sa_weight_forward, "weight forward (CUDA)"); m.def("weight_backward", &sa_weight_backward, "weight backward (CUDA)"); m.def("map_forward", &sa_map_forward, "map forward (CUDA)"); m.def("map_backward", &sa_map_backward, "map backward (CUDA)"); m.def("weight_forward_ref", &sa_weight_forward_ref, "weight forward ref (CUDA)"); m.def("weight_backward_ref", &sa_weight_backward_ref, "weight backward ref (CUDA)"); m.def("map_forward_ref", &sa_map_forward_ref, "map forward ref (CUDA)"); m.def("map_backward_ref", &sa_map_backward_ref, "map backward ref (CUDA)"); } /* int main() { //torch::Tensor weight=torch::ones({2,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); //torch::Tensor query=torch::ones({2,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); //torch::Tensor key=torch::ones({2,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); //sa_weight_forward(query,key,weight); /* torch::Tensor weight=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor proj=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor out=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); sa_map_forward(weight,proj,out); /* torch::Tensor dw=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor query=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor key=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor dquery=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor dkey=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); sa_weight_backward(dw,query,key,dquery,dkey); torch::Tensor dout=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor weight=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor proj=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor dweight=torch::zeros({1,5,5*9,28,42}, 
at::kFloat).to(torch::Device(torch::kCUDA, 0)); torch::Tensor dproj=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); sa_map_backward(dout,weight,proj,dweight,dproj); std::cout< struct GpuTimer { cudaEvent_t start; cudaEvent_t stop; GpuTimer() { cudaEventCreate(&start); cudaEventCreate(&stop); } ~GpuTimer() { cudaEventDestroy(start); cudaEventDestroy(stop); } void Start() { cudaEventRecord(start, 0); } void Stop() { cudaEventRecord(stop, 0); } float Elapsed() { float elapsed; cudaEventSynchronize(stop); cudaEventElapsedTime(&elapsed, start, stop); return elapsed; } }; #endif /* GPU_TIMER_H__ */ ================================================ FILE: lib/module/PNS/PNS_Module/utils.h ================================================ #ifndef UTILS_H__ #define UTILS_H__ #include #include #include #include #include #include #include #define checkCudaErrors(val) check( (val), #val, __FILE__, __LINE__) template void check(T err, const char* const func, const char* const file, const int line) { if (err != cudaSuccess) { std::cerr << "CUDA error at: " << file << ":" << line << std::endl; std::cerr << cudaGetErrorString(err) << " " << func << std::endl; exit(1); } } template void checkResultsExact(const T* const ref, const T* const gpu, size_t numElem) { //check that the GPU result matches the CPU result for (size_t i = 0; i < numElem; ++i) { if (ref[i] != gpu[i]) { std::cerr << "Difference at pos " << i << std::endl; //the + is magic to convert char to int without messing //with other types std::cerr << "Reference: " << std::setprecision(17) << +ref[i] << "\nGPU : " << +gpu[i] << std::endl; exit(1); } } } template void checkResultsEps(const T* const ref, const T* const gpu, size_t numElem, double eps1, double eps2) { assert(eps1 >= 0 && eps2 >= 0); unsigned long long totalDiff = 0; unsigned numSmallDifferences = 0; for (size_t i = 0; i < numElem; ++i) { //subtract smaller from larger in case of unsigned types T smaller = std::min(ref[i], gpu[i]); T larger = std::max(ref[i], gpu[i]); T diff = larger - smaller; if (diff > 0 && diff <= eps1) { numSmallDifferences++; } else if (diff > eps1) { std::cerr << "Difference at pos " << +i << " exceeds tolerance of " << eps1 << std::endl; std::cerr << "Reference: " << std::setprecision(17) << +ref[i] << "\nGPU : " << +gpu[i] << std::endl; exit(1); } totalDiff += diff * diff; } double percentSmallDifferences = (double)numSmallDifferences / (double)numElem; if (percentSmallDifferences > eps2) { std::cerr << "Total percentage of non-zero pixel difference between the two images exceeds " << 100.0 * eps2 << "%" << std::endl; std::cerr << "Percentage of non-zero pixel differences: " << 100.0 * percentSmallDifferences << "%" << std::endl; exit(1); } } //Uses the autodesk method of image comparison //Note the the tolerance here is in PIXELS not a percentage of input pixels template void checkResultsAutodesk(const T* const ref, const T* const gpu, size_t numElem, double variance, size_t tolerance) { size_t numBadPixels = 0; for (size_t i = 0; i < numElem; ++i) { T smaller = std::min(ref[i], gpu[i]); T larger = std::max(ref[i], gpu[i]); T diff = larger - smaller; if (diff > variance) ++numBadPixels; } if (numBadPixels > tolerance) { std::cerr << "Too many bad pixels in the image." 
<< numBadPixels << "/" << tolerance << std::endl; exit(1); } } #endif ================================================ FILE: lib/module/PNS/setup.py ================================================ from setuptools import setup from torch.utils.cpp_extension import BuildExtension, CUDAExtension from os.path import join project_root = 'PNS_Module' sources = [join(project_root, file) for file in ['sa_ext.cpp', 'sa.cu','reference.cpp']] nvcc_args = [ '-gencode', 'arch=compute_61,code=sm_61', '-gencode', 'arch=compute_70,code=sm_70', '-gencode', 'arch=compute_70,code=compute_70' ] cxx_args = ['-std=c++11'] setup( name='self_cuda', ext_modules=[ CUDAExtension('self_cuda_backend', sources, extra_compile_args={'cxx': cxx_args,'nvcc': nvcc_args}) ], cmdclass={ 'build_ext': BuildExtension }) ================================================ FILE: lib/module/PNSPlusModule.py ================================================ import numpy as np from math import sqrt import torch import torch.nn as nn import torch.nn.functional as F import torch.autograd as autograd from torch.autograd.function import once_differentiable import self_cuda_backend as _ext def _check_contiguous(*args): if not all([mod is None or mod.is_contiguous() for mod in args]): raise ValueError("Non-contiguous input") class Relevance_Measuring(autograd.Function): @staticmethod def forward(ctx, query, key, radius=1, dilation=1): ctx.radius = radius ctx.dilation = dilation b, t, c, h, w = query.shape local_size = 2 * radius + 1 size = (b, t, local_size * local_size * t, h, w) weight = torch.zeros(size, dtype=query.dtype, layout=query.layout, device=query.device) weight.fill_(-np.inf) _ext.weight_forward(query, key, weight, radius, dilation) ctx.save_for_backward(query, key) return weight @staticmethod @once_differentiable def backward(ctx, dw): query, key = ctx.saved_tensors dquery = torch.zeros_like(query) dkey = torch.zeros_like(key) _ext.weight_backward(dw.contiguous(), query, key, dquery, dkey, ctx.radius, ctx.dilation) _check_contiguous(dquery, dkey) return dquery, dkey, None, None class Spatial_Temporal_Aggregation(autograd.Function): @staticmethod def forward(ctx, weight, proj, radius=1, dilation=1): ctx.radius = radius ctx.dilation = dilation out = torch.zeros_like(proj) _ext.map_forward(weight, proj, out, radius, dilation) ctx.save_for_backward(weight, proj) return out @staticmethod @once_differentiable def backward(ctx, dout): weight, proj = ctx.saved_tensors dweight = torch.zeros_like(weight) dproj = torch.zeros_like(proj) _ext.map_backward(dout.contiguous(), weight, proj, dweight, dproj, ctx.radius, ctx.dilation) _check_contiguous(dweight, dproj) return dweight, dproj, None, None relevance_measuring = Relevance_Measuring.apply spatial_temporal_aggregation = Spatial_Temporal_Aggregation.apply class NS_Block(nn.Module): def __init__(self, channels_in=32, n_head=4, d_k=8, d_v=8, radius=[3, 3, 3, 3], dilation=[1, 3, 5, 7]): super(NS_Block, self).__init__() self.channels_in = channels_in self.n_head = n_head self.d_k = d_k self.radius = radius self.dilation = dilation self.query_conv = nn.Conv3d(channels_in, n_head * d_k, 1, bias=False) self.key_conv = nn.Conv3d(channels_in, n_head * d_k, 1, bias=False) self.value_conv = nn.Conv3d(channels_in, n_head * d_v, 1, bias=False) self.output_Linear = nn.Conv3d(n_head * d_v, channels_in, 1, bias=False) # Optimization: self-adapting layer normalization self.bn = nn.LayerNorm([int(self.channels_in/self.n_head), 16, 28]) def forward(self, first, x): dilation, radius = self.dilation, 
self.radius x_ = x.permute(0, 2, 1, 3, 4).contiguous() first_ = first.permute(0, 2, 1, 3, 4).contiguous() query = self.query_conv(first_).permute(0, 2, 1, 3, 4) query_chunk = query.chunk(self.n_head, 2) key = self.key_conv(x_).permute(0, 2, 1, 3, 4) key_chunk = key.chunk(self.n_head, 2) value = self.value_conv(x_).permute(0, 2, 1, 3, 4) value_chunk = value.chunk(self.n_head, 2) M_T, M_A = [], [] for i in range(self.n_head): query_i = query_chunk[i].contiguous() query_i = self.bn(query_i) key_i = key_chunk[i].contiguous() value_i = value_chunk[i].contiguous() # Optimization: self-adapting scaling factor M_A_i = relevance_measuring(query_i, key_i, radius[i], dilation[i]) / sqrt(self.channels_in/self.n_head) M_A.append(F.softmax(M_A_i, dim=2)) M_T.append(spatial_temporal_aggregation(M_A_i, value_i, radius[i], dilation[i])) M_S, _ = torch.max(torch.cat(M_A, dim=2), dim=2) M_T = torch.cat(M_T, dim=2).permute(0, 2, 1, 3, 4) out_cat = self.output_Linear(M_T) * M_S.unsqueeze(2).permute(0, 2, 1, 3, 4) return out_cat.permute(0, 2, 1, 3, 4) __all__ = ["NS_Block", "relevance_measuring", "spatial_temporal_aggregation"] ================================================ FILE: lib/module/PNSPlusNetwork.py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from lib.module.LightRFB import LightRFB from lib.module.Res2Net_v1b import res2net50_v1b_26w_4s from lib.module.PNSPlusModule import NS_Block class conbine_feature(nn.Module): def __init__(self): super(conbine_feature, self).__init__() self.up2_high = DilatedParallelConvBlockD2(32, 16) self.up2_low = nn.Conv2d(24, 16, 1, stride=1, padding=0, bias=False) self.up2_bn2 = nn.BatchNorm2d(16) self.up2_act = nn.PReLU(16) self.refine = nn.Sequential(nn.Conv2d(16, 16, 3, padding=1, bias=False), nn.BatchNorm2d(16), nn.PReLU()) def forward(self, low_fea, high_fea): high_fea = self.up2_high(high_fea) low_fea = self.up2_bn2(self.up2_low(low_fea)) refine_feature = self.refine(self.up2_act(high_fea + low_fea)) return refine_feature class DilatedParallelConvBlockD2(nn.Module): def __init__(self, nIn, nOut, add=False): super(DilatedParallelConvBlockD2, self).__init__() n = int(np.ceil(nOut / 2.)) n2 = nOut - n self.conv0 = nn.Conv2d(nIn, nOut, 1, stride=1, padding=0, dilation=1, bias=False) self.conv1 = nn.Conv2d(n, n, 3, stride=1, padding=1, dilation=1, bias=False) self.conv2 = nn.Conv2d(n2, n2, 3, stride=1, padding=2, dilation=2, bias=False) self.bn = nn.BatchNorm2d(nOut) self.add = add def forward(self, input): in0 = self.conv0(input) in1, in2 = torch.chunk(in0, 2, dim=1) b1 = self.conv1(in1) b2 = self.conv2(in2) output = torch.cat([b1, b2], dim=1) if self.add: output = input + output output = self.bn(output) return output class PNSNet(nn.Module): def __init__(self): super(PNSNet, self).__init__() self.feature_extractor = res2net50_v1b_26w_4s(pretrained=True) self.High_RFB = LightRFB() self.Low_RFB = LightRFB(channels_in=512, channels_mid=128, channels_out=24) self.squeeze = nn.Sequential(nn.Conv2d(1024, 32, 1), nn.BatchNorm2d(32), nn.ReLU(inplace=True)) self.decoder = conbine_feature() self.SegNIN = nn.Sequential(nn.Dropout2d(0.1), nn.Conv2d(16, 1, kernel_size=1, bias=False)) self.NSB_global = NS_Block(32, radius=[3, 3, 3, 3], dilation=[3, 4, 3, 4]) self.NSB_local = NS_Block(32, radius=[3, 3, 3, 3], dilation=[1, 2, 1, 2]) def forward(self, x): origin_shape = x.shape x = x.view(-1, *origin_shape[2:]) x = self.feature_extractor.conv1(x) x = self.feature_extractor.bn1(x) x = 
self.feature_extractor.relu(x)
        x = self.feature_extractor.maxpool(x)
        x1 = self.feature_extractor.layer1(x)

        # Extract anchor, low-level, and high-level features.
        low_feature = self.feature_extractor.layer2(x1)
        high_feature = self.feature_extractor.layer3(low_feature)

        # Reduce the channel dimension.
        high_feature = self.High_RFB(high_feature)
        low_feature = self.Low_RFB(low_feature)

        # Reshape into temporal formation.
        high_feature = high_feature.view(*origin_shape[:2], *high_feature.shape[1:])
        low_feature = low_feature.view(*origin_shape[:2], *low_feature.shape[1:])

        # Feature separation: the anchor frame (index 0) is broadcast as the global reference.
        high_feature_global = high_feature[:, 0, ...].unsqueeze(dim=1).repeat(1, 5, 1, 1, 1)
        high_feature_local = high_feature[:, 1:6, ...]
        low_feature = low_feature[:, 1:6, ...]

        # First NS block.
        high_feature_1 = self.NSB_global(high_feature_global, high_feature_local) + high_feature_local

        # Second NS block.
        high_feature_2 = self.NSB_local(high_feature_1, high_feature_1) + high_feature_1

        # Residual connection.
        high_feature = high_feature_2 + high_feature_local

        # Reshape back into spatial formation.
        high_feature = high_feature.contiguous().view(-1, *high_feature.shape[2:])
        low_feature = low_feature.contiguous().view(-1, *low_feature.shape[2:])

        # Resize the high-level feature to the same resolution as the low-level feature.
        high_feature = F.interpolate(high_feature, size=(low_feature.shape[-2], low_feature.shape[-1]),
                                     mode="bilinear", align_corners=False)

        # UNet-like decoder.
        out = self.decoder(low_feature.clone(), high_feature.clone())
        out = torch.sigmoid(
            F.interpolate(self.SegNIN(out), size=(origin_shape[-2], origin_shape[-1]),
                          mode="bilinear", align_corners=False))
        return out


if __name__ == "__main__":
    a = torch.randn(1, 6, 3, 256, 448).cuda()
    mobile = PNSNet().cuda()
    print(mobile(a).shape)

================================================
FILE: lib/module/Res2Net_v1b.py
================================================
import math

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['Res2Net', 'res2net50_v1b', 'res2net101_v1b', 'res2net50_v1b_26w_4s']

model_urls = {
    'res2net50_v1b_26w_4s': 'https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_v1b_26w_4s-3cf99910.pth',
    'res2net101_v1b_26w_4s': 'https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net101_v1b_26w_4s-0812c246.pth',
}


class Bottle2neck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
        """ Constructor
        Args:
            inplanes: input channel dimensionality
            planes: output channel dimensionality
            stride: conv stride. Replaces pooling layer.
            downsample: None when stride = 1
            baseWidth: basic width of conv3x3
            scale: number of scales.
            stype: 'normal': normal set. 'stage': first block of a new stage.
""" super(Bottle2neck, self).__init__() width = int(math.floor(planes * (baseWidth / 64.0))) self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(width * scale) if scale == 1: self.nums = 1 else: self.nums = scale - 1 if stype == 'stage': self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1) convs = [] bns = [] for i in range(self.nums): convs.append(nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False)) bns.append(nn.BatchNorm2d(width)) self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList(bns) self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stype = stype self.scale = scale self.width = width def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) spx = torch.split(out, self.width, 1) for i in range(self.nums): if i == 0 or self.stype == 'stage': sp = spx[i] else: sp = sp + spx[i] sp = self.convs[i](sp) sp = self.relu(self.bns[i](sp)) if i == 0: out = sp else: out = torch.cat((out, sp), 1) if self.scale != 1 and self.stype == 'normal': out = torch.cat((out, spx[self.nums]), 1) elif self.scale != 1 and self.stype == 'stage': out = torch.cat((out, self.pool(spx[self.nums])), 1) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class Res2Net(nn.Module): def __init__(self, block, layers, baseWidth=26, scale=4, num_classes=1000): self.inplanes = 64 super(Res2Net, self).__init__() self.baseWidth = baseWidth self.scale = scale self.conv1 = nn.Sequential( nn.Conv2d(3, 32, 3, 2, 1, bias=False), nn.BatchNorm2d(32), nn.ReLU(inplace=True), nn.Conv2d(32, 32, 3, 1, 1, bias=False), nn.BatchNorm2d(32), nn.ReLU(inplace=True), nn.Conv2d(32, 64, 3, 1, 1, bias=False) ) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU() self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.avgpool = nn.AdaptiveAvgPool2d(1) self.fc = nn.Linear(512 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False), nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=1, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample=downsample, stype='stage', baseWidth=self.baseWidth, scale=self.scale)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes, baseWidth=self.baseWidth, scale=self.scale)) return nn.Sequential(*layers) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x1 = self.layer2(x) x2 = self.layer3(x1) # x = self.layer4(x) # # x = 
self.avgpool(x)
        # x = x.view(x.size(0), -1)
        # x = self.fc(x)
        return x1, x2


def res2net50_v1b(pretrained=False, **kwargs):
    """Constructs a Res2Net-50_v1b model.
    Res2Net-50 refers to the Res2Net-50_v1b_26w_4s.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Res2Net(Bottle2neck, [3, 4, 6, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['res2net50_v1b_26w_4s']))
    return model


def res2net101_v1b(pretrained=False, **kwargs):
    """Constructs a Res2Net-101_v1b model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Res2Net(Bottle2neck, [3, 4, 23, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['res2net101_v1b_26w_4s']))
    return model


def res2net50_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-50_v1b_26w_4s model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Res2Net(Bottle2neck, [3, 4, 6, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        # or you can directly load from local file:
        # model_state = torch.load('add/your/model_path/res2net50_v1b_26w_4s-3cf99910.pth')
        # model.load_state_dict(model_state)
        model.load_state_dict(model_zoo.load_url(model_urls['res2net50_v1b_26w_4s'], map_location='cpu'))
    return model


def res2net101_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-101_v1b_26w_4s model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Res2Net(Bottle2neck, [3, 4, 23, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['res2net101_v1b_26w_4s']))
    return model


def res2net152_v1b_26w_4s(pretrained=False, **kwargs):
    """Constructs a Res2Net-152_v1b_26w_4s model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = Res2Net(Bottle2neck, [3, 8, 36, 3], baseWidth=26, scale=4, **kwargs)
    if pretrained:
        # NOTE: model_urls above defines no 'res2net152_v1b_26w_4s' entry,
        # so pretrained=True raises a KeyError here.
        model.load_state_dict(model_zoo.load_url(model_urls['res2net152_v1b_26w_4s']))
    return model


if __name__ == '__main__':
    images = torch.rand(1, 3, 224, 224).cuda()
    model = res2net50_v1b_26w_4s(pretrained=True)
    model = model.cuda()
    print(model(images).size())

================================================
FILE: lib/module/__init__.py
================================================

================================================
FILE: lib/utils/__init__.py
================================================

================================================
FILE: lib/utils/utils.py
================================================
def clip_gradient(optimizer, grad_clip):
    """
    For calibrating misaligned gradients via the gradient-clipping technique
    :param optimizer:
    :param grad_clip:
    :return:
    """
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None:
                param.grad.data.clamp_(-grad_clip, grad_clip)


def adjust_lr(optimizer, init_lr, epoch, decay_rate=0.1, decay_epoch=30):
    decay = decay_rate ** (epoch // decay_epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = decay * init_lr
        lr = param_group['lr']
    return lr

================================================
FILE: scripts/config.py
================================================
import argparse

parser = argparse.ArgumentParser()
# optimizer
parser.add_argument('--gpu_id', type=str, default='0, 1, 2, 3', help='train use gpu')
parser.add_argument('--lr_mode', type=str, default="poly")
parser.add_argument('--base_lr', type=float, default=3e-4)
parser.add_argument('--finetune_lr', type=float, default=1e-4)
parser.add_argument('--decay_rate', type=float, default=0.1, help='decay rate of learning rate')
parser.add_argument('--decay_epoch', type=int, default=50, help='every n epochs decay learning rate')
parser.add_argument('--clip', type=float, default=0.5, help='gradient clipping margin')
# train schedule
parser.add_argument('--epoches', type=int, default=15)
# data
parser.add_argument('--data_statistics', type=str, default="lib/dataloader/statistics.pth", help='The normalization statistics.')
parser.add_argument('--dataset', type=str, default="TrainDataset")
parser.add_argument('--dataset_root', type=str, default="./data/SUN-SEG")
parser.add_argument('--size', type=tuple, default=(256, 448))
parser.add_argument('--batchsize', type=int, default=24)
parser.add_argument('--video_time_clips', type=int, default=5)
parser.add_argument('--save_path', type=str, default='snapshot/PNSPlus/')
parser.add_argument('--video_testset_root', type=str, default='./data/SUN-SEG')
config = parser.parse_args()

================================================
FILE: scripts/eval_eff.py
================================================
import os
import time

import numpy as np
from ptflops import get_model_complexity_info
import torch

from lib.module.PNSPlusNetwork import PNSNet as Network


def computeTime(model, inputs, device='cuda'):
    if device == 'cuda':
        model = model.cuda()
        inputs = inputs.cuda()
    model.eval()
    time_spent = []
    for idx in range(100):
        start_time = time.time()
        with torch.no_grad():
            _ = model(inputs)
        if device == 'cuda':
            torch.cuda.synchronize()  # wait for cuda to finish (cuda is asynchronous!)
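        # the initial iterations are treated as GPU warm-up and excluded
        # from the timing statistics below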
        if idx > 10:
            time_spent.append(time.time() - start_time)
    # time_spent holds per-forward seconds; report milliseconds and FPS
    print('Avg execution time (ms): %.4f, FPS: %d' % (np.mean(time_spent) * 1000, 1 // np.mean(time_spent)))
    return 1 // np.mean(time_spent)


if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    torch.backends.cudnn.benchmark = True
    model = Network().cuda()
    with torch.cuda.device(0):
        macs, params = get_model_complexity_info(model, (6, 3, 256, 448), as_strings=True,
                                                 print_per_layer_stat=False, verbose=True)
        inputs = torch.randn(1, 6, 3, 256, 448)
        print(str(params) + '\t' + str(macs))
        computeTime(model, inputs)

================================================
FILE: scripts/my_test.py
================================================
import os

import numpy as np
from tqdm import tqdm
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor, Compose, Resize

from config import config
from lib.module.PNSPlusNetwork import PNSNet as Network


def safe_save(img, save_path):
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    img.save(save_path)


class Normalize(object):
    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, img):
        for i in range(3):
            img[:, :, i] -= float(self.mean[i])
        for i in range(3):
            img[:, :, i] /= float(self.std[i])
        return img


class Test_Dataset(Dataset):
    def __init__(self, root, testset):
        time_interval = 1
        self.time_clips = config.video_time_clips
        self.video_test_list = []
        video_root = os.path.join(root, testset, 'Frame')
        cls_list = os.listdir(video_root)
        self.video_filelist = {}
        for cls in cls_list:
            self.video_filelist[cls] = []
            cls_path = os.path.join(video_root, cls)
            tmp_list = os.listdir(cls_path)
            tmp_list.sort(key=lambda name: (
                int(name.split('-')[0].split('_')[-1]),
                int(name.split('_a')[1].split('_')[0]),
                int(name.split('_image')[1].split('.jpg')[0])))
            for filename in tmp_list:
                self.video_filelist[cls].append(os.path.join(cls_path, filename))
        # ensemble
        for cls in cls_list:
            li = self.video_filelist[cls]
            begin = 0  # change for inference from first frame
            while begin < len(li):
                if len(li) - begin - 1 < self.time_clips:
                    begin = len(li) - self.time_clips
                batch_clips = []
                batch_clips.append(li[0])
                for t in range(self.time_clips):
                    batch_clips.append(li[begin + time_interval * t])
                begin += self.time_clips
                self.video_test_list.append(batch_clips)
        self.img_transform = Compose([
            Resize((config.size[0], config.size[1]), Image.BILINEAR),
            ToTensor(),
            Normalize([0.4732661, 0.44874457, 0.3948762],
                      [0.22674961, 0.22012031, 0.2238305])
        ])

    def __getitem__(self, idx):
        img_path_li = self.video_test_list[idx]
        IMG = None
        img_li = []
        for idx, img_path in enumerate(img_path_li):
            img = Image.open(img_path).convert('RGB')
            img_li.append(self.img_transform(img))
        for idx, img in enumerate(img_li):
            if IMG is not None:
                IMG[idx, :, :, :] = img
            else:
                IMG = torch.zeros(len(img_li), *(img.shape))
                IMG[idx, :, :, :] = img
        return IMG, img_path_li

    def __len__(self):
        return len(self.video_test_list)


class AutoTest:
    def __init__(self, test_dataset, data_root, model_path):
        assert isinstance(test_dataset, list), "error"
        self.data_root = data_root
        self.test_dataset = test_dataset
        self.dataloader = {}
        for dst in self.test_dataset:
            self.dataloader[dst] = DataLoader(Test_Dataset(data_root, dst),
                                              batch_size=1, shuffle=False, num_workers=8)
        print('Load checkpoint:', model_path)
        self.model = Network().cuda()
        new_state = {}
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        for key, value in state_dict.items():
new_state[key.replace('module.', '')] = value self.tag_dir = 'res/'+model_path.split('/')[-3]+'_'+model_path.split('/')[-2]+'/' self.model.load_state_dict(new_state) self.model.eval() def test(self): with torch.no_grad(): for dst in self.test_dataset: for img, path_li in tqdm(self.dataloader[dst], desc="test:%s" % dst): result = self.model(img.cuda()) for res, path in zip(result, path_li[1:]): npres = res.squeeze().cpu().numpy() safe_save(Image.fromarray((npres * 255).astype(np.uint8)), path[0].replace(self.data_root, self.tag_dir).replace(".jpg", ".png").replace('Frame', '')) if __name__ == "__main__": at = AutoTest(['TestEasyDataset/Seen', 'TestHardDataset/Seen', 'TestEasyDataset/Unseen', 'TestHardDataset/Unseen'], config.video_testset_root, "snapshot/PNSPlus/epoch_15/PNSPlus.pth") at.test() ================================================ FILE: scripts/my_train.py ================================================ import os import logging from datetime import datetime import torch import torch.nn as nn import torch.nn.functional as F import torch.backends.cudnn as cudnn from torch.utils import data from config import config from lib.module.PNSPlusNetwork import PNSNet as Network from lib.dataloader.dataloader import get_video_dataset from lib.utils.utils import clip_gradient, adjust_lr class CrossEntropyLoss(nn.Module): def __init__(self): super(CrossEntropyLoss, self).__init__() def forward(self, *inputs): pred, target = tuple(inputs) total_loss = F.binary_cross_entropy(pred.squeeze(), target.squeeze().float()) return total_loss def train(train_loader, model, optimizer, epoch, save_path, loss_func): global step model.cuda().train() loss_all = 0 epoch_step = 0 try: for i, (images, gts) in enumerate(train_loader, start=1): optimizer.zero_grad() images = images.cuda() gts = gts.cuda() preds = model(images) loss = loss_func(preds.squeeze().contiguous(), gts.contiguous().view(-1, *(gts.shape[2:]))) loss.backward() clip_gradient(optimizer, config.clip) optimizer.step() step += 1 epoch_step += 1 loss_all += loss.data if i % 20 == 0 or i == total_step or i == 1: print('{} Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], Total_loss: {:.4f}'. format(datetime.now(), epoch, config.epoches, i, total_step, loss.data)) logging.info( '[Train Info]:Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], Total_loss: {:.4f}'. 
format(epoch, config.epoches, i, total_step, loss.data)) os.makedirs(os.path.join(save_path, "epoch_%d" % (epoch + 1)), exist_ok=True) save_root = os.path.join(save_path, "epoch_%d" % (epoch + 1)) torch.save(model.state_dict(), os.path.join(save_root, "PNSPlus.pth")) loss_all /= epoch_step logging.info('[Train Info]: Epoch [{:03d}/{:03d}], Loss_AVG: {:.4f}'.format(epoch, config.epoches, loss_all)) except KeyboardInterrupt: print('Keyboard Interrupt: save model and exit.') if not os.path.exists(save_path): os.makedirs(save_path) torch.save(model.state_dict(), save_path + 'Net_epoch_{}.pth'.format(epoch + 1)) print('Save checkpoints successfully!') raise if __name__ == '__main__': model = Network().cuda() if config.gpu_id == '0': os.environ["CUDA_VISIBLE_DEVICES"] = "0" print('USE GPU 0') elif config.gpu_id == '0, 1': model = nn.DataParallel(model) os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1" print('USE GPU 0 and 1') elif config.gpu_id == '2, 3': model = nn.DataParallel(model) os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3" print('USE GPU 2 and 3') elif config.gpu_id == '0, 1, 2, 3': model = nn.DataParallel(model) os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3" print('USE GPU 0, 1, 2 and 3') cudnn.benchmark = True base_params = [params for name, params in model.named_parameters() if ("temporal_high" in name)] finetune_params = [params for name, params in model.named_parameters() if ("temporal_high" not in name)] optimizer = torch.optim.Adam([ {'params': base_params, 'lr': config.base_lr, 'weight_decay': 1e-4, 'name': "base_params"}, {'params': finetune_params, 'lr': config.finetune_lr, 'weight_decay': 1e-4, 'name': 'finetune_params'}]) save_path = config.save_path if not os.path.exists(save_path): os.makedirs(save_path) loss_func = CrossEntropyLoss() # load data print('load data...') train_loader =get_video_dataset() train_loader = data.DataLoader(dataset=train_loader, batch_size=config.batchsize, shuffle=True, num_workers=4, pin_memory=False ) logging.info('Train on {}'.format(config.dataset)) print('Train on {}'.format(config.dataset)) total_step = len(train_loader) # logging logging.basicConfig(filename=save_path + 'log.log', format='[%(asctime)s-%(filename)s-%(levelname)s:%(message)s]', level=logging.INFO, filemode='a', datefmt='%Y-%m-%d %I:%M:%S %p') logging.info("Network-Train") print("Network-Train") logging.info('Config: epoch: {}; lr: {}; batchsize: {}; trainsize: {}; clip: {}; decay_rate: {}; ' 'save_path: {}; decay_epoch: {}'.format(config.epoches, config.base_lr, config.batchsize, config.size, config.clip, config.decay_rate, config.save_path, config.decay_epoch)) print('Config: epoch: {}; lr: {}; batchsize: {}; trainsize: {}; clip: {}; decay_rate: {}; ' 'save_path: {}; decay_epoch: {}'.format(config.epoches, config.base_lr, config.batchsize, config.size, config.clip, config.decay_rate, config.save_path, config.decay_epoch)) step = 0 print("Start train...") for epoch in range(config.epoches): cur_lr = adjust_lr(optimizer, config.base_lr, epoch, config.decay_rate, config.decay_epoch) train(train_loader, model, optimizer, epoch, save_path, loss_func) ================================================ FILE: snapshot/.placeholder ================================================ the snapshots will be stored here ================================================ FILE: utils/reorganize.py ================================================ import os, shutil, glob SUN_root = './data/SUN-Positive/' SUNSEG_root = './data/SUN-SEG-Annotation/' SUN_split_dict = {} SUNSEG_split_dict = {} 
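# These lookup tables map an image name to its SUN case folder, its SUN-SEG
# case folder, and (below) its SUN-SEG dataset split, respectively.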
SUNSEG_dataset_dict = {}
image_list = []

# SUN_list = glob.glob(SUN_root + '*/*.jpg')
SUNSEG_test_list = glob.glob(SUNSEG_root + 'Test*/*/GT/*/*.png')
SUNSEG_train_list = glob.glob(SUNSEG_root + 'TrainDataset/GT/*/*.png')
SUNSEG_list = SUNSEG_test_list + SUNSEG_train_list
SUN_list = [os.path.join(SUN_root,
                         name.split('/')[-2].split('_')[0] if len(name.split('/')[-2].split('_')) > 1 else name.split('/')[-2],
                         name.split('/')[-1].replace('.png', ''))
            for name in SUNSEG_list]

for SUN_path, SUNSEG_path in zip(SUN_list, SUNSEG_list):
    """
    @func: Get SUN and SUN-SEG case-to-image structure in a dictionary
    """
    SUN_case_name, SUN_image_name = SUN_path.split('/')[-2], SUN_path.split('/')[-1]
    SUNSEG_dataset_name = SUNSEG_path.split('SUN-SEG-Annotation/')[1].split('/GT')[0]
    SUNSEG_case_name = SUNSEG_path.split('/')[-2]
    # NOTE: use a slice rather than rstrip('.png'), which strips any trailing
    # '.', 'p', 'n', 'g' characters and can eat part of the image name.
    SUNSEG_image_name = SUNSEG_path.split('/')[-1][:-len('.png')]
    SUN_split_dict[SUN_image_name] = SUN_case_name
    SUNSEG_split_dict[SUNSEG_image_name] = SUNSEG_case_name
    SUNSEG_dataset_dict[SUNSEG_image_name] = SUNSEG_dataset_name
    image_list.append(SUN_image_name)

for image in image_list:
    """
    @func: Change original SUN's structure
    """
    SUN_case = SUN_split_dict[image]
    SUNSEG_case = SUNSEG_split_dict[image]
    dataset_split = SUNSEG_dataset_dict[image]
    os.makedirs(os.path.join(SUNSEG_root, dataset_split, 'Frame', SUNSEG_case), exist_ok=True)
    shutil.move(os.path.join(SUN_root, SUN_case, image + '.jpg'),
                os.path.join(SUNSEG_root, dataset_split, 'Frame', SUNSEG_case, image + '.jpg'))

================================================
FILE: utils/reorganize.sh
================================================
python ./utils/reorganize.py
rm -rf ./data/SUN-Positive
mv ./data/SUN-SEG-Annotation ./data/SUN-SEG
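After reorganize.sh finishes, every frame moved under SUN-SEG should have a matching ground-truth mask. The snippet below is a minimal sanity-check sketch, assuming the ./data/SUN-SEG/<split>/{Frame,GT}/<case>/ layout produced above (the script itself is illustrative and not part of the repository):

import glob
import os

SUNSEG_root = './data/SUN-SEG/'

# TrainDataset/Frame/<case>/*.jpg plus the nested Test*/Seen|Unseen splits
frames = (glob.glob(os.path.join(SUNSEG_root, '*', 'Frame', '*', '*.jpg')) +
          glob.glob(os.path.join(SUNSEG_root, '*', '*', 'Frame', '*', '*.jpg')))

missing = []
for frame in frames:
    # each frame should have a mask at the mirrored GT path
    gt = frame.replace('/Frame/', '/GT/').replace('.jpg', '.png')
    if not os.path.exists(gt):
        missing.append(gt)
print('{} frames checked, {} masks missing'.format(len(frames), len(missing)))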