Repository: facebookresearch/adaptive_teacher Branch: main Commit: 5256463ad9ec Files: 49 Total size: 329.0 KB Directory structure: gitextract_n6ts7o2q/ ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── adapteacher/ │ ├── __init__.py │ ├── checkpoint/ │ │ └── detection_checkpoint.py │ ├── config.py │ ├── data/ │ │ ├── __init__.py │ │ ├── build.py │ │ ├── common.py │ │ ├── dataset_mapper.py │ │ ├── datasets/ │ │ │ ├── builtin.py │ │ │ └── cityscapes_foggy.py │ │ ├── detection_utils.py │ │ └── transforms/ │ │ └── augmentation_impl.py │ ├── engine/ │ │ ├── hooks.py │ │ ├── probe.py │ │ └── trainer.py │ ├── evaluation/ │ │ ├── __init__.py │ │ ├── coco_evaluation.py │ │ └── pascal_voc_evaluation.py │ ├── modeling/ │ │ ├── meta_arch/ │ │ │ ├── rcnn.py │ │ │ ├── ts_ensemble.py │ │ │ └── vgg.py │ │ ├── proposal_generator/ │ │ │ └── rpn.py │ │ └── roi_heads/ │ │ ├── fast_rcnn.py │ │ └── roi_heads.py │ └── solver/ │ ├── build.py │ └── lr_scheduler.py ├── configs/ │ ├── Base-RCNN-C4.yaml │ ├── faster_rcnn_R101_cross_clipart.yaml │ ├── faster_rcnn_R101_cross_clipart_b4.yaml │ ├── faster_rcnn_R101_cross_water.yaml │ └── faster_rcnn_VGG_cross_city.yaml ├── prod_lib/ │ ├── TARGETS │ ├── config/ │ │ └── defaults.py │ ├── data/ │ │ ├── builtin.py │ │ └── cityscapes_foggy.py │ ├── engine/ │ │ ├── probe.py │ │ └── trainer.py │ ├── evaluation/ │ │ ├── __init__.py │ │ ├── coco_evaluation.py │ │ └── pascal_voc_evaluation.py │ ├── modeling/ │ │ ├── daobj_rcnn.py │ │ └── vgg.py │ └── runner/ │ ├── __init__.py │ └── runner.py └── train_net.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ **/__pycache__/ ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct ## Our Pledge In the interest of 
fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. 
Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. This Code of Conduct also applies outside the project spaces when there is a reasonable belief that an individual's behavior may have a negative impact on the project or its community. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Adaptive Teacher We want to make contributing to this project as easy and transparent as possible. ## Our Development Process To be added. ## Pull Requests We actively welcome your pull requests. 1. Fork the repo and create your branch from `main`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. 4. 
Ensure the test suite passes. 5. Make sure your code lints. 6. If you haven't already, complete the Contributor License Agreement ("CLA"). ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need to do this once to work on any of Meta's open source projects. Complete your CLA here: ## Issues We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe disclosure of security bugs. In those cases, please go through the process outlined on that page and do not file a public issue. ## Coding Style * 2 spaces for indentation rather than tabs * 80 character line length * ... ## License By contributing to Adaptive Teacher, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree. ================================================ FILE: LICENSE ================================================ Attribution-NonCommercial 4.0 International ======================================================================= Creative Commons Corporation ("Creative Commons") is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an "as-is" basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible. 
Using Creative Commons Public Licenses Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC- licensed material, or material used under an exception or limitation to copyright. More considerations for licensors: wiki.creativecommons.org/Considerations_for_licensors Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason--for example, because of any applicable exception or limitation to copyright--then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. 
More_considerations for the public: wiki.creativecommons.org/Considerations_for_licensees ======================================================================= Creative Commons Attribution-NonCommercial 4.0 International Public License By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. Section 1 -- Definitions. a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. c. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. d. 
Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. e. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. f. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. g. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. h. Licensor means the individual(s) or entity(ies) granting rights under this Public License. i. NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. j. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. k. 
Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. l. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. Section 2 -- Scope. a. License grant. 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: a. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and b. produce, reproduce, and Share Adapted Material for NonCommercial purposes only. 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 3. Term. The term of this Public License is specified in Section 6(a). 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a) (4) never produces Adapted Material. 5. Downstream recipients. a. Offer from the Licensor -- Licensed Material. 
Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. b. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). b. Other rights. 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 2. Patent and trademark rights are not licensed under this Public License. 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. Section 3 -- License Conditions. Your exercise of the Licensed Rights is expressly made subject to the following conditions. a. Attribution. 1. If You Share the Licensed Material (including in modified form), You must: a. retain the following if it is supplied by the Licensor with the Licensed Material: i. 
identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); ii. a copyright notice; iii. a notice that refers to this Public License; iv. a notice that refers to the disclaimer of warranties; v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; b. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and c. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. Section 4 -- Sui Generis Database Rights. Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and c. 
You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. Section 5 -- Disclaimer of Warranties and Limitation of Liability. a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. Section 6 -- Term and Termination. a. This Public License applies for the term of the Copyright and Similar Rights licensed here. 
However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 2. upon express reinstatement by the Licensor. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 -- Other Terms and Conditions. a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. Section 8 -- Interpretation. a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. d. 
Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. ======================================================================= Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” The text of the Creative Commons public licenses is dedicated to the public domain under the CC0 Public Domain Dedication. Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark "Creative Commons" or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. Creative Commons may be contacted at creativecommons.org. ================================================ FILE: README.md ================================================ # Cross-Domain Adaptive Teacher for Object Detection [![License: CC BY-NC 4.0](https://img.shields.io/badge/License-CC%20BY--NC%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc/4.0/) [![License: CC BY-NC 4.0](https://licensebuttons.net/l/by-nc/4.0/80x15.png)](https://creativecommons.org/licenses/by-nc/4.0/) This is the PyTorch implementation of our paper:
**Cross-Domain Adaptive Teacher for Object Detection**
[Yu-Jhe Li](https://yujheli.github.io/), [Xiaoliang Dai](https://sites.google.com/view/xiaoliangdai), [Chih-Yao Ma](https://chihyaoma.github.io/), [Yen-Cheng Liu](https://ycliu93.github.io/), [Kan Chen](https://kanchen.info/), [Bichen Wu](https://scholar.google.com/citations?user=K3QJPdMAAAAJ&hl=en), [Zijian He](https://research.fb.com/people/he-zijian/), [Kris Kitani](http://www.cs.cmu.edu/~kkitani/), [Peter Vajda](https://sites.google.com/site/vajdap)
IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2022
[[Paper](https://openaccess.thecvf.com/content/CVPR2022/papers/Li_Cross-Domain_Adaptive_Teacher_for_Object_Detection_CVPR_2022_paper.pdf)] [[Project](https://yujheli.github.io/projects/adaptiveteacher.html)]

# Installation ## Prerequisites - Python ≥ 3.6 - PyTorch ≥ 1.5 and torchvision that matches the PyTorch installation. - Detectron2 == 0.3 (The version I used to run my code) ## Our tested environment - 8 V100 (16 batch size) - 4 2080 Ti (4 batch size) ## Install python env To install required dependencies on the virtual environment of the python (e.g., virtualenv for python3), please run the following command at the root of this code: ``` $ python3 -m venv /path/to/new/virtual/environment/. $ source /path/to/new/virtual/environment/bin/activate ``` For example: ``` $ mkdir python_env $ python3 -m venv python_env/ $ source python_env/bin/activate ``` ## Build Detectron2 from Source Follow the [INSTALL.md](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md) to install Detectron2. ## Dataset download 1. Download the datasets 2. Organize the dataset as the Cityscapes and PASCAL VOC format following: ```shell adaptive_teacher/ └── datasets/ └── cityscapes/ ├── gtFine/ ├── train/ └── test/ └── val/ ├── leftImg8bit/ ├── train/ └── test/ └── val/ └── cityscapes_foggy/ ├── gtFine/ ├── train/ └── test/ └── val/ ├── leftImg8bit/ ├── train/ └── test/ └── val/ └── VOC2012/ ├── Annotations/ ├── ImageSets/ └── JPEGImages/ └── clipark/ ├── Annotations/ ├── ImageSets/ └── JPEGImages/ └── watercolor/ ├── Annotations/ ├── ImageSets/ └── JPEGImages/ ``` # Training - Train the Adaptive Teacher under PASCAL VOC (source) and Clipart1k (target) ```shell python train_net.py \ --num-gpus 8 \ --config configs/faster_rcnn_R101_cross_clipart.yaml\ OUTPUT_DIR output/exp_clipart ``` - Train the Adaptive Teacher under cityscapes (source) and foggy cityscapes (target) ```shell python train_net.py\ --num-gpus 8\ --config configs/faster_rcnn_VGG_cross_city.yaml\ OUTPUT_DIR output/exp_city ``` ## Resume the training ```shell python train_net.py \ --resume \ --num-gpus 8 \ --config configs/faster_rcnn_R101_cross_clipart.yaml MODEL.WEIGHTS .pth ``` ## Evaluation ```shell python 
train_net.py \ --eval-only \ --num-gpus 8 \ --config configs/faster_rcnn_R101_cross_clipart.yaml \ MODEL.WEIGHTS .pth ``` ## Results and Model Weights If you urgently need the pre-trained weights, please download our internal prod_weights here at the [Link](https://drive.google.com/drive/folders/17p8oYjhmoA77_hyVZq4WLJezsUiSZhdi?usp=sharing). Please note that the key name in the pre-trained model is slightly different and you will need to align manually. Otherwise, please wait and we will try to release the local weights in the future. ### Real to Artistic Adaptation: | Backbone | Source set (labeled) | Target set (unlabeled) | Batch size | AP@.5 | Model Weights | Comment | | :-----: | :---------------: | :----------------: | :---------------------: | :-----: | :----------: |:-----: | | R101 | VOC12 | Clipart1k | 16 labeled + 16 unlabeled | 40.1 | [link](https://drive.google.com/file/d/1mzqSlkftJDTj1IWZC0huuMIzWDtfSaL0/view?usp=sharing)| Ours w/o discriminator (dis=0)| | R101 | VOC12 | Clipart1k | 4 labeled + 4 unlabeled | 47.2 | [link](https://drive.google.com/file/d/1F72bfPP-5uu4H2rS_OscSLVhvjHeylR-/view?usp=sharing)| lr=0.01, dis_w=0.1, default | | R101 | VOC12 | Clipart1k | 16 labeled + 16 unlabeled | 49.6 | [link](https://drive.google.com/file/d/1qbueKiNPLIP4gFJrUQi_1kpCAQmUivFG/view?usp=sharing)| Ours in the paper, unsup_w=0.5| | R101+FPN | VOC12 | Clipart1k | 16 labeled + 16 unlabeled | 51.2 |link (coming soon) | For future work| ### Weather Adaptation: | Backbone | Source set (labeled) | Target set (unlabeled) | Batch size | AP@.5 | Model Weights | Comment| | :-----: | :---------------: | :----------------: | :---------------------: | :-----: | :--------------------------------------------------: |:-----: | | VGG16| Cityscapes | Foggy Cityscapes (ALL) | 16 labeled + 16 unlabeled | 48.7 | link (coming soon)|Ours w/o discriminator| | VGG16| Cityscapes | Foggy Cityscapes (ALL) | 16 labeled + 16 unlabeled | 50.9 | link (coming soon)|Ours in the paper| | VGG16|
Cityscapes | Foggy Cityscapes (0.02) | 16 labeled + 16 unlabeled | in progress | link (coming soon)|Ours in the paper| | VGG16+FPN | Cityscapes | Foggy Cityscapes (ALL) | 16 labeled + 16 unlabeled | 57.4 |link (coming soon) |For future work| ## Citation If you use Adaptive Teacher in your research or wish to refer to the results published in the paper, please use the following BibTeX entry. ```BibTeX @inproceedings{li2022cross, title={Cross-Domain Adaptive Teacher for Object Detection}, author={Li, Yu-Jhe and Dai, Xiaoliang and Ma, Chih-Yao and Liu, Yen-Cheng and Chen, Kan and Wu, Bichen and He, Zijian and Kitani, Kris and Vajda, Peter}, booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year={2022} } ``` Also, if you use Detectron2 in your research, please use the following BibTeX entry. ```BibTeX @misc{wu2019detectron2, author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and Wan-Yen Lo and Ross Girshick}, title = {Detectron2}, howpublished = {\url{https://github.com/facebookresearch/detectron2}}, year = {2019} } ``` ## License This project is licensed under CC-BY-NC 4.0 License, as found in the LICENSE file. ================================================ FILE: adapteacher/__init__.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from .config import add_ateacher_config ================================================ FILE: adapteacher/checkpoint/detection_checkpoint.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
# All Rights Reserved
from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts
from detectron2.checkpoint import DetectionCheckpointer

# for load_student_model
from typing import Any
from fvcore.common.checkpoint import _strip_prefix_if_present, _IncompatibleKeys


class DetectionTSCheckpointer(DetectionCheckpointer):
    """
    Checkpointer for a teacher-student (TS) ensemble model.

    Extends detectron2's ``DetectionCheckpointer`` so that Caffe2-format
    pretrained weights (checkpoints whose ``__author__`` field is "Caffe2")
    are loaded into the student sub-model ``self.model.modelStudent`` only,
    while any other checkpoint is loaded into the whole wrapped model via
    the parent class.
    """

    def _load_model(self, checkpoint):
        """
        Load ``checkpoint`` into the model and return the incompatibilities.

        Two cases, keyed on ``checkpoint["__author__"]``:

        * ``"Caffe2"`` — treated as a pretrained backbone: weights are
          (optionally name-matched and) loaded into ``self.model.modelStudent``
          only, via :meth:`_load_student_model`.
        * anything else — treated as a whole-model checkpoint and loaded via
          ``super()._load_model``.

        In both cases, ``pixel_mean``/``pixel_std`` are removed from the
        reported missing keys when the model has those buffers, because they
        are (re-)initialized from the config anyway.

        Note: ``checkpoint`` is mutated in place (``checkpoint["model"]`` may
        be replaced with the aligned state dict, and the student path pops it).
        """
        if checkpoint.get("__author__", None) == "Caffe2":
            # pretrained model weight: only update student model
            if checkpoint.get("matching_heuristics", False):
                self._convert_ndarray_to_tensor(checkpoint["model"])
                # convert weights by name-matching heuristics against the
                # student sub-model's state dict
                model_state_dict = self.model.modelStudent.state_dict()
                align_and_update_state_dicts(
                    model_state_dict,
                    checkpoint["model"],
                    c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
                )
                checkpoint["model"] = model_state_dict
            # for non-caffe2 models, use standard ways to load it
            incompatible = self._load_student_model(checkpoint)

            model_buffers = dict(self.model.modelStudent.named_buffers(recurse=False))
            for k in ["pixel_mean", "pixel_std"]:
                # Ignore missing key message about pixel_mean/std.
                # Though they may be missing in old checkpoints, they will be correctly
                # initialized from config anyway.
                if k in model_buffers:
                    try:
                        incompatible.missing_keys.remove(k)
                    except ValueError:
                        pass
            return incompatible

        else:  # whole model
            if checkpoint.get("matching_heuristics", False):
                self._convert_ndarray_to_tensor(checkpoint["model"])
                # convert weights by name-matching heuristics against the
                # full (teacher + student) model's state dict
                model_state_dict = self.model.state_dict()
                align_and_update_state_dicts(
                    model_state_dict,
                    checkpoint["model"],
                    c2_conversion=checkpoint.get("__author__", None) == "Caffe2",
                )
                checkpoint["model"] = model_state_dict
            # for non-caffe2 models, use standard ways to load it
            incompatible = super()._load_model(checkpoint)
            model_buffers = dict(self.model.named_buffers(recurse=False))
            for k in ["pixel_mean", "pixel_std"]:
                # Though they may be missing in old checkpoints, they will be correctly
                # initialized from config anyway.
                if k in model_buffers:
                    try:
                        incompatible.missing_keys.remove(k)
                    except ValueError:
                        pass
            return incompatible

    def _load_student_model(self, checkpoint: Any) -> _IncompatibleKeys:  # pyre-ignore
        """
        Load ``checkpoint["model"]`` into ``self.model.modelStudent`` only.

        Steps: pop the state dict out of ``checkpoint`` (mutating it),
        convert ndarrays to tensors, strip a leading ``"module."`` prefix
        left by DataParallel/DDP serialization, drop any key whose tensor
        shape disagrees with the student's state dict, then load with
        ``strict=False``.

        Returns:
            _IncompatibleKeys: missing/unexpected keys from the non-strict
            load plus the list of ``(key, checkpoint_shape, model_shape)``
            tuples that were removed for shape mismatch.
        """
        checkpoint_state_dict = checkpoint.pop("model")
        self._convert_ndarray_to_tensor(checkpoint_state_dict)

        # if the state_dict comes from a model that was wrapped in a
        # DataParallel or DistributedDataParallel during serialization,
        # remove the "module" prefix before performing the matching.
        _strip_prefix_if_present(checkpoint_state_dict, "module.")

        # work around https://github.com/pytorch/pytorch/issues/24139
        model_state_dict = self.model.modelStudent.state_dict()
        incorrect_shapes = []
        for k in list(checkpoint_state_dict.keys()):
            if k in model_state_dict:
                shape_model = tuple(model_state_dict[k].shape)
                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
                if shape_model != shape_checkpoint:
                    # record and drop mismatched entries so the non-strict
                    # load below cannot fail on a size error
                    incorrect_shapes.append((k, shape_checkpoint, shape_model))
                    checkpoint_state_dict.pop(k)
        # pyre-ignore
        incompatible = self.model.modelStudent.load_state_dict(
            checkpoint_state_dict, strict=False
        )
        return _IncompatibleKeys(
            missing_keys=incompatible.missing_keys,
            unexpected_keys=incompatible.unexpected_keys,
            incorrect_shapes=incorrect_shapes,
        )


# class DetectionCheckpointer(Checkpointer):
#     """
#     Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2
#     model zoo, and apply conversions for legacy models.
# """ # def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): # is_main_process = comm.is_main_process() # super().__init__( # model, # save_dir, # save_to_disk=is_main_process if save_to_disk is None else save_to_disk, # **checkpointables, # ) # def _load_file(self, filename): # if filename.endswith(".pkl"): # with PathManager.open(filename, "rb") as f: # data = pickle.load(f, encoding="latin1") # if "model" in data and "__author__" in data: # # file is in Detectron2 model zoo format # self.logger.info("Reading a file from '{}'".format(data["__author__"])) # return data # else: # # assume file is from Caffe2 / Detectron1 model zoo # if "blobs" in data: # # Detection models have "blobs", but ImageNet models don't # data = data["blobs"] # data = {k: v for k, v in data.items() if not k.endswith("_momentum")} # return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} # loaded = super()._load_file(filename) # load native pth checkpoint # if "model" not in loaded: # loaded = {"model": loaded} # return loaded # def _load_model(self, checkpoint): # if checkpoint.get("matching_heuristics", False): # self._convert_ndarray_to_tensor(checkpoint["model"]) # # convert weights by name-matching heuristics # model_state_dict = self.model.state_dict() # align_and_update_state_dicts( # model_state_dict, # checkpoint["model"], # c2_conversion=checkpoint.get("__author__", None) == "Caffe2", # ) # checkpoint["model"] = model_state_dict # # for non-caffe2 models, use standard ways to load it # super()._load_model(checkpoint) ================================================ FILE: adapteacher/config.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from detectron2.config import CfgNode as CN def add_ateacher_config(cfg): """ Add config for semisupnet. 
""" _C = cfg _C.TEST.VAL_LOSS = True _C.MODEL.RPN.UNSUP_LOSS_WEIGHT = 1.0 _C.MODEL.RPN.LOSS = "CrossEntropy" _C.MODEL.ROI_HEADS.LOSS = "CrossEntropy" _C.SOLVER.IMG_PER_BATCH_LABEL = 1 _C.SOLVER.IMG_PER_BATCH_UNLABEL = 1 _C.SOLVER.FACTOR_LIST = (1,) _C.DATASETS.TRAIN_LABEL = ("coco_2017_train",) _C.DATASETS.TRAIN_UNLABEL = ("coco_2017_train",) _C.DATASETS.CROSS_DATASET = True _C.TEST.EVALUATOR = "COCOeval" _C.SEMISUPNET = CN() # Output dimension of the MLP projector after `res5` block _C.SEMISUPNET.MLP_DIM = 128 # Semi-supervised training _C.SEMISUPNET.Trainer = "ateacher" _C.SEMISUPNET.BBOX_THRESHOLD = 0.7 _C.SEMISUPNET.PSEUDO_BBOX_SAMPLE = "thresholding" _C.SEMISUPNET.TEACHER_UPDATE_ITER = 1 _C.SEMISUPNET.BURN_UP_STEP = 12000 _C.SEMISUPNET.EMA_KEEP_RATE = 0.0 _C.SEMISUPNET.UNSUP_LOSS_WEIGHT = 4.0 _C.SEMISUPNET.SUP_LOSS_WEIGHT = 0.5 _C.SEMISUPNET.LOSS_WEIGHT_TYPE = "standard" _C.SEMISUPNET.DIS_TYPE = "res4" _C.SEMISUPNET.DIS_LOSS_WEIGHT = 0.1 # dataloader # supervision level _C.DATALOADER.SUP_PERCENT = 100.0 # 5 = 5% dataset as labeled set _C.DATALOADER.RANDOM_DATA_SEED = 0 # random seed to read data _C.DATALOADER.RANDOM_DATA_SEED_PATH = "dataseed/COCO_supervision.txt" _C.EMAMODEL = CN() _C.EMAMODEL.SUP_CONSIST = True ================================================ FILE: adapteacher/data/__init__.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from .build import ( build_detection_test_loader, build_detection_semisup_train_loader, ) ================================================ FILE: adapteacher/data/build.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import numpy as np
import operator
import json
import torch.utils.data
from detectron2.utils.comm import get_world_size
from detectron2.data.common import (
    DatasetFromList,
    MapDataset,
)
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.samplers import (
    InferenceSampler,
    RepeatFactorTrainingSampler,
    TrainingSampler,
)
from detectron2.data.build import (
    trivial_batch_collator,
    worker_init_reset_seed,
    get_detection_dataset_dicts,
    build_batch_data_loader,
)
from adapteacher.data.common import (
    AspectRatioGroupedSemiSupDatasetTwoCrop,
)

"""
This file contains the default logic to build a dataloader for training or testing.
"""


def divide_label_unlabel(
    dataset_dicts, SupPercent, random_data_seed, random_data_seed_path
):
    """Split ``dataset_dicts`` into labeled and unlabeled subsets.

    The labeled indices are read from a pre-generated seed file so that runs
    with the same (percentage, seed) pair reproduce the same split.

    Args:
        dataset_dicts (list[dict]): all dataset records.
        SupPercent (float): supervision percentage, e.g. 5 for 5%.
        random_data_seed (int): which pre-generated split to use.
        random_data_seed_path (str): path to the JSON seed file, keyed by
            percentage then by seed.

    Returns:
        tuple[list[dict], list[dict]]: (labeled records, unlabeled records).
    """
    num_all = len(dataset_dicts)
    num_label = int(SupPercent / 100.0 * num_all)

    # Read labeled indices from the pre-generated data seed file.
    with open(random_data_seed_path) as COCO_sup_file:
        coco_random_idx = json.load(COCO_sup_file)

    labeled_idx = np.array(coco_random_idx[str(SupPercent)][str(random_data_seed)])
    assert labeled_idx.shape[0] == num_label, "Number of READ_DATA is mismatched."

    label_dicts = []
    unlabel_dicts = []
    labeled_idx = set(labeled_idx)  # O(1) membership tests in the loop below

    for i in range(len(dataset_dicts)):
        if i in labeled_idx:
            label_dicts.append(dataset_dicts[i])
        else:
            unlabel_dicts.append(dataset_dicts[i])

    return label_dicts, unlabel_dicts


# used by supervised-only baseline trainer
def build_detection_semisup_train_loader(cfg, mapper=None):
    """Build a training loader over the *labeled* subset only."""
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )

    # Divide into labeled and unlabeled sets according to supervision percentage
    label_dicts, unlabel_dicts = divide_label_unlabel(
        dataset_dicts,
        cfg.DATALOADER.SUP_PERCENT,
        cfg.DATALOADER.RANDOM_DATA_SEED,
        cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
    )

    dataset = DatasetFromList(label_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = (
            RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                label_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
            )
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    # Log number of labeled samples and the supervision percentage.
    logger.info("Number of training samples " + str(len(dataset)))
    logger.info("Supervision percentage " + str(cfg.DATALOADER.SUP_PERCENT))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )


# used by evaluation
def build_detection_test_loader(cfg, dataset_name, mapper=None):
    """Build a batch-size-1, sequential test loader for ``dataset_name``."""
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[
                list(cfg.DATASETS.TEST).index(dataset_name)
            ]
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader


# used by unbiased teacher trainer
def build_detection_semisup_train_loader_two_crops(cfg, mapper=None):
    """Build a paired (labeled, unlabeled) loader with two crops per image."""
    if cfg.DATASETS.CROSS_DATASET:  # cross-dataset (e.g., coco-additional)
        label_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN_LABEL,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
        unlabel_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN_UNLABEL,
            filter_empty=False,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )
    else:  # different degree of supervision (e.g., COCO-supervision)
        dataset_dicts = get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN,
            filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
            min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
            if cfg.MODEL.KEYPOINT_ON
            else 0,
            proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
            if cfg.MODEL.LOAD_PROPOSALS
            else None,
        )

        # Divide into labeled and unlabeled sets according to supervision percentage
        label_dicts, unlabel_dicts = divide_label_unlabel(
            dataset_dicts,
            cfg.DATALOADER.SUP_PERCENT,
            cfg.DATALOADER.RANDOM_DATA_SEED,
            cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
        )

    label_dataset = DatasetFromList(label_dicts, copy=False)
    # exclude the labeled set from unlabeled dataset
    unlabel_dataset = DatasetFromList(unlabel_dicts, copy=False)
    # include the labeled set in unlabel dataset
    # unlabel_dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    label_dataset = MapDataset(label_dataset, mapper)
    unlabel_dataset = MapDataset(unlabel_dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        label_sampler = TrainingSampler(len(label_dataset))
        unlabel_sampler = TrainingSampler(len(unlabel_dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        raise NotImplementedError("{} not yet supported.".format(sampler_name))
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return build_semisup_batch_data_loader_two_crop(
        (label_dataset, unlabel_dataset),
        (label_sampler, unlabel_sampler),
        cfg.SOLVER.IMG_PER_BATCH_LABEL,
        cfg.SOLVER.IMG_PER_BATCH_UNLABEL,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )


# batch data loader
def build_semisup_batch_data_loader_two_crop(
    dataset,
    sampler,
    total_batch_size_label,
    total_batch_size_unlabel,
    *,
    aspect_ratio_grouping=False,
    num_workers=0
):
    """Wrap labeled/unlabeled datasets into one aspect-ratio-grouped loader.

    ``total_batch_size_*`` are global batch sizes and must be divisible by
    the world size; each worker gets its per-GPU share.
    """
    world_size = get_world_size()
    assert (
        total_batch_size_label > 0 and total_batch_size_label % world_size == 0
    ), "Total label batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_label, world_size
    )
    # BUGFIX: the message previously reported total_batch_size_label here.
    assert (
        total_batch_size_unlabel > 0 and total_batch_size_unlabel % world_size == 0
    ), "Total unlabel batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size_unlabel, world_size
    )

    batch_size_label = total_batch_size_label // world_size
    batch_size_unlabel = total_batch_size_unlabel // world_size

    label_dataset, unlabel_dataset = dataset
    label_sampler, unlabel_sampler = sampler

    if aspect_ratio_grouping:
        label_data_loader = torch.utils.data.DataLoader(
            label_dataset,
            sampler=label_sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0
            ),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        unlabel_data_loader = torch.utils.data.DataLoader(
            unlabel_dataset,
            sampler=unlabel_sampler,
            num_workers=num_workers,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0
            ),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        return AspectRatioGroupedSemiSupDatasetTwoCrop(
            (label_data_loader, unlabel_data_loader),
            (batch_size_label, batch_size_unlabel),
        )
    else:
        raise NotImplementedError("ASPECT_RATIO_GROUPING = False is not supported yet")
""" def __getitem__(self, idx): retry_count = 0 cur_idx = int(idx) while True: data = self._map_func(self._dataset[cur_idx]) if data is not None: self._fallback_candidates.add(cur_idx) return data # _map_func fails for this idx, use a random new index from the pool retry_count += 1 self._fallback_candidates.discard(cur_idx) cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] if retry_count >= 3: logger = logging.getLogger(__name__) logger.warning( "Failed to apply `_map_func` for idx: {}, retry count: {}".format( idx, retry_count ) ) class AspectRatioGroupedDatasetTwoCrop(AspectRatioGroupedDataset): """ Batch data that have similar aspect ratio together. In this implementation, images whose aspect ratio < (or >) 1 will be batched together. This improves training speed because the images then need less padding to form a batch. It assumes the underlying dataset produces dicts with "width" and "height" keys. It will then produce a list of original dicts with length = batch_size, all with similar aspect ratios. """ def __init__(self, dataset, batch_size): """ Args: dataset: an iterable. Each element must be a dict with keys "width" and "height", which will be used to batch data. batch_size (int): """ self.dataset = dataset self.batch_size = batch_size self._buckets = [[] for _ in range(2)] self._buckets_key = [[] for _ in range(2)] # Hard-coded two aspect ratio groups: w > h and w < h. 
# Can add support for more aspect ratio groups, but doesn't seem useful def __iter__(self): for d in self.dataset: # d is a tuple with len = 2 # It's two images (same size) from the same image instance w, h = d[0]["width"], d[0]["height"] bucket_id = 0 if w > h else 1 # bucket = bucket for normal images bucket = self._buckets[bucket_id] bucket.append(d[0]) # buckets_key = bucket for augmented images buckets_key = self._buckets_key[bucket_id] buckets_key.append(d[1]) if len(bucket) == self.batch_size: yield (bucket[:], buckets_key[:]) del bucket[:] del buckets_key[:] class AspectRatioGroupedSemiSupDatasetTwoCrop(AspectRatioGroupedDataset): """ Batch data that have similar aspect ratio together. In this implementation, images whose aspect ratio < (or >) 1 will be batched together. This improves training speed because the images then need less padding to form a batch. It assumes the underlying dataset produces dicts with "width" and "height" keys. It will then produce a list of original dicts with length = batch_size, all with similar aspect ratios. """ def __init__(self, dataset, batch_size): """ Args: dataset: a tuple containing two iterable generators. (labeled and unlabeled data) Each element must be a dict with keys "width" and "height", which will be used to batch data. batch_size (int): """ self.label_dataset, self.unlabel_dataset = dataset self.batch_size_label = batch_size[0] self.batch_size_unlabel = batch_size[1] self._label_buckets = [[] for _ in range(2)] self._label_buckets_key = [[] for _ in range(2)] self._unlabel_buckets = [[] for _ in range(2)] self._unlabel_buckets_key = [[] for _ in range(2)] # Hard-coded two aspect ratio groups: w > h and w < h. 
# Can add support for more aspect ratio groups, but doesn't seem useful def __iter__(self): label_bucket, unlabel_bucket = [], [] for d_label, d_unlabel in zip(self.label_dataset, self.unlabel_dataset): # d is a tuple with len = 2 # It's two images (same size) from the same image instance # d[0] is with strong augmentation, d[1] is with weak augmentation # because we are grouping images with their aspect ratio # label and unlabel buckets might not have the same number of data # i.e., one could reach batch_size, while the other is still not if len(label_bucket) != self.batch_size_label: w, h = d_label[0]["width"], d_label[0]["height"] label_bucket_id = 0 if w > h else 1 label_bucket = self._label_buckets[label_bucket_id] label_bucket.append(d_label[0]) label_buckets_key = self._label_buckets_key[label_bucket_id] label_buckets_key.append(d_label[1]) if len(unlabel_bucket) != self.batch_size_unlabel: w, h = d_unlabel[0]["width"], d_unlabel[0]["height"] unlabel_bucket_id = 0 if w > h else 1 unlabel_bucket = self._unlabel_buckets[unlabel_bucket_id] unlabel_bucket.append(d_unlabel[0]) unlabel_buckets_key = self._unlabel_buckets_key[unlabel_bucket_id] unlabel_buckets_key.append(d_unlabel[1]) # yield the batch of data until all buckets are full if ( len(label_bucket) == self.batch_size_label and len(unlabel_bucket) == self.batch_size_unlabel ): # label_strong, label_weak, unlabed_strong, unlabled_weak yield ( label_bucket[:], label_buckets_key[:], unlabel_bucket[:], unlabel_buckets_key[:], ) del label_bucket[:] del label_buckets_key[:] del unlabel_bucket[:] del unlabel_buckets_key[:] ================================================ FILE: adapteacher/data/dataset_mapper.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
class DatasetMapperTwoCropSeparate(DatasetMapper):
    """
    Produce two augmented images from a single image instance.

    Both augmented views share the same cropping/geometric transforms (and
    therefore the same size); the first view additionally receives a strong
    (torchvision, color/blur-style) augmentation.

    A callable which takes a dataset dict in Detectron2 Dataset format and
    maps it into a format used by the model. The callable does:

    1. Read the image from "file_name"
    2. Apply cropping/geometric transforms to the image and annotations
    3. Convert data and annotations to Tensor and :class:`Instances`
    """

    def __init__(self, cfg, is_train=True):
        # Geometric ("weak") augmentations, optionally preceded by a crop.
        self.augmentation = utils.build_augmentation(cfg, is_train)
        if cfg.INPUT.CROP.ENABLED and is_train:
            self.augmentation.insert(
                0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
            )
            logging.getLogger(__name__).info(
                "Cropping used in training: " + str(self.augmentation[0])
            )
            # Cropping can clip masks; recompute boxes from masks afterwards.
            self.compute_tight_boxes = True
        else:
            self.compute_tight_boxes = False
        # Color/blur-style ("strong") augmentation applied to one view only.
        self.strong_augmentation = build_strong_augmentation(cfg, is_train)

        self.img_format = cfg.INPUT.FORMAT
        self.mask_on = cfg.MODEL.MASK_ON
        self.mask_format = cfg.INPUT.MASK_FORMAT
        self.keypoint_on = cfg.MODEL.KEYPOINT_ON
        self.load_proposals = cfg.MODEL.LOAD_PROPOSALS

        if self.keypoint_on and is_train:
            self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(
                cfg.DATASETS.TRAIN
            )
        else:
            self.keypoint_hflip_indices = None

        if self.load_proposals:
            self.proposal_min_box_size = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE
            self.proposal_topk = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        self.is_train = is_train

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2
                Dataset format.

        Returns:
            At training time, a tuple ``(strong_dict, weak_dict)`` of two
            dicts in the format builtin detectron2 models accept; at test
            time, a single such dict.
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # modified below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        # utils.check_image_size(dataset_dict, image)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        # Apply the shared weak/geometric augmentations first.
        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image_weak_aug.shape[:2]  # h, w

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )

            if self.compute_tight_boxes and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # Apply the strong augmentation on top of the weakly-augmented view.
        # torchvision augmentations are not compatible with detectron2's
        # numpy image format, so convert through PIL first.
        pil_image = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
        image_strong_aug = np.array(self.strong_augmentation(pil_image))
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
        )

        # The "key" dict carries the weakly-augmented view of the same image.
        dataset_dict_key = copy.deepcopy(dataset_dict)
        dataset_dict_key["image"] = torch.as_tensor(
            np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
        )
        # Both views must have identical spatial size.
        assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
        assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
        return (dataset_dict, dataset_dict_key)
def register_coco_unlabel_instances(name, metadata, json_file, image_root):
    """
    Register a dataset in COCO's json annotation format for instance
    detection, instance segmentation and keypoint detection.
    (i.e., Type 1 and 2 in http://cocodataset.org/#format-data.
    `instances*.json` and `person_keypoints*.json` in the dataset).

    This is an example of how to register a new dataset. You can do
    something similar to this function, to register new datasets.

    Args:
        name (str): the name that identifies a dataset,
            e.g. "coco_2014_train".
        metadata (dict): extra metadata associated with this dataset. You can
            leave it as an empty dict.
        json_file (str): path to the json instance annotation file.
        image_root (str or path-like): directory which contains all the images.
    """
    assert isinstance(name, str), name
    assert isinstance(json_file, (str, os.PathLike)), json_file
    assert isinstance(image_root, (str, os.PathLike)), image_root

    # 1. register a function which returns dicts
    DatasetCatalog.register(
        name, lambda: load_coco_unlabel_json(json_file, image_root, name)
    )

    # 2. Optionally, add metadata about this dataset,
    # since they might be useful in evaluation, visualization or logging
    MetadataCatalog.get(name).set(
        json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata
    )


def load_coco_unlabel_json(
    json_file, image_root, dataset_name=None, extra_annotation_keys=None
):
    """Load image records (no annotations) from a COCO-format json file.

    Args:
        json_file (str): path to a COCO image_info-style json file.
        image_root (str): directory containing the images.
        dataset_name: unused; kept for signature compatibility with
            detectron2's ``load_coco_json``.
        extra_annotation_keys: unused; kept for signature compatibility.

    Returns:
        list[dict]: records with "file_name", "height", "width", "image_id".
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    # COCO() prints to stdout on load; silence it.
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info(
            "Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())
        )

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)

    logger.info("Loaded {} images in COCO format from {}".format(len(imgs), json_file))

    dataset_dicts = []
    for img_dict in imgs:
        record = {
            "file_name": os.path.join(image_root, img_dict["file_name"]),
            "height": img_dict["height"],
            "width": img_dict["width"],
            "image_id": img_dict["id"],
        }
        dataset_dicts.append(record)

    return dataset_dicts


_root = os.getenv("DETECTRON2_DATASETS", "datasets")
register_coco_unlabel(_root)


# ==== Predefined splits for raw cityscapes foggy images ===========
_RAW_CITYSCAPES_SPLITS = {
    "cityscapes_foggy_train": (
        "cityscapes_foggy/leftImg8bit/train/",
        "cityscapes_foggy/gtFine/train/",
    ),
    "cityscapes_foggy_val": (
        "cityscapes_foggy/leftImg8bit/val/",
        "cityscapes_foggy/gtFine/val/",
    ),
    "cityscapes_foggy_test": (
        "cityscapes_foggy/leftImg8bit/test/",
        "cityscapes_foggy/gtFine/test/",
    ),
}


def register_all_cityscapes_foggy(root):
    """Register the foggy-cityscapes splits with COCO-style evaluation."""
    for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items():
        meta = _get_builtin_metadata("cityscapes")
        image_dir = os.path.join(root, image_dir)
        gt_dir = os.path.join(root, gt_dir)
        inst_key = key
        # Default args (x=image_dir, y=gt_dir) bind the *current* loop values;
        # a plain closure would late-bind and register every split with the
        # last directory pair.
        DatasetCatalog.register(
            inst_key,
            lambda x=image_dir, y=gt_dir: load_cityscapes_instances(
                x, y, from_json=False, to_polygons=False
            ),
        )
        # NOTE: evaluator_type is deliberately "coco" (not
        # "cityscapes_instance" / "pascal_voc").
        MetadataCatalog.get(inst_key).set(
            image_dir=image_dir, gt_dir=gt_dir, evaluator_type="coco", **meta
        )


# ==== Predefined splits for Clipart (PASCAL VOC format) ===========
def register_all_clipart(root):
    """Register the Clipart1k splits with Pascal-VOC evaluation."""
    SPLITS = [
        ("Clipart1k_train", "clipart", "train"),
        ("Clipart1k_test", "clipart", "test"),
    ]
    for name, dirname, split in SPLITS:
        year = 2012
        register_pascal_voc(name, os.path.join(root, dirname), split, year)
        MetadataCatalog.get(name).evaluator_type = "pascal_voc"


# ==== Predefined splits for Watercolor (PASCAL VOC format) ===========
def register_all_water(root):
    """Register the Watercolor splits with the water-specific VOC evaluator."""
    SPLITS = [
        ("Watercolor_train", "watercolor", "train"),
        ("Watercolor_test", "watercolor", "test"),
    ]
    for name, dirname, split in SPLITS:
        year = 2012
        register_pascal_voc(name, os.path.join(root, dirname), split, year)
        # Watercolor shares only 6 classes with VOC; a dedicated evaluator
        # type handles the restricted class set.
        MetadataCatalog.get(name).evaluator_type = "pascal_voc_water"


register_all_cityscapes_foggy(_root)
register_all_clipart(_root)
register_all_water(_root)
import functools
import json
import logging
import multiprocessing as mp
import numpy as np
import os
from itertools import chain

import pycocotools.mask as mask_util
from PIL import Image

from detectron2.structures import BoxMode
from detectron2.utils.comm import get_world_size
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import setup_logger

try:
    import cv2  # noqa
except ImportError:
    # OpenCV is an optional dependency at the moment
    pass


logger = logging.getLogger(__name__)

# Debug switch: when True, only load images rendered at fog density 0.02.
load_only_002 = False


def _get_cityscapes_files(image_dir, gt_dir):
    """Collect (image, instanceIds, labelIds, polygons-json) path tuples.

    Walks the per-city subdirectories of ``image_dir`` and pairs each foggy
    image with its ground-truth files in ``gt_dir``. Only the first tuple's
    existence is verified (spot check).
    """
    files = []
    # scan through the directory
    cities = PathManager.ls(image_dir)
    logger.info(f"{len(cities)} cities found in '{image_dir}'.")
    for city in cities:
        city_img_dir = os.path.join(image_dir, city)
        city_gt_dir = os.path.join(gt_dir, city)
        for basename in PathManager.ls(city_img_dir):
            if load_only_002 and '0.02.png' not in basename:
                continue

            image_file = os.path.join(city_img_dir, basename)

            # suffix = "leftImg8bit.png"
            # assert basename.endswith(suffix), basename
            # basename = basename[: -len(suffix)]
            # Foggy filenames look like "<stem>leftImg8bit_foggy_beta_0.02.png";
            # split on the marker to recover "<stem>" for the gtFine filenames.
            suffix = 'leftImg8bit_foggy'
            basename = basename.split(suffix)[0]

            instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png")
            label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png")
            json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json")

            files.append((image_file, instance_file, label_file, json_file))
    assert len(files), "No images found in {}".format(image_dir)
    for f in files[0]:
        assert PathManager.isfile(f), f
    return files


def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        list[dict]: a list of dicts in Detectron2 standard format. (See
        `Using Custom Datasets` in the detectron2 tutorials.)
    """
    if from_json:
        assert to_polygons, (
            "Cityscapes's json annotations are in polygon format. "
            "Converting to mask format is not supported now."
        )
    files = _get_cityscapes_files(image_dir, gt_dir)

    logger.info("Preprocessing cityscapes annotations ...")
    # This is still not fast: all workers will execute duplicate works and will
    # take up to 10m on a 8GPU server.
    pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4))

    ret = pool.map(
        functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons),
        files,
    )
    logger.info("Loaded {} images from {}".format(len(ret), image_dir))
    # NOTE(review): pool.close() without join(); workers are left to wind down
    # on their own — confirm this is intentional.
    pool.close()

    # Map cityscape ids to contiguous ids
    from cityscapesscripts.helpers.labels import labels

    # Keep only "thing" classes that are evaluated; their order defines the
    # contiguous category ids.
    labels = [l for l in labels if l.hasInstances and not l.ignoreInEval]
    dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)}
    for dict_per_image in ret:
        for anno in dict_per_image["annotations"]:
            anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]]
    return ret


def load_cityscapes_semantic(image_dir, gt_dir):
    """
    Args:
        image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train".
        gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train".

    Returns:
        list[dict]: a list of dict, each has "file_name" and "sem_seg_file_name".
    """
    ret = []
    # gt_dir is small and contain many small files. make sense to fetch to local first
    gt_dir = PathManager.get_local_path(gt_dir)
    for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir):
        # Semantic segmentation training uses the *trainId* label maps, which
        # must be generated beforehand (see assertion below).
        label_file = label_file.replace("labelIds", "labelTrainIds")

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret.append(
            {
                "file_name": image_file,
                "sem_seg_file_name": label_file,
                "height": jsonobj["imgHeight"],
                "width": jsonobj["imgWidth"],
            }
        )
    assert len(ret), f"No images found in {image_dir}!"
    assert PathManager.isfile(
        ret[0]["sem_seg_file_name"]
    ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py"  # noqa
    return ret


def _cityscapes_files_to_dict(files, from_json, to_polygons):
    """
    Parse cityscapes annotation files to a instance segmentation dataset dict.

    Args:
        files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file)
        from_json (bool): whether to read annotations from the raw json file or the png files.
        to_polygons (bool): whether to represent the segmentation as polygons
            (COCO's format) instead of masks (cityscapes's format).

    Returns:
        A dict in Detectron2 Dataset format.
    """
    from cityscapesscripts.helpers.labels import id2label, name2label

    image_file, instance_id_file, _, json_file = files

    annos = []

    if from_json:
        from shapely.geometry import MultiPolygon, Polygon

        with PathManager.open(json_file, "r") as f:
            jsonobj = json.load(f)
        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": jsonobj["imgHeight"],
            "width": jsonobj["imgWidth"],
        }

        # `polygons_union` contains the union of all valid polygons.
        polygons_union = Polygon()

        # CityscapesScripts draw the polygons in sequential order
        # and each polygon *overwrites* existing ones. See
        # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py)  # noqa
        # We use reverse order, and each polygon *avoids* early ones.
        # This will resolve the ploygon overlaps in the same way as CityscapesScripts.
        for obj in jsonobj["objects"][::-1]:
            if "deleted" in obj:  # cityscapes data format specific
                continue
            label_name = obj["label"]

            try:
                label = name2label[label_name]
            except KeyError:
                if label_name.endswith("group"):  # crowd area
                    label = name2label[label_name[: -len("group")]]
                else:
                    raise
            if label.id < 0:  # cityscapes data format
                continue

            # Cityscapes's raw annotations uses integer coordinates
            # Therefore +0.5 here
            poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5
            # CityscapesScript uses PIL.ImageDraw.polygon to rasterize
            # polygons for evaluation. This function operates in integer space
            # and draws each pixel whose center falls into the polygon.
            # Therefore it draws a polygon which is 0.5 "fatter" in expectation.
            # We therefore dilate the input polygon by 0.5 as our input.
            poly = Polygon(poly_coord).buffer(0.5, resolution=4)

            if not label.hasInstances or label.ignoreInEval:
                # even if we won't store the polygon it still contributes to overlaps resolution
                polygons_union = polygons_union.union(poly)
                continue

            # Take non-overlapping part of the polygon
            poly_wo_overlaps = poly.difference(polygons_union)
            if poly_wo_overlaps.is_empty:
                continue
            polygons_union = polygons_union.union(poly)

            anno = {}
            anno["iscrowd"] = label_name.endswith("group")
            anno["category_id"] = label.id

            if isinstance(poly_wo_overlaps, Polygon):
                poly_list = [poly_wo_overlaps]
            elif isinstance(poly_wo_overlaps, MultiPolygon):
                poly_list = poly_wo_overlaps.geoms
            else:
                raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps))

            poly_coord = []
            for poly_el in poly_list:
                # COCO API can work only with exterior boundaries now, hence we store only them.
                # TODO: store both exterior and interior boundaries once other parts of the
                # codebase support holes in polygons.
                poly_coord.append(list(chain(*poly_el.exterior.coords)))
            anno["segmentation"] = poly_coord
            (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds

            anno["bbox"] = (xmin, ymin, xmax, ymax)
            anno["bbox_mode"] = BoxMode.XYXY_ABS

            annos.append(anno)
    else:
        # See also the official annotation parsing scripts at
        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py  # noqa
        with PathManager.open(instance_id_file, "rb") as f:
            inst_image = np.asarray(Image.open(f), order="F")
        # ids < 24 are stuff labels (filtering them first is about 5% faster)
        flattened_ids = np.unique(inst_image[inst_image >= 24])

        ret = {
            "file_name": image_file,
            "image_id": os.path.basename(image_file),
            "height": inst_image.shape[0],
            "width": inst_image.shape[1],
        }

        for instance_id in flattened_ids:
            # For non-crowd annotations, instance_id // 1000 is the label_id
            # Crowd annotations have <1000 instance ids
            label_id = instance_id // 1000 if instance_id >= 1000 else instance_id
            label = id2label[label_id]
            if not label.hasInstances or label.ignoreInEval:
                continue

            anno = {}
            anno["iscrowd"] = instance_id < 1000
            anno["category_id"] = label.id

            mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F")

            inds = np.nonzero(mask)
            ymin, ymax = inds[0].min(), inds[0].max()
            xmin, xmax = inds[1].min(), inds[1].max()
            anno["bbox"] = (xmin, ymin, xmax, ymax)
            # Skip degenerate (zero-area) boxes.
            if xmax <= xmin or ymax <= ymin:
                continue
            anno["bbox_mode"] = BoxMode.XYXY_ABS
            if to_polygons:
                # This conversion comes from D4809743 and D5171122,
                # when Mask-RCNN was first developed.
                contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[
                    -2
                ]
                polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3]
                # opencv's can produce invalid polygons
                if len(polygons) == 0:
                    continue
                anno["segmentation"] = polygons
            else:
                # RLE-encoded mask (cityscapes-style); expects Fortran-order uint8.
                anno["segmentation"] = mask_util.encode(mask[:, :, None])[0]
            annos.append(anno)
    ret["annotations"] = annos
    return ret



================================================
FILE: adapteacher/data/detection_utils.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging

import torchvision.transforms as transforms

from adapteacher.data.transforms.augmentation_impl import (
    GaussianBlur,
)


def build_strong_augmentation(cfg, is_train):
    """
    Create a list of :class:`Augmentation` from config.
    Now it includes resizing and flipping.

    Returns:
        list[Augmentation]
    """
    logger = logging.getLogger(__name__)
    augmentation = []
    if is_train:
        # This is similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation.append(
            transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8)
        )
        augmentation.append(transforms.RandomGrayscale(p=0.2))
        augmentation.append(transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=0.5))

        # Random erasing operates on tensors, so wrap with ToTensor/ToPILImage.
        randcrop_transform = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.RandomErasing(
                    p=0.7, scale=(0.05, 0.2), ratio=(0.3, 3.3), value="random"
                ),
                transforms.RandomErasing(
                    p=0.5, scale=(0.02, 0.2), ratio=(0.1, 6), value="random"
                ),
                transforms.RandomErasing(
                    p=0.3, scale=(0.02, 0.2), ratio=(0.05, 8), value="random"
                ),
                transforms.ToPILImage(),
            ]
        )
        augmentation.append(randcrop_transform)

    logger.info("Augmentations used in training: " + str(augmentation))
    return transforms.Compose(augmentation)



================================================
FILE: adapteacher/data/transforms/augmentation_impl.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
# All Rights Reserved
import random

from PIL import ImageFilter


class GaussianBlur:
    """
    Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709
    Adapted from MoCo:
    https://github.com/facebookresearch/moco/blob/master/moco/loader.py
    Note that this implementation does not seem to be exactly the same as
    described in SimCLR.
    """

    def __init__(self, sigma=[0.1, 2.0]):
        # [min, max] range for the blur radius; the list default is only read,
        # never mutated, so sharing it across instances is safe.
        self.sigma = sigma

    def __call__(self, x):
        # Sample a radius uniformly from [sigma[0], sigma[1]] per call.
        sigma = random.uniform(self.sigma[0], self.sigma[1])
        x = x.filter(ImageFilter.GaussianBlur(radius=sigma))
        return x



================================================
FILE: adapteacher/engine/hooks.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.engine.hooks import HookBase
import detectron2.utils.comm as comm
import torch
import numpy as np
from contextlib import contextmanager


class LossEvalHook(HookBase):
    """Periodically evaluates validation losses and logs them to the trainer storage.

    Args:
        eval_period (int): run the loss evaluation every this many iterations
            (0 disables periodic runs; it still runs at the final iteration).
        model: the model to evaluate; must accept the call convention selected
            by ``model_output``.
        data_loader: iterable of validation batches.
        model_output (str): one of "loss_only", "loss_proposal", "meanteacher";
            selects how the model's return value is unpacked in ``_get_loss``.
        model_name (str): suffix appended to logged scalar names, to
            distinguish e.g. student vs teacher.
    """

    def __init__(self, eval_period, model, data_loader, model_output, model_name=""):
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader
        self._model_output = model_output
        self._model_name = model_name

    def _do_loss_eval(self):
        # Accumulate each metric over the whole validation loader, average,
        # and log the loss-prefixed entries plus their sum on the main process.
        record_acc_dict = {}
        with inference_context(self._model), torch.no_grad():
            for _, inputs in enumerate(self._data_loader):
                record_dict = self._get_loss(inputs, self._model)
                # accumulate the losses
                for loss_type in record_dict.keys():
                    if loss_type not in record_acc_dict.keys():
                        record_acc_dict[loss_type] = record_dict[loss_type]
                    else:
                        record_acc_dict[loss_type] += record_dict[loss_type]
            # average
            for loss_type in record_acc_dict.keys():
                record_acc_dict[loss_type] = record_acc_dict[loss_type] / len(
                    self._data_loader
                )

            # divide loss and other metrics
            loss_acc_dict = {}
            for key in record_acc_dict.keys():
                if key[:4] == "loss":
                    loss_acc_dict[key] = record_acc_dict[key]

            # only output the results of major node
            if comm.is_main_process():
                total_losses_reduced = sum(loss for loss in loss_acc_dict.values())
                self.trainer.storage.put_scalar(
                    "val_total_loss_val" + self._model_name, total_losses_reduced
                )

                record_acc_dict = {
                    "val_" + k + self._model_name: record_acc_dict[k]
                    for k in record_acc_dict.keys()
                }
                if len(record_acc_dict) > 1:
                    self.trainer.storage.put_scalars(**record_acc_dict)

    def _get_loss(self, data, model):
        # Dispatch on the configured output convention; each branch unpacks a
        # different tuple arity from the model call.
        if self._model_output == "loss_only":
            record_dict = model(data)
        elif self._model_output == "loss_proposal":
            record_dict, _, _, _ = model(data, branch="val_loss", val_mode=True)
        elif self._model_output == "meanteacher":
            record_dict, _, _, _, _ = model(data)
        # NOTE(review): an unknown model_output would raise NameError on
        # record_dict below rather than a clear error — confirm inputs are
        # validated upstream.
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in record_dict.items()
        }
        return metrics_dict

    def _write_losses(self, metrics_dict):
        # gather metrics among all workers for logging
        # This assumes we do DDP-style training, which is currently the only
        # supported method in detectron2.
        comm.synchronize()
        all_metrics_dict = comm.gather(metrics_dict, dst=0)

        if comm.is_main_process():
            # average the rest metrics
            metrics_dict = {
                "val_" + k: np.mean([x[k] for x in all_metrics_dict])
                for k in all_metrics_dict[0].keys()
            }
            total_losses_reduced = sum(loss for loss in metrics_dict.values())

            self.trainer.storage.put_scalar("val_total_loss_val", total_losses_reduced)
            if len(metrics_dict) > 1:
                self.trainer.storage.put_scalars(**metrics_dict)

    def _detect_anomaly(self, losses, loss_dict):
        # Fail fast when the summed loss is inf/NaN.
        if not torch.isfinite(losses).all():
            raise FloatingPointError(
                "Loss became infinite or NaN at iteration={}!\nloss_dict = {}".format(
                    self.trainer.iter, loss_dict
                )
            )

    def after_step(self):
        # Trigger evaluation on the last iteration and at every period boundary.
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()


@contextmanager
def inference_context(model):
    """
    A context where the model is temporarily changed to eval mode,
    and restored to previous mode afterwards.

    Args:
        model: a torch Module
    """
    training_mode = model.training
    model.eval()
    yield
    model.train(training_mode)



================================================
FILE: adapteacher/engine/probe.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.structures import pairwise_iou


class OpenMatchTrainerProbe:
    """Collects diagnostic statistics on pseudo-labels vs. ground truth.

    All metrics treat gt_classes == -1 as the out-of-distribution (OOD) /
    outlier class. Boxes are compared on the 'cuda' device.
    """

    def __init__(self, cfg):
        # IoU threshold above which a pseudo-box counts as matching a gt box.
        self.BOX_AP = 0.5
        self.NUM_CLASSES = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        # self.bbox_stat_list = ['compute_fp_gtoutlier', 'compute_num_box', 'compute_ood_acc']

    def bbox_stat(self, unlabel_gt, unlabel_pseudo, name, bbox_stat_list):
        """Run each metric method named in bbox_stat_list and merge their outputs.

        Returns:
            (dict, list): merged stats and the names of stats to be summed
            across GPUs.
        """
        stats = {}
        sum_gpu_names = []
        for metric in bbox_stat_list:
            # Each metric name must be a method of this class.
            stats_per, sum_gpu_names_per = getattr(
                self, metric)(unlabel_gt, unlabel_pseudo, name)
            stats.update(stats_per)
            sum_gpu_names.extend(sum_gpu_names_per)
        return stats, sum_gpu_names

    def compute_fp_gtoutlier(self, unlabel_gt, unlabel_pseudo, name):
        """Count gt OOD objects and how many are (wrongly) covered by predictions."""
        num_gt_ood_object = 0
        num_gt_fp_ood_object = 0
        sum_iou = 0.0
        sum_gpu_names = []
        results = {}

        if len(unlabel_gt) != 0:
            for gt, pseudo in zip(unlabel_gt, unlabel_pseudo):
                # import pdb; pdb.set_trace()
                if name == "pred":
                    pp_boxes = pseudo.pred_boxes
                elif name == "pseudo_conf" or name == "pseudo_ood":
                    # filter predicted ood box when evaluating this metric
                    pseudo = pseudo[pseudo.gt_classes != -1]
                    pp_boxes = pseudo.gt_boxes
                else:
                    raise ValueError("Unknown name for probe roi bbox.")

                if len(gt) != 0 and len(pseudo) != 0:
                    # Best-matching prediction IoU per gt box (max over dim 1).
                    max_iou, max_idx = pairwise_iou(
                        gt.gt_boxes.to('cuda'), pp_boxes).max(1)
                    ood_idx = (gt.gt_classes == -1)
                    num_gt_ood_object += ood_idx.sum().item()
                    # An OOD gt box overlapped above threshold counts as a FP cover.
                    num_gt_fp_ood_object += (max_iou[ood_idx] > self.BOX_AP).sum().item()
                    sum_iou += max_iou[ood_idx].sum().item()
                elif len(gt) != 0 and len(pseudo) == 0:
                    # NOTE(review): uses ood_idx.shape[0] (all gt boxes), not
                    # ood_idx.sum() (only OOD ones) — confirm this asymmetry
                    # with the branch above is intended.
                    ood_idx = (gt.gt_classes == -1)
                    num_gt_ood_object += ood_idx.shape[0]

            results = {'Analysis_'+name+'/num_gt_ood_object': num_gt_ood_object,
                       'Analysis_'+name+'/num_gt_fp_ood_object': num_gt_fp_ood_object,
                       'Analysis_'+name+'/sum_iou': sum_iou}
            sum_gpu_names.extend(list(results.keys()))

        return results, sum_gpu_names

    def compute_num_box(self, unlabel_gt, unlabel_pseudo, name, processed=False):
        """Per-image box counts/sizes, split into inlier/outlier/background.

        When ground truth is unavailable, only raw counts and sizes are
        reported; otherwise pseudo-boxes are matched to gt at IoU > 0.5.
        """
        num_bbox = 0.0
        size_bbox = 0.0
        avg_conf = 0.0
        # measure in and out box for openset SS-OD
        num_bbox_in = 0.0
        num_bbox_out = 0.0
        num_bg = 0.0

        # when ground-truth is missing in unlabeled data
        if len(unlabel_gt) == 0:
            for pp_roi in unlabel_pseudo:
                if name == "pred":
                    pp_boxes = pp_roi.pred_boxes
                    pp_classes = pp_roi.pred_classes
                    pp_scores = pp_roi.scores
                elif name == "pseudo_conf" or name == "pseudo_ood":
                    pp_boxes = pp_roi.gt_boxes
                    pp_classes = pp_roi.gt_classes
                    pp_scores = pp_roi.scores
                elif name == "gt":
                    pp_boxes = pp_roi.gt_boxes
                    pp_classes = pp_roi.gt_classes
                else:
                    raise ValueError("Unknown name for probe roi bbox.")

                # all boxes (in + out boxes)
                if len(pp_roi) != 0:
                    # bbox number and size
                    num_bbox += len(pp_roi)
                    size_bbox += pp_boxes.area().mean().item()

                    # average box confidence
                    if name != "gt":
                        avg_conf += pp_scores.mean()
                else:
                    num_bbox += 0
                    # NOTE(review): mixes a float accumulator with a CUDA
                    # tensor here — size_bbox becomes a tensor afterwards;
                    # assumes a GPU is available.
                    size_bbox += torch.tensor(0).cuda()
            num_valid_img = len(unlabel_pseudo)
        else:
            # with ground-truth
            num_valid_img = 0
            for gt, pp_roi in zip(unlabel_gt, unlabel_pseudo):
                if name == "pred":
                    pp_boxes = pp_roi.pred_boxes
                    pp_classes = pp_roi.pred_classes
                    pp_scores = pp_roi.scores
                elif name == "pseudo_conf" or name == "pseudo_ood":
                    # filter out ood pseudo-box when doing analysis
                    pp_roi = pp_roi[pp_roi.gt_classes != -1]
                    pp_boxes = pp_roi.gt_boxes
                    pp_classes = pp_roi.gt_classes
                    pp_scores = pp_roi.scores
                elif name == "gt":
                    pp_boxes = pp_roi.gt_boxes
                    pp_classes = pp_roi.gt_classes
                else:
                    raise ValueError("Unknown name for probe roi bbox.")

                # all boxes (in + out boxes)
                if len(pp_roi) != 0:
                    # bbox number and size
                    num_bbox += len(pp_roi)
                    size_bbox += pp_boxes.area().mean().item()

                    # average box confidence
                    if name != "gt":
                        avg_conf += pp_scores.mean()
                else:
                    num_bbox += 0
                    size_bbox += torch.tensor(0).cuda()

                # in and out class
                if name == "gt":
                    # For gt, inlier/outlier is read directly off the class label.
                    pp_roi_in = pp_roi[pp_classes != -1]
                    num_bbox_in += len(pp_roi_in)
                    pp_roi_out = pp_roi[pp_classes == -1]
                    num_bbox_out += len(pp_roi_out)
                    num_valid_img += 1
                elif name == "pred" or name == "pseudo_conf" or name == "pseudo_ood":
                    if len(gt.gt_boxes.to('cuda'))>0 and len(pp_boxes) > 0:
                        # Best-matching gt per pseudo-box (max over dim 0).
                        max_iou, max_idx = pairwise_iou(gt.gt_boxes.to('cuda'), pp_boxes).max(0)
                        # for the ground-truth label for each pseudo-box
                        gtclass4pseudo = gt.gt_classes[max_idx]
                        matchgtbox = max_iou > 0.5

                        # compute the number of boxes (background, inlier, outlier)
                        num_bg += (~matchgtbox).sum().item()
                        num_bbox_in += (gtclass4pseudo[matchgtbox] != -1).sum().item()
                        num_bbox_out += (gtclass4pseudo[matchgtbox] == -1).sum().item()
                        num_valid_img += 1
                else:
                    raise ValueError("Unknown name for probe roi bbox.")

        box_probe = {}
        if processed == True:
            name = name+"processed"
        # Early out: nothing was counted, so the ratios below would divide by zero.
        if num_bbox == 0:
            return box_probe, []
        if num_valid_img >0 :
            box_probe["Analysis_" + name + "/Num_bbox"] = num_bbox / num_valid_img
            box_probe["Analysis_" + name + "/Size_bbox"] = size_bbox / num_valid_img
            box_probe["Analysis_" + name + "/Num_bbox_inlier"] = num_bbox_in / num_valid_img
            box_probe["Analysis_" + name + "/Num_bbox_outlier"] = num_bbox_out / num_valid_img
            if name != "gt":
                # prediciton, background number
                box_probe["Analysis_" + name + "/Conf"] = avg_conf / num_valid_img
                box_probe["Analysis_" + name + "/Num_bbox_background"] = num_bg / num_valid_img
                box_probe["Analysis_" + name + "/background_fp_ratio"] = num_bg / num_bbox
                box_probe["Analysis_" + name + "/background_tp_ratio"] = num_bbox_in / num_bbox
        else:
            box_probe["Analysis_" + name + "/Num_bbox"] = 0.0
            box_probe["Analysis_" + name + "/Size_bbox"] = 0.0
            box_probe["Analysis_" + name + "/Num_bbox_inlier"] = 0.0
            box_probe["Analysis_" + name + "/Num_bbox_outlier"] = 0.0
            if name != "gt":
                # prediciton, background number
                box_probe["Analysis_" + name + "/Conf"] = 0.0
                box_probe["Analysis_" + name + "/Num_bbox_background"] = 0.0
                box_probe["Analysis_" + name + "/background_fp_ratio"] = num_bg / num_bbox
                box_probe["Analysis_" + name + "/background_tp_ratio"] = num_bbox_in / num_bbox

        return box_probe, []

    def compute_ood_acc(self, unlabel_gt, unlabel_pseudo, name, BOX_IOU=0.5):
        """Accuracy/recall of the OOD head, as numerator/denominator pairs.

        Emits acc_outlier/recall_outlier over foreground-only ('_fg') and all
        ('_all') predictions, each split into '_nume' and '_deno' so callers
        can sum across GPUs before dividing.
        """
        results = {}
        sum_gpu_names = []

        if len(unlabel_gt) != 0:
            # Initialize every numerator/denominator bucket to 0.
            for metric in ['acc_outlier', 'recall_outlier']:
                for samples in ['_fg', '_all']:
                    for fraction_part in ['_nume', '_deno']:
                        results[metric+samples+fraction_part] = 0.0

            for gt, pred in zip(unlabel_gt, unlabel_pseudo):
                if name == "pred":
                    pp_boxes = pred.pred_boxes
                    # NOTE(review): ood_scores is a project-specific field;
                    # column 0 > 0.5 is treated as "predicted outlier" below —
                    # confirm the score layout.
                    pp_ood_scores = pred.ood_scores
                elif name == "pseudo_conf" or name == "pseudo_ood":
                    # assume these outlier are suppressed
                    pred = pred[pred.gt_classes != -1]
                    pp_boxes = pred.gt_boxes
                    pp_ood_scores = pred.ood_scores
                else:
                    raise ValueError("Unknown name for probe roi bbox.")

                if len(gt) != 0 and len(pred) != 0:
                    # find the most overlapped ground-truth box for each pseudo-box
                    max_iou, max_idx = pairwise_iou(
                        gt.gt_boxes.to('cuda'), pp_boxes).max(0)

                    # ignore background instances
                    find_fg_mask = max_iou > BOX_IOU
                    if find_fg_mask.sum() > 0:
                        gt_corres = gt[max_idx].gt_classes.to("cuda")
                        gt_outlier = (gt_corres[find_fg_mask] == -1)
                        pred_outlier = pp_ood_scores[find_fg_mask][:, 0] > 0.5

                        # accurcay of ood detection (foreground)
                        # acc_outlier_fg = (pred_outlier == gt_outlier).sum() /find_fg_mask.sum()
                        results['acc_outlier_fg_nume'] += (
                            pred_outlier == gt_outlier).sum()
                        results['acc_outlier_fg_deno'] += find_fg_mask.sum()

                        # recall of ood detection (foreground)
                        # recall_outlier_fg = (pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() /gt_outlier.sum()
                        results['recall_outlier_fg_nume'] += (
                            pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum()
                        results['recall_outlier_fg_deno'] += gt_outlier.sum()

                    # Regard backgound gt as outlier
                    gt_corres = gt[max_idx].gt_classes.to("cuda")
                    # convert all background gt as outlier
                    gt_corres[~find_fg_mask] = -1
                    gt_outlier = gt_corres == -1
                    pred_outlier = pp_ood_scores[:, 0] > 0.5

                    # accurcay of ood detection (all)
                    # acc_outlier_all = (pred_outlier == gt_outlier).sum() /len(pred)
                    results['acc_outlier_all_nume'] += (
                        pred_outlier == gt_outlier).sum()
                    results['acc_outlier_all_deno'] += len(pred)

                    # recall of ood detection (all)
                    # recall_outlier_all = (pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() /gt_outlier.sum()
                    results['recall_outlier_all_nume'] += (
                        pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum()
                    results['recall_outlier_all_deno'] += gt_outlier.sum()

            results = {'Analysis_'+name+'/'+k: v for k, v in results.items()}
            sum_gpu_names.extend(list(results.keys()))

        return results, sum_gpu_names


# NOTE(review): torch is imported mid-module (after the class) in the original
# source; methods above only use it at call time, so this still works.
import torch


def probe(
    cfg,
    proposals_roih_unsup_k,
    unlabel_data_k,
    pesudo_proposals_roih_unsup_k,
    record_dict,
):
    """
    Probe for research development
    """
    # [probe] roi result from weak branch (before pseudo-labeling)
    record_roih = probe_roih_bbox(
        proposals_roih_unsup_k, cfg.MODEL.ROI_HEADS.NUM_CLASSES, "roih"
    )
    record_dict.update(record_roih)

    # [probe] roi result after pseudo-labeling from weak branch
    record_roih_pseudo = probe_roih_bbox(
        pesudo_proposals_roih_unsup_k, cfg.MODEL.ROI_HEADS.NUM_CLASSES, "roih_pseudo"
    )
    record_dict.update(record_roih_pseudo)

    return record_dict


def probe_roih_bbox(proposals_roih, num_cls, name=""):
    """Summarize per-image ROI boxes: counts, sizes, confidence, majority class.

    Args:
        proposals_roih: list of per-image Instances.
        num_cls (int): number of classes (currently unused in the body).
        name (str): "roih" (predictions), "roih_pseudo" (pseudo-labels) or "gt".
    """
    num_bbox = 0.0
    size_bbox = 0.0
    avg_conf = 0.0
    pred_cls_list = []

    for pp_roi in proposals_roih:
        if name == "roih":
            pp_boxes = pp_roi.pred_boxes
            pp_classes = pp_roi.pred_classes
            pp_scores = pp_roi.scores
        elif name == "roih_pseudo":
            pp_boxes = pp_roi.gt_boxes
            pp_classes = pp_roi.gt_classes
            pp_scores = pp_roi.scores
        elif name == "gt":
            pp_boxes = pp_roi.gt_boxes
            pp_classes = pp_roi.gt_classes
        else:
            raise ValueError(f"Unknown name for probe roi bbox '{name}'")
        device = pp_classes.device

        if pp_roi:
            # bbox number and size
            num_bbox += len(pp_roi)
            size_bbox += pp_boxes.area().mean()

            if name != "gt":
                avg_conf += pp_scores.mean()

            # ratio of majority class
            all_idx, cls_count = torch.unique(pp_classes, return_counts=True)
            major_cls_idx = all_idx[torch.argmax(cls_count)]
            major_cls_ratio = torch.max(cls_count).float() / pp_classes.numel()

            # cls_sum
            pred_cls_list.append(pp_classes)
        else:
            num_bbox += 0
            size_bbox += torch.tensor(0).to(device)
            major_cls_idx = torch.tensor(0).to(device)
            major_cls_ratio = torch.tensor(0).to(device)

    # boxes monitor
    # NOTE(review): major_cls_* reflect only the last image in the loop, and
    # size_bbox.item() assumes at least one iteration ran — confirm callers
    # never pass an empty list.
    box_probe = {}
    box_probe["bbox_probe_" + name + "/Num_bbox"] = num_bbox / len(proposals_roih)
    box_probe["bbox_probe_" + name + "/Size_bbox"] = size_bbox.item() / len(
        proposals_roih
    )
    if name != "gt":
        box_probe["bbox_probe_" + name + "/Conf"] = avg_conf / len(proposals_roih)
    box_probe["bbox_probe_" + name + "/Ratio_major_cls_idx"] = major_cls_idx.item()
    box_probe["bbox_probe_" + name + "/Ratio_major_cls"] = major_cls_ratio.item()

    return box_probe



================================================
FILE: adapteacher/engine/trainer.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved import os import time import logging import torch from torch.nn.parallel import DistributedDataParallel from fvcore.nn.precise_bn import get_bn_modules import numpy as np from collections import OrderedDict import detectron2.utils.comm as comm from detectron2.checkpoint import DetectionCheckpointer from detectron2.engine import DefaultTrainer, SimpleTrainer, TrainerBase from detectron2.engine.train_loop import AMPTrainer from detectron2.utils.events import EventStorage from detectron2.evaluation import verify_results, DatasetEvaluators # from detectron2.evaluation import COCOEvaluator, verify_results, DatasetEvaluators from detectron2.data.dataset_mapper import DatasetMapper from detectron2.engine import hooks from detectron2.structures.boxes import Boxes from detectron2.structures.instances import Instances from detectron2.utils.env import TORCH_VERSION from detectron2.data import MetadataCatalog from adapteacher.data.build import ( build_detection_semisup_train_loader, build_detection_test_loader, build_detection_semisup_train_loader_two_crops, ) from adapteacher.data.dataset_mapper import DatasetMapperTwoCropSeparate from adapteacher.engine.hooks import LossEvalHook from adapteacher.modeling.meta_arch.ts_ensemble import EnsembleTSModel from adapteacher.checkpoint.detection_checkpoint import DetectionTSCheckpointer from adapteacher.solver.build import build_lr_scheduler from adapteacher.evaluation import PascalVOCDetectionEvaluator, COCOEvaluator from .probe import OpenMatchTrainerProbe import copy # Supervised-only Trainer class BaselineTrainer(DefaultTrainer): def __init__(self, cfg): """ Args: cfg (CfgNode): Use the custom checkpointer, which loads other backbone models with matching heuristics. 
""" cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) model = self.build_model(cfg) optimizer = self.build_optimizer(cfg, model) data_loader = self.build_train_loader(cfg) if comm.get_world_size() > 1: model = DistributedDataParallel( model, device_ids=[comm.get_local_rank()], broadcast_buffers=False ) TrainerBase.__init__(self) self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( model, data_loader, optimizer ) self.scheduler = self.build_lr_scheduler(cfg, optimizer) self.checkpointer = DetectionCheckpointer( model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=self.scheduler, ) self.start_iter = 0 self.max_iter = cfg.SOLVER.MAX_ITER self.cfg = cfg self.register_hooks(self.build_hooks()) def resume_or_load(self, resume=True): """ If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by a `last_checkpoint` file), resume from the file. Resuming means loading all available states (eg. optimizer and scheduler) and update iteration counter from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used. Otherwise, this is considered as an independent training. The method will load model weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start from iteration 0. Args: resume (bool): whether to do resume or not """ checkpoint = self.checkpointer.resume_or_load( self.cfg.MODEL.WEIGHTS, resume=resume ) if resume and self.checkpointer.has_checkpoint(): self.start_iter = checkpoint.get("iteration", -1) + 1 # The checkpoint stores the training iteration that just finished, thus we start # at the next iteration (or iter zero if there's no checkpoint). 
if isinstance(self.model, DistributedDataParallel): # broadcast loaded data/model from the first rank, because other # machines may not have access to the checkpoint file if TORCH_VERSION >= (1, 7): self.model._sync_params_and_buffers() self.start_iter = comm.all_gather(self.start_iter)[0] def train_loop(self, start_iter: int, max_iter: int): """ Args: start_iter, max_iter (int): See docs above """ logger = logging.getLogger(__name__) logger.info("Starting training from iteration {}".format(start_iter)) self.iter = self.start_iter = start_iter self.max_iter = max_iter with EventStorage(start_iter) as self.storage: try: self.before_train() for self.iter in range(start_iter, max_iter): self.before_step() self.run_step() self.after_step() except Exception: logger.exception("Exception during training:") raise finally: self.after_train() def run_step(self): self._trainer.iter = self.iter assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" start = time.perf_counter() data = next(self._trainer._data_loader_iter) data_time = time.perf_counter() - start record_dict, _, _, _ = self.model(data, branch="supervised") num_gt_bbox = 0.0 for element in data: num_gt_bbox += len(element["instances"]) num_gt_bbox = num_gt_bbox / len(data) record_dict["bbox_num/gt_bboxes"] = num_gt_bbox loss_dict = {} for key in record_dict.keys(): if key[:4] == "loss" and key[-3:] != "val": loss_dict[key] = record_dict[key] losses = sum(loss_dict.values()) metrics_dict = record_dict metrics_dict["data_time"] = data_time self._write_metrics(metrics_dict) self.optimizer.zero_grad() losses.backward() self.optimizer.step() @classmethod def build_evaluator(cls, cfg, dataset_name, output_folder=None): if output_folder is None: output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") evaluator_list = [] evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type if evaluator_type == "coco": evaluator_list.append(COCOEvaluator( dataset_name, output_dir=output_folder)) elif 
evaluator_type == "pascal_voc": return PascalVOCDetectionEvaluator(dataset_name) elif evaluator_type == "pascal_voc_water": return PascalVOCDetectionEvaluator(dataset_name, target_classnames=["bicycle", "bird", "car", "cat", "dog", "person"]) if len(evaluator_list) == 0: raise NotImplementedError( "no Evaluator for the dataset {} with the type {}".format( dataset_name, evaluator_type ) ) elif len(evaluator_list) == 1: return evaluator_list[0] return DatasetEvaluators(evaluator_list) @classmethod def build_train_loader(cls, cfg): return build_detection_semisup_train_loader(cfg, mapper=None) @classmethod def build_test_loader(cls, cfg, dataset_name): """ Returns: iterable """ return build_detection_test_loader(cfg, dataset_name) def build_hooks(self): """ Build a list of default hooks, including timing, evaluation, checkpointing, lr scheduling, precise BN, writing events. Returns: list[HookBase]: """ cfg = self.cfg.clone() cfg.defrost() cfg.DATALOADER.NUM_WORKERS = 0 ret = [ hooks.IterationTimer(), hooks.LRScheduler(self.optimizer, self.scheduler), hooks.PreciseBN( cfg.TEST.EVAL_PERIOD, self.model, self.build_train_loader(cfg), cfg.TEST.PRECISE_BN.NUM_ITER, ) if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) else None, ] if comm.is_main_process(): ret.append( hooks.PeriodicCheckpointer( self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD ) ) def test_and_save_results(): self._last_eval_results = self.test(self.cfg, self.model) return self._last_eval_results ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) if comm.is_main_process(): ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) return ret def _write_metrics(self, metrics_dict: dict): """ Args: metrics_dict (dict): dict of scalar metrics """ metrics_dict = { k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) for k, v in metrics_dict.items() } # gather metrics among all workers for logging # This assumes we do DDP-style training, which is currently 
        # the only supported method in detectron2.
        all_metrics_dict = comm.gather(metrics_dict)

        if comm.is_main_process():
            if "data_time" in all_metrics_dict[0]:
                # data_time can vary a lot across workers; report the max
                # since that is the actual latency the step paid.
                data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
                self.storage.put_scalar("data_time", data_time)

            # Average every remaining metric over workers.
            metrics_dict = {
                k: np.mean([x[k] for x in all_metrics_dict])
                for k in all_metrics_dict[0].keys()
            }

            # Sum every metric whose key starts with "loss" into total_loss.
            loss_dict = {}
            for key in metrics_dict.keys():
                if key[:4] == "loss":
                    loss_dict[key] = metrics_dict[key]

            total_losses_reduced = sum(loss for loss in loss_dict.values())

            self.storage.put_scalar("total_loss", total_losses_reduced)
            if len(metrics_dict) > 1:
                self.storage.put_scalars(**metrics_dict)


# Adaptive Teacher Trainer
class ATeacherTrainer(DefaultTrainer):
    def __init__(self, cfg):
        """
        Build the teacher-student trainer.

        Creates a student model (trained by SGD) and a teacher model
        (updated only via EMA in `_update_teacher_model`), wraps the student
        in DDP when distributed, and registers both in an EnsembleTSModel so
        a single checkpointer saves/loads the pair.

        Args:
            cfg (CfgNode):
        """
        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
        data_loader = self.build_train_loader(cfg)

        # create the student model (receives gradient updates)
        model = self.build_model(cfg)
        optimizer = self.build_optimizer(cfg, model)

        # create the teacher model (same architecture; EMA-updated, no optimizer)
        model_teacher = self.build_model(cfg)
        self.model_teacher = model_teacher

        # For training, wrap with DDP. But don't need this for inference.
        if comm.get_world_size() > 1:
            model = DistributedDataParallel(
                model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
            )

        # Note: deliberately bypasses DefaultTrainer.__init__ to install the
        # custom checkpointer / ensemble model below.
        TrainerBase.__init__(self)
        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
            model, data_loader, optimizer
        )
        self.scheduler = self.build_lr_scheduler(cfg, optimizer)

        # Ensemble teacher and student model is for model saving and loading
        ensem_ts_model = EnsembleTSModel(model_teacher, model)

        self.checkpointer = DetectionTSCheckpointer(
            ensem_ts_model,
            cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=self.scheduler,
        )
        self.start_iter = 0
        self.max_iter = cfg.SOLVER.MAX_ITER
        self.cfg = cfg

        # Probe used for optional pseudo-label statistics (see run_step_full_semisup).
        self.probe = OpenMatchTrainerProbe(cfg)
        self.register_hooks(self.build_hooks())

    def resume_or_load(self, resume=True):
        """
        If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint
        (defined by a `last_checkpoint` file), resume from it; resuming loads
        all available states (optimizer, scheduler) and the iteration counter,
        and `cfg.MODEL.WEIGHTS` is ignored. Otherwise load model weights from
        `cfg.MODEL.WEIGHTS` (no other state) and start from iteration 0.

        Args:
            resume (bool): whether to do resume or not
        """
        checkpoint = self.checkpointer.resume_or_load(
            self.cfg.MODEL.WEIGHTS, resume=resume
        )
        if resume and self.checkpointer.has_checkpoint():
            # The checkpoint stores the training iteration that just finished,
            # thus we start at the next iteration.
            self.start_iter = checkpoint.get("iteration", -1) + 1
        if isinstance(self.model, DistributedDataParallel):
            # broadcast loaded data/model from the first rank, because other
            # machines may not have access to the checkpoint file
            if TORCH_VERSION >= (1, 7):
                self.model._sync_params_and_buffers()
            self.start_iter = comm.all_gather(self.start_iter)[0]

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        # Pick an evaluator from the dataset's registered evaluator_type.
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type

        if evaluator_type == "coco":
            evaluator_list.append(COCOEvaluator(
                dataset_name, output_dir=output_folder))
        elif evaluator_type == "pascal_voc":
            return PascalVOCDetectionEvaluator(dataset_name)
        elif evaluator_type == "pascal_voc_water":
            # Cross-domain setting: only the 6 classes shared with the target.
            return PascalVOCDetectionEvaluator(dataset_name,
                target_classnames=["bicycle", "bird", "car", "cat", "dog", "person"])
        if len(evaluator_list) == 0:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, evaluator_type
                )
            )
        elif len(evaluator_list) == 1:
            return evaluator_list[0]

        return DatasetEvaluators(evaluator_list)

    @classmethod
    def build_train_loader(cls, cfg):
        # Mapper produces two crops per image (strong + weak augmentation),
        # consumed as (q, k) pairs in run_step_full_semisup.
        mapper = DatasetMapperTwoCropSeparate(cfg, True)
        return build_detection_semisup_train_loader_two_crops(cfg, mapper)

    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer):
        # Project-local scheduler builder (see adapteacher/solver).
        return build_lr_scheduler(cfg, optimizer)

    def train(self):
        """Run the full training loop, then verify/return the last eval results."""
        self.train_loop(self.start_iter, self.max_iter)
        if hasattr(self, "_last_eval_results") and comm.is_main_process():
            verify_results(self.cfg, self._last_eval_results)
            return self._last_eval_results

    def train_loop(self, start_iter: int, max_iter: int):
        """Custom loop: identical to DefaultTrainer's but calls
        run_step_full_semisup instead of the standard run_step."""
        logger = logging.getLogger(__name__)
        logger.info("Starting training from iteration {}".format(start_iter))

        self.iter = self.start_iter = start_iter
        self.max_iter = max_iter

        with EventStorage(start_iter) as self.storage:
            try:
                self.before_train()

                for self.iter in range(start_iter, max_iter):
                    self.before_step()
                    self.run_step_full_semisup()
                    self.after_step()
            except Exception:
                logger.exception("Exception during training:")
                raise
            finally:
                self.after_train()

    # =====================================================
    # ================== Pseduo-labeling ==================
    # =====================================================
    def threshold_bbox(self, proposal_bbox_inst, thres=0.7, proposal_type="roih"):
        """
        Keep only boxes whose confidence exceeds `thres` and repackage them as
        ground-truth fields (gt_boxes / gt_classes) so they can be used as
        pseudo labels.

        Args:
            proposal_bbox_inst (Instances): RPN proposals (objectness_logits,
                proposal_boxes) or ROI-head predictions (scores, pred_boxes,
                pred_classes), depending on `proposal_type`.
            thres (float): confidence threshold.
            proposal_type (str): "rpn" or "roih".

        NOTE(review): any other `proposal_type` falls through both branches and
        raises UnboundLocalError on `new_proposal_inst` — callers only pass
        "rpn"/"roih".
        """
        if proposal_type == "rpn":
            # For RPN proposals the score is the (unnormalized) objectness logit.
            valid_map = proposal_bbox_inst.objectness_logits > thres

            # create instances containing boxes and gt_classes
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.proposal_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.objectness_logits = proposal_bbox_inst.objectness_logits[
                valid_map
            ]
        elif proposal_type == "roih":
            # ROI-head predictions carry per-class scores and predicted classes.
            valid_map = proposal_bbox_inst.scores > thres

            # create instances containing boxes and gt_classes
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.pred_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.gt_classes = proposal_bbox_inst.pred_classes[valid_map]
            new_proposal_inst.scores = proposal_bbox_inst.scores[valid_map]

        return new_proposal_inst

    def process_pseudo_label(
        self, proposals_rpn_unsup_k, cur_threshold, proposal_type, psedo_label_method=""
    ):
        """
        Turn raw teacher outputs into pseudo-label Instances, one per image.

        Returns:
            (list[Instances], float): thresholded instances and the average
            number of kept boxes per image.

        NOTE(review): divides by len(proposals_rpn_unsup_k) — an empty input
        list would raise ZeroDivisionError; batches are assumed non-empty.
        """
        list_instances = []
        num_proposal_output = 0.0
        for proposal_bbox_inst in proposals_rpn_unsup_k:
            # thresholding is the only supported pseudo-label method
            if psedo_label_method == "thresholding":
                proposal_bbox_inst = self.threshold_bbox(
                    proposal_bbox_inst, thres=cur_threshold, proposal_type=proposal_type
                )
            else:
                raise ValueError("Unkown pseudo label boxes methods")
            num_proposal_output += len(proposal_bbox_inst)
            list_instances.append(proposal_bbox_inst)
        num_proposal_output = num_proposal_output / len(proposals_rpn_unsup_k)
        return list_instances, num_proposal_output

    def remove_label(self, label_data):
        """Delete the "instances" entry from every datum (strips GT labels in place)."""
        for label_datum in label_data:
            if "instances" in label_datum.keys():
                del label_datum["instances"]
        return label_data

    def add_label(self, unlabled_data, label):
        """Attach one Instances per datum as its "instances" entry (in place)."""
        for unlabel_datum, lab_inst in zip(unlabled_data, label):
            unlabel_datum["instances"] = lab_inst
        return unlabled_data

    def get_label(self, label_data):
        """Deep-copy and return the "instances" of every datum that has one."""
        label_list = []
        for label_datum in label_data:
            if "instances" in label_datum.keys():
                label_list.append(copy.deepcopy(label_datum["instances"]))
        return label_list

    # def get_label_test(self, label_data):
    #     label_list = []
    #     for label_datum in label_data:
    #         if "instances" in label_datum.keys():
    #             label_list.append(label_datum["instances"])

    # =====================================================
    # =================== Training Flow ===================
    # =====================================================

    def run_step_full_semisup(self):
        # Keep the inner trainer's iteration counter in sync for its bookkeeping.
        self._trainer.iter = self.iter
        assert self.model.training, "[UBTeacherTrainer] model was changed to eval mode!"
        start = time.perf_counter()
        data = next(self._trainer._data_loader_iter)
        # data_q and data_k from different augmentations (q:strong, k:weak)
        # label_strong, label_weak, unlabed_strong, unlabled_weak
        label_data_q, label_data_k, unlabel_data_q, unlabel_data_k = data
        data_time = time.perf_counter() - start

        # burn-in stage (supervised training with labeled data only)
        if self.iter < self.cfg.SEMISUPNET.BURN_UP_STEP:

            # input both strong and weak supervised data into model
            label_data_q.extend(label_data_k)
            record_dict, _, _, _ = self.model(
                label_data_q, branch="supervised")

            # weight losses (all supervised losses weighted 1)
            loss_dict = {}
            for key in record_dict.keys():
                if key[:4] == "loss":
                    loss_dict[key] = record_dict[key] * 1
            losses = sum(loss_dict.values())

        else:
            if self.iter == self.cfg.SEMISUPNET.BURN_UP_STEP:
                # keep_rate=0.0 copies the whole student into the teacher once,
                # right when burn-in ends.
                self._update_teacher_model(keep_rate=0.00)
                # self.model.build_discriminator()

            elif (
                self.iter - self.cfg.SEMISUPNET.BURN_UP_STEP
            ) % self.cfg.SEMISUPNET.TEACHER_UPDATE_ITER == 0:
                # Periodic EMA update of the teacher from the student.
                self._update_teacher_model(
                    keep_rate=self.cfg.SEMISUPNET.EMA_KEEP_RATE)

            record_dict = {}

            ######################## For probe #################################
            # import pdb; pdb.set_trace()
            gt_unlabel_k = self.get_label(unlabel_data_k)
            # gt_unlabel_q = self.get_label_test(unlabel_data_q)

            #  0. remove unlabeled data labels
            unlabel_data_q = self.remove_label(unlabel_data_q)
            unlabel_data_k = self.remove_label(unlabel_data_k)

            #  1. generate the pseudo-label using teacher model (no gradients)
            with torch.no_grad():
                (
                    _,
                    proposals_rpn_unsup_k,
                    proposals_roih_unsup_k,
                    _,
                ) = self.model_teacher(unlabel_data_k, branch="unsup_data_weak")

            ######################## For probe #################################
            # import pdb; pdb.set_trace()
            # probe_metrics = ['compute_fp_gtoutlier', 'compute_num_box']
            # probe_metrics = ['compute_num_box']
            # analysis_pred, _ = self.probe.compute_num_box(gt_unlabel_k,proposals_roih_unsup_k,'pred')
            # record_dict.update(analysis_pred)
            ######################## For probe END #################################

            #  2. Pseudo-labeling
            cur_threshold = self.cfg.SEMISUPNET.BBOX_THRESHOLD

            joint_proposal_dict = {}
            joint_proposal_dict["proposals_rpn"] = proposals_rpn_unsup_k
            # Process pseudo labels and thresholding
            (
                pesudo_proposals_rpn_unsup_k,
                nun_pseudo_bbox_rpn,
            ) = self.process_pseudo_label(
                proposals_rpn_unsup_k, cur_threshold, "rpn", "thresholding"
            )
            # analysis_pred, _ = self.probe.compute_num_box(gt_unlabel_k,pesudo_proposals_rpn_unsup_k,'pred',True)
            # record_dict.update(analysis_pred)
            joint_proposal_dict["proposals_pseudo_rpn"] = pesudo_proposals_rpn_unsup_k
            # Pseudo_labeling for ROI head (bbox location/objectness)
            pesudo_proposals_roih_unsup_k, _ = self.process_pseudo_label(
                proposals_roih_unsup_k, cur_threshold, "roih", "thresholding"
            )
            joint_proposal_dict["proposals_pseudo_roih"] = pesudo_proposals_roih_unsup_k

            # 3. add the (weak-view) pseudo-labels to both unlabeled views
            unlabel_data_q = self.add_label(
                unlabel_data_q, joint_proposal_dict["proposals_pseudo_roih"]
            )
            unlabel_data_k = self.add_label(
                unlabel_data_k, joint_proposal_dict["proposals_pseudo_roih"]
            )

            all_label_data = label_data_q + label_data_k
            # Only the strongly-augmented unlabeled view is used for the
            # unsupervised loss.
            all_unlabel_data = unlabel_data_q

            # 4. input both strongly and weakly augmented labeled data into student model
            record_all_label_data, _, _, _ = self.model(
                all_label_data, branch="supervised"
            )
            record_dict.update(record_all_label_data)

            # 5. input strongly augmented unlabeled data into model
            record_all_unlabel_data, _, _, _ = self.model(
                all_unlabel_data, branch="supervised_target"
            )
            # Rename unlabeled losses with a "_pseudo" suffix so they can be
            # re-weighted below without colliding with the supervised keys.
            new_record_all_unlabel_data = {}
            for key in record_all_unlabel_data.keys():
                new_record_all_unlabel_data[key + "_pseudo"] = record_all_unlabel_data[
                    key
                ]
            record_dict.update(new_record_all_unlabel_data)

            # 6. input weakly labeled data (source) and weakly unlabeled data (target) to student model
            # give sign to the target data: copy target fields into the source
            # dicts under "*_unlabeled" keys so the "domain" branch sees both.
            for i_index in range(len(unlabel_data_k)):
                # unlabel_data_item = {}
                for k, v in unlabel_data_k[i_index].items():
                    # label_data_k[i_index][k + "_unlabeled"] = v
                    label_data_k[i_index][k + "_unlabeled"] = v
                # unlabel_data_k[i_index] = unlabel_data_item

            all_domain_data = label_data_k
            # all_domain_data = label_data_k + unlabel_data_k
            record_all_domain_data, _, _, _ = self.model(all_domain_data, branch="domain")
            record_dict.update(record_all_domain_data)

            # weight losses
            loss_dict = {}
            for key in record_dict.keys():
                if key.startswith("loss"):
                    if key == "loss_rpn_loc_pseudo" or key == "loss_box_reg_pseudo":
                        # pseudo bbox regression <- 0 (pseudo boxes are too
                        # noisy to supervise localization)
                        loss_dict[key] = record_dict[key] * 0
                    elif key[-6:] == "pseudo":  # unsupervised loss
                        loss_dict[key] = (
                            record_dict[key] *
                            self.cfg.SEMISUPNET.UNSUP_LOSS_WEIGHT
                        )
                    elif (
                        key == "loss_D_img_s" or key == "loss_D_img_t"
                    ):  # set weight for discriminator
                        # import pdb
                        # pdb.set_trace()
                        loss_dict[key] = record_dict[key] * self.cfg.SEMISUPNET.DIS_LOSS_WEIGHT #Need to modify defaults and yaml
                    else:  # supervised loss
                        loss_dict[key] = record_dict[key] * 1

            losses = sum(loss_dict.values())

        metrics_dict = record_dict
        metrics_dict["data_time"] = data_time
        self._write_metrics(metrics_dict)

        self.optimizer.zero_grad()
        losses.backward()
        self.optimizer.step()

    def _write_metrics(self, metrics_dict: dict):
        """Reduce per-worker metrics and write them to the EventStorage."""
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }

        # gather metrics among all workers for logging
        # This assumes we do
        # DDP-style training, which is currently the only
        # supported method in detectron2.
        all_metrics_dict = comm.gather(metrics_dict)
        # all_hg_dict = comm.gather(hg_dict)

        if comm.is_main_process():
            if "data_time" in all_metrics_dict[0]:
                # data_time among workers can have high variance. The actual latency
                # caused by data_time is the maximum among workers.
                data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
                self.storage.put_scalar("data_time", data_time)

            # average the rest metrics
            metrics_dict = {
                k: np.mean([x[k] for x in all_metrics_dict])
                for k in all_metrics_dict[0].keys()
            }

            # append the list
            loss_dict = {}
            for key in metrics_dict.keys():
                if key[:4] == "loss":
                    loss_dict[key] = metrics_dict[key]

            total_losses_reduced = sum(loss for loss in loss_dict.values())

            self.storage.put_scalar("total_loss", total_losses_reduced)
            if len(metrics_dict) > 1:
                self.storage.put_scalars(**metrics_dict)

    @torch.no_grad()
    def _update_teacher_model(self, keep_rate=0.9996):
        """
        EMA update of the teacher from the student:
            teacher = student * (1 - keep_rate) + teacher * keep_rate
        keep_rate=0 copies the student wholesale (used at the end of burn-in).
        """
        if comm.get_world_size() > 1:
            # Under DDP the student keys are prefixed with "module."; strip the
            # first 7 characters to match the (unwrapped) teacher's keys.
            student_model_dict = {
                key[7:]: value for key, value in self.model.state_dict().items()
            }
        else:
            student_model_dict = self.model.state_dict()

        new_teacher_dict = OrderedDict()
        for key, value in self.model_teacher.state_dict().items():
            if key in student_model_dict.keys():
                new_teacher_dict[key] = (
                    student_model_dict[key] *
                    (1 - keep_rate) + value * keep_rate
                )
            else:
                raise Exception("{} is not found in student model".format(key))

        self.model_teacher.load_state_dict(new_teacher_dict)

    @torch.no_grad()
    def _copy_main_model(self):
        # initialize all teacher parameters from the student (full copy, no EMA)
        if comm.get_world_size() > 1:
            # Strip the DDP "module." prefix as in _update_teacher_model.
            rename_model_dict = {
                key[7:]: value for key, value in self.model.state_dict().items()
            }
            self.model_teacher.load_state_dict(rename_model_dict)
        else:
            self.model_teacher.load_state_dict(self.model.state_dict())

    @classmethod
    def build_test_loader(cls, cfg, dataset_name):
        # Standard detectron2 test loader.
        return build_detection_test_loader(cfg, dataset_name)

    def build_hooks(self):
        """Like DefaultTrainer.build_hooks, but registers TWO EvalHooks:
        one for the student and one for the (EMA) teacher model."""
        cfg = self.cfg.clone()
        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN

        ret = [
            hooks.IterationTimer(),
            hooks.LRScheduler(self.optimizer, self.scheduler),
            hooks.PreciseBN(
                # Run at the same freq as (but before) evaluation.
                cfg.TEST.EVAL_PERIOD,
                self.model,
                # Build a new data loader to not affect training
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
            )
            if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
            else None,
        ]

        # Do PreciseBN before checkpointer, because it updates the model and need to
        # be saved by checkpointer.
        # This is not always the best: if checkpointing has a different frequency,
        # some checkpoints may have more precise statistics than others.
        if comm.is_main_process():
            ret.append(
                hooks.PeriodicCheckpointer(
                    self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD
                )
            )

        def test_and_save_results_student():
            # Evaluate the student; suffix keys with "_student" so both sets
            # of metrics can coexist in the storage.
            self._last_eval_results_student = self.test(self.cfg, self.model)
            _last_eval_results_student = {
                k + "_student": self._last_eval_results_student[k]
                for k in self._last_eval_results_student.keys()
            }
            return _last_eval_results_student

        def test_and_save_results_teacher():
            # Evaluate the EMA teacher (typically the reported model).
            self._last_eval_results_teacher = self.test(
                self.cfg, self.model_teacher)
            return self._last_eval_results_teacher

        ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results_student))
        ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results_teacher))

        if comm.is_main_process():
            # run writers in the end, so that evaluation metrics are written
            ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
        return ret


================================================
FILE: adapteacher/evaluation/__init__.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco_evaluation import COCOEvaluator
from .pascal_voc_evaluation import PascalVOCDetectionEvaluator

# __all__ = [k for k in globals().keys() if not k.startswith("_")]
__all__ = [
    "COCOEvaluator",
    "PascalVOCDetectionEvaluator"
]


================================================
FILE: adapteacher/evaluation/coco_evaluation.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
import contextlib
import copy
import io
import itertools
import json
import logging
import numpy as np
import os
import pickle
from collections import OrderedDict
import pycocotools.mask as mask_util
import torch
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tabulate import tabulate

import detectron2.utils.comm as comm
from detectron2.config import CfgNode
from detectron2.data import MetadataCatalog
from detectron2.data.datasets.coco import convert_to_coco_dict
from detectron2.evaluation.fast_eval_api import COCOeval_opt
from detectron2.structures import Boxes, BoxMode, pairwise_iou
from detectron2.utils.file_io import PathManager
from detectron2.utils.logger import create_small_table
from detectron2.evaluation import DatasetEvaluator

from iopath.common.file_io import file_lock
logger = logging.getLogger(__name__)


def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's
    standard format.

    Args:
        dataset_name: reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """

    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data

    PathManager.mkdirs(os.path.dirname(output_file))
    # file_lock guards against concurrent workers converting the same dataset.
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            # NOTE(review): stray ')' at the end of this log message.
            logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)")
            coco_dict = convert_to_coco_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            # Writes directly to the final path; the atomic tmp-file+move
            # variant was disabled (see commented code).
            tmp_file = output_file #+ ".tmp"
            # with PathManager.open(tmp_file, "w") as f:
            #     json.dump(coco_dict, f)
            # shutil.move(tmp_file, output_file)
            with PathManager.open(tmp_file, "w") as f:
                json.dump(coco_dict, f)


class COCOEvaluator(DatasetEvaluator):
    """
    Evaluate AR for object proposals, AP for instance detection/segmentation,
    AP for keypoint detection outputs using COCO's metrics.
    See http://cocodataset.org/#detection-eval and
    http://cocodataset.org/#keypoints-eval to understand its metrics.
    The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means
    the metric cannot be computed (e.g. due to no predictions made).

    In addition to COCO, this evaluator is able to support any bounding box
    detection, instance segmentation, or keypoint detection dataset.
    """

    def __init__(
        self,
        dataset_name,
        tasks=None,
        distributed=True,
        output_dir=None,
        *,
        use_fast_impl=True,
        kpt_oks_sigmas=(),
    ):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have either the following corresponding metadata:
                "json_file": the path to the COCO format annotation
                Or it must be in detectron2's standard dataset format
                so it can be converted to COCO format automatically.
            tasks (tuple[str]): tasks that can be evaluated under the given
                configuration. A task is one of "bbox", "segm", "keypoints".
                By default, will infer this automatically from predictions.
            distributed (True): if True, will collect results from all ranks and
                run evaluation in the main process.
                Otherwise, will only evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump all
                results predicted on the dataset. The dump contains two files:
                1. "instances_predictions.pth" a file that can be loaded with
                   `torch.load` and contains all the results in the format they
                   are produced by the model.
                2. "coco_instances_results.json" a json file in COCO's result format.
            use_fast_impl (bool): use a fast but **unofficial** implementation
                to compute AP. Although the results should be very close to the
                official implementation in COCO API, it is still recommended to
                compute results with the official API for use in papers. The
                faster implementation also uses more RAM.
            kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint
                OKS. See http://cocodataset.org/#keypoints-eval. When empty, it
                will use the defaults in COCO. Otherwise it should be the same
                length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
        """
        self._logger = logging.getLogger(__name__)
        self._distributed = distributed
        self._output_dir = output_dir
        self._use_fast_impl = use_fast_impl

        # Deprecated path: a whole CfgNode was passed as `tasks`.
        if tasks is not None and isinstance(tasks, CfgNode):
            kpt_oks_sigmas = (
                tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas
            )
            self._logger.warn(
                "COCO Evaluator instantiated using config, this is deprecated behavior."
                " Please pass in explicit arguments instead."
            )
            self._tasks = None  # Infering it from predictions should be better
        else:
            self._tasks = tasks

        self._cpu_device = torch.device("cpu")

        self._metadata = MetadataCatalog.get(dataset_name)
        if not hasattr(self._metadata, "json_file"):
            # Dataset is in detectron2 format only; convert (and cache) a COCO
            # json so pycocotools can consume it.
            self._logger.info(
                f"'{dataset_name}' is not registered by `register_coco_instances`."
                " Therefore trying to convert it to COCO format ..."
            )

            cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json")
            self._metadata.json_file = cache_path
            convert_to_coco_json(dataset_name, cache_path)

        json_file = PathManager.get_local_path(self._metadata.json_file)
        # COCO() prints to stdout; silence it.
        with contextlib.redirect_stdout(io.StringIO()):
            self._coco_api = COCO(json_file)

        # Test set json files do not contain annotations (evaluation must be
        # performed using the COCO evaluation server).
        self._do_evaluation = "annotations" in self._coco_api.dataset
        if self._do_evaluation:
            self._kpt_oks_sigmas = kpt_oks_sigmas

    def reset(self):
        # Per-dataset accumulator filled by process().
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a COCO model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(self._cpu_device)
            # Only keep predictions that actually carry instances or proposals.
            if len(prediction) > 1:
                self._predictions.append(prediction)

    def evaluate(self, img_ids=None):
        """
        Args:
            img_ids: a list of image IDs to evaluate on. Default to None for the
                whole dataset
        """
        if self._distributed:
            # Collect predictions from all ranks; only rank 0 evaluates.
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not comm.is_main_process():
                return {}
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        if "proposals" in predictions[0]:
            self._eval_box_proposals(predictions)
        if "instances" in predictions[0]:
            self._eval_predictions(predictions, img_ids=img_ids)
        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)

    def _tasks_from_predictions(self, predictions):
        """
        Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions.
        """
        tasks = {"bbox"}
        for pred in predictions:
            if "segmentation" in pred:
                tasks.add("segm")
            if "keypoints" in pred:
                tasks.add("keypoints")
        return sorted(tasks)

    def _eval_predictions(self, predictions, img_ids=None):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results for COCO format ...")
        coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
        tasks = self._tasks or self._tasks_from_predictions(coco_results)

        # unmap the category ids for COCO: model outputs contiguous ids,
        # the json may use arbitrary dataset ids.
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
            all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
            num_classes = len(all_contiguous_ids)
            assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1

            reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
            for result in coco_results:
                category_id = result["category_id"]
                assert category_id < num_classes, (
                    f"A prediction has class={category_id}, "
                    f"but the dataset only has {num_classes} classes and "
                    f"predicted class id should be in [0, {num_classes - 1}]."
                )
                result["category_id"] = reverse_id_mapping[category_id]

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "coco_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(coco_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info(
            "Evaluating predictions with {} COCO API...".format(
                "unofficial" if self._use_fast_impl else "official"
            )
        )
        for task in sorted(tasks):
            assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!"
            coco_eval = (
                _evaluate_predictions_on_coco(
                    self._coco_api,
                    coco_results,
                    task,
                    kpt_oks_sigmas=self._kpt_oks_sigmas,
                    use_fast_impl=self._use_fast_impl,
                    img_ids=img_ids,
                )
                if len(coco_results) > 0
                else None  # cocoapi does not handle empty results very well
            )

            res = self._derive_coco_results(
                coco_eval, task, class_names=self._metadata.get("thing_classes")
            )
            self._results[task] = res

    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        # AR at two proposal budgets x four area ranges.
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res

    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
        """
        Derive the desired score numbers from summarized COCOeval.

        Args:
            coco_eval (None or COCOEval): None represents no predictions from model.
            iou_type (str):
            class_names (None or list[str]): if provided, will use it to predict
                per-category AP.

        Returns:
            a dict of {metric name: score}
        """

        metrics = {
            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
        }[iou_type]

        if coco_eval is None:
            self._logger.warn("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # the standard metrics (COCOeval reports -1 for "not computable")
        results = {
            metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
            for idx, metric in enumerate(metrics)
        }
        self._logger.info(
            "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
        )
        if not np.isfinite(sum(results.values())):
            self._logger.info("Some metrics cannot be computed and is shown as NaN.")

        if class_names is None or len(class_names) <= 1:
            return results
        # Compute per-category AP
        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        assert len(class_names) == precisions.shape[2]

        results_per_category = []
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            # -1 entries mean "no data"; exclude them from the mean.
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            results_per_category.append(("{}".format(name), float(ap * 100)))

        # tabulate it
        N_COLS = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
        table = tabulate(
            results_2d,
            tablefmt="pipe",
            floatfmt=".3f",
            headers=["category", "AP"] * (N_COLS // 2),
            numalign="left",
        )
        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)

        #
        results.update({"AP-" + name: ap for name, ap in results_per_category})

        # Same per-category breakdown at IoU=0.5 only (the metric typically
        # reported for cross-domain benchmarks).
        results_per_category_AP50 = []
        for idx, name in enumerate(class_names):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            t = np.where(.5 == coco_eval.params.iouThrs)[0]
            precisions_50 = precisions[t]
            precisions_50 = precisions_50[:, :, idx, 0, -1]
            precisions_50 = precisions_50[precisions_50 > -1]
            ap = np.mean(precisions_50) if precisions_50.size else float("nan")
            results_per_category_AP50.append(("{}".format(name), float(ap * 100)))

        # tabulate it
        N_COLS = min(6, len(results_per_category_AP50) * 2)
        results_flatten = list(itertools.chain(*results_per_category_AP50))
        results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
        table = tabulate(
            results_2d,
            tablefmt="pipe",
            floatfmt=".3f",
            headers=["category", "AP50"] * (N_COLS // 2),
            numalign="left",
        )
        self._logger.info("Per-category {} AP50: \n".format(iou_type) + table)
        results.update({"AP50-" + name: ap for name, ap in results_per_category_AP50})

        return results


def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    num_instance = len(instances)
    if num_instance == 0:
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    # COCO results use XYWH, detectron2 uses XYXY internally.
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
    boxes = boxes.tolist()
    scores = instances.scores.tolist()
    classes = instances.pred_classes.tolist()

    has_mask = instances.has("pred_masks")
    if has_mask:
        # use RLE to encode the masks, because they are too large and takes memory
        # since this evaluator stores outputs of the entire dataset
        rles = [
            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
            for mask in instances.pred_masks
        ]
        for rle in rles:
            # "counts" is an array encoded by mask_util as a byte-stream.
Python3's # json writer which always produces strings cannot serialize a bytestream # unless you decode it. Thankfully, utf-8 works out (which is also what # the pycocotools/_mask.pyx does). rle["counts"] = rle["counts"].decode("utf-8") has_keypoints = instances.has("pred_keypoints") if has_keypoints: keypoints = instances.pred_keypoints results = [] for k in range(num_instance): result = { "image_id": img_id, "category_id": classes[k], "bbox": boxes[k], "score": scores[k], } if has_mask: result["segmentation"] = rles[k] if has_keypoints: # In COCO annotations, # keypoints coordinates are pixel indices. # However our predictions are floating point coordinates. # Therefore we subtract 0.5 to be consistent with the annotation format. # This is the inverse of data loading logic in `datasets/coco.py`. keypoints[k][:, :2] -= 0.5 result["keypoints"] = keypoints[k].flatten().tolist() results.append(result) return results # inspired from Detectron: # https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): """ Evaluate detection proposal recall metrics. This function is a much faster alternative to the official COCO API recall evaluation code. However, it produces slightly different results. 
def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.

    Args:
        dataset_predictions: list of dicts, each with an "image_id" and a
            "proposals" Instances carrying proposal_boxes and objectness_logits.
        coco_api: a COCO object holding the ground-truth annotations.
        thresholds: IoU thresholds to evaluate recall at; defaults to 0.5:0.05:0.95.
        area (str): one of the keys of `areas` below, restricting which gt
            boxes are counted by their pixel area.
        limit (int or None): if given, evaluate only the top-`limit` proposals
            per image.

    Returns:
        dict with average recall ("ar"), per-threshold "recalls", the
        "thresholds" used, the sorted per-gt "gt_overlaps", and "num_pos".
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],    # all
        [0 ** 2, 32 ** 2],     # small
        [32 ** 2, 96 ** 2],    # medium
        [96 ** 2, 1e5 ** 2],   # large
        [96 ** 2, 128 ** 2],   # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
        anno = coco_api.loadAnns(ann_ids)
        # Crowd regions are excluded from recall evaluation.
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            for obj in anno
            if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        # Keep only gt boxes inside the requested area range.
        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        # Greedy one-to-one matching: repeatedly take the (proposal, gt) pair
        # with the highest remaining IoU and retire both. The iteration order
        # matters; -1 sentinels mark used rows/columns.
        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = (
        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
    )
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
for c in coco_results: c.pop("bbox", None) coco_dt = coco_gt.loadRes(coco_results) coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) if img_ids is not None: coco_eval.params.imgIds = img_ids if iou_type == "keypoints": # Use the COCO default keypoint OKS sigmas unless overrides are specified if kpt_oks_sigmas: assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) # COCOAPI requires every detection and every gt to have keypoints, so # we just take the first entry from both num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " f"Ground truth contains {num_keypoints_gt} keypoints. " f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " "They have to agree with each other. For meaning of OKS, please refer to " "http://cocodataset.org/#keypoints-eval." ) coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() return coco_eval ================================================ FILE: adapteacher/evaluation/pascal_voc_evaluation.py ================================================ # -*- coding: utf-8 -*- # Copyright (c) Facebook, Inc. and its affiliates. import logging import numpy as np import os import tempfile import xml.etree.ElementTree as ET from collections import OrderedDict, defaultdict from functools import lru_cache import torch from detectron2.data import MetadataCatalog from detectron2.utils import comm from detectron2.utils.file_io import PathManager from detectron2.evaluation import DatasetEvaluator class PascalVOCDetectionEvaluator(DatasetEvaluator): """ Evaluate Pascal VOC style AP for Pascal VOC dataset. 
class PascalVOCDetectionEvaluator(DatasetEvaluator):
    """
    Evaluate Pascal VOC style AP for Pascal VOC dataset.
    It contains a synchronization, therefore has to be called from all ranks.

    Note that the concept of AP can be implemented in different ways and may not
    produce identical results. This class mimics the implementation of the official
    Pascal VOC Matlab API, and should produce similar but not identical results to the
    official API.
    """

    def __init__(self, dataset_name, target_classnames=None):
        """
        Args:
            dataset_name (str): name of the dataset, e.g., "voc_2007_test"
            target_classnames (list[str] or None): if given, restrict evaluation
                to this subset of the dataset's classes; None means all classes.
        """
        self._dataset_name = dataset_name
        meta = MetadataCatalog.get(dataset_name)

        # Too many tiny files, download all to local for speed.
        annotation_dir_local = PathManager.get_local_path(
            os.path.join(meta.dirname, "Annotations/")
        )
        self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml")
        self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt")
        self._class_names = meta.thing_classes
        assert meta.year in [2007, 2012], meta.year
        self._is_2007 = meta.year == 2007
        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)
        # BUGFIX: compare to None with identity (`is None`), not `==` (PEP 8).
        if target_classnames is None:
            self.target_classnames = self._class_names
        else:
            self.target_classnames = target_classnames

    def reset(self):
        self._predictions = defaultdict(list)  # class name -> list of prediction strings

    def process(self, inputs, outputs):
        for input, output in zip(inputs, outputs):
            image_id = input["image_id"]
            instances = output["instances"].to(self._cpu_device)
            boxes = instances.pred_boxes.tensor.numpy()
            scores = instances.scores.tolist()
            classes = instances.pred_classes.tolist()
            for box, score, cls in zip(boxes, scores, classes):
                xmin, ymin, xmax, ymax = box
                # The inverse of data loading logic in `datasets/pascal_voc.py`
                xmin += 1
                ymin += 1
                self._predictions[cls].append(
                    f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
                )

    def evaluate(self):
        """
        Returns:
            dict: has a key "bbox", whose value is a dict with "AP", "AP50",
            "AP75", and one "AP50-<class>" entry per evaluated class.
            Returns None on non-main ranks.
        """
        all_predictions = comm.gather(self._predictions, dst=0)
        if not comm.is_main_process():
            return
        predictions = defaultdict(list)
        for predictions_per_rank in all_predictions:
            for clsid, lines in predictions_per_rank.items():
                predictions[clsid].extend(lines)
        del all_predictions

        self._logger.info(
            "Evaluating {} using {} metric. "
            "Note that results do not use the official Matlab API.".format(
                self._dataset_name, 2007 if self._is_2007 else 2012
            )
        )

        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
            res_file_template = os.path.join(dirname, "{}.txt")

            aps = defaultdict(list)  # iou -> ap per class
            evaluated_names = []  # class names, in the same order as entries of aps[*]
            for cls_id, cls_name in enumerate(self._class_names):
                if cls_name not in self.target_classnames:
                    continue
                evaluated_names.append(cls_name)
                lines = predictions.get(cls_id, [""])

                with open(res_file_template.format(cls_name), "w") as f:
                    f.write("\n".join(lines))

                for thresh in range(50, 100, 5):
                    rec, prec, ap = voc_eval(
                        res_file_template,
                        self._anno_file_template,
                        self._image_set_path,
                        cls_name,
                        ovthresh=thresh / 100.0,
                        use_07_metric=self._is_2007,
                    )
                    aps[thresh].append(ap * 100)

        ret = OrderedDict()
        mAP = {iou: np.mean(x) for iou, x in aps.items()}
        ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}

        # Add the per-class AP50 entries.
        # BUGFIX: aps[50] is filled in the order classes appear in
        # self._class_names (filtered), not in self.target_classnames order;
        # indexing by enumerate(self.target_classnames) could mislabel scores
        # when the two orders differ. Use the recorded evaluation order.
        for idx, name in enumerate(evaluated_names):
            ret["bbox"].update({"AP50-" + name: aps[50][idx]})
        return ret
objects = [] for obj in tree.findall("object"): obj_struct = {} obj_struct["name"] = obj.find("name").text obj_struct["pose"] = obj.find("pose").text obj_struct["truncated"] = int(obj.find("truncated").text) obj_struct["difficult"] = int(obj.find("difficult").text) bbox = obj.find("bndbox") obj_struct["bbox"] = [ int(bbox.find("xmin").text), int(bbox.find("ymin").text), int(bbox.find("xmax").text), int(bbox.find("ymax").text), ] objects.append(obj_struct) return objects def voc_ap(rec, prec, use_07_metric=False): """Compute VOC AP given precision and recall. If use_07_metric is true, uses the VOC 07 11-point method (default:False). """ if use_07_metric: # 11 point metric ap = 0.0 for t in np.arange(0.0, 1.1, 0.1): if np.sum(rec >= t) == 0: p = 0 else: p = np.max(prec[rec >= t]) ap = ap + p / 11.0 else: # correct AP calculation # first append sentinel values at the end mrec = np.concatenate(([0.0], rec, [1.0])) mpre = np.concatenate(([0.0], prec, [0.0])) # compute the precision envelope for i in range(mpre.size - 1, 0, -1): mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) # to calculate area under PR curve, look for points # where X axis (recall) changes value i = np.where(mrec[1:] != mrec[:-1])[0] # and sum (\Delta recall) * prec ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) return ap def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname, [ovthresh], [use_07_metric]) Top level function that does the PASCAL VOC evaluation. detpath: Path to detections detpath.format(classname) should produce the detection results file. annopath: Path to annotations annopath.format(imagename) should be the xml annotations file. imagesetfile: Text file containing the list of images, one image per line. 
def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname, [ovthresh], [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name

    # first load gt: read list of images
    with PathManager.open(imagesetfile, "r") as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    # load annots (parse_rec is lru_cached, so repeated thresholds are cheap)
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_rec(annopath.format(imagename))

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        # BUGFIX: `np.bool` was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin `bool` is the correct dtype here.
        difficult = np.array([x["difficult"] for x in R]).astype(bool)
        det = [False] * len(R)
        # "difficult" objects are not counted as positives.
        npos = npos + sum(~difficult)
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, "r") as f:
        lines = f.readlines()

    splitlines = [x.strip().split(" ") for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

    # sort by confidence (descending)
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection (VOC boxes use inclusive pixel coordinates, hence +1)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    # duplicate detection of an already-matched gt box
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
kernel_size=3, padding=1) self.conv3 = nn.Conv2d(ndf2, ndf2, kernel_size=3, padding=1) self.classifier = nn.Conv2d(ndf2, 1, kernel_size=3, padding=1) self.leaky_relu = nn.LeakyReLU(negative_slope=0.2, inplace=True) def forward(self, x): x = self.conv1(x) x = self.leaky_relu(x) x = self.conv2(x) x = self.leaky_relu(x) x = self.conv3(x) x = self.leaky_relu(x) x = self.classifier(x) return x ################################# ################ Gradient reverse function class GradReverse(torch.autograd.Function): @staticmethod def forward(ctx, x): return x.view_as(x) @staticmethod def backward(ctx, grad_output): return grad_output.neg() def grad_reverse(x): return GradReverse.apply(x) ####################### @META_ARCH_REGISTRY.register() class DAobjTwoStagePseudoLabGeneralizedRCNN(GeneralizedRCNN): @configurable def __init__( self, *, backbone: Backbone, proposal_generator: nn.Module, roi_heads: nn.Module, pixel_mean: Tuple[float], pixel_std: Tuple[float], input_format: Optional[str] = None, vis_period: int = 0, dis_type: str, # dis_loss_weight: float = 0, ): """ Args: backbone: a backbone module, must follow detectron2's backbone interface proposal_generator: a module that generates proposals using backbone features roi_heads: a ROI head that performs per-region computation pixel_mean, pixel_std: list or tuple with #channels element, representing the per-channel mean and std to be used to normalize the input image input_format: describe the meaning of channels of input. Needed by visualization vis_period: the period to run visualization. Set to 0 to disable. """ super(GeneralizedRCNN, self).__init__() self.backbone = backbone self.proposal_generator = proposal_generator self.roi_heads = roi_heads self.input_format = input_format self.vis_period = vis_period if vis_period > 0: assert input_format is not None, "input_format is required for visualization!" 
    def build_discriminator(self):
        # Lazily (re)create the image-level discriminator on the model's
        # device; the channel count comes from the backbone feature map chosen
        # for domain alignment (cfg.SEMISUPNET.DIS_TYPE).
        self.D_img = FCDiscriminator_img(self.backbone._out_feature_channels[self.dis_type]).to(self.device)  # Need to know the channel

    @classmethod
    def from_config(cls, cfg):
        # detectron2 @configurable hook: translate a config node into the
        # keyword arguments expected by __init__.
        backbone = build_backbone(cfg)
        return {
            "backbone": backbone,
            "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
            "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
            "input_format": cfg.INPUT.FORMAT,
            "vis_period": cfg.VIS_PERIOD,
            "pixel_mean": cfg.MODEL.PIXEL_MEAN,
            "pixel_std": cfg.MODEL.PIXEL_STD,
            "dis_type": cfg.SEMISUPNET.DIS_TYPE,
        }

    def preprocess_image_train(self, batched_inputs: List[Dict[str, torch.Tensor]]):
        """
        Normalize, pad and batch the input images.

        Unlike ``preprocess_image``, each input dict is expected to carry both
        a source image ("image") and an unlabeled target-domain image
        ("image_unlabeled"); both are normalized with the same pixel statistics
        and padded to the backbone's size divisibility.

        Returns:
            (ImageList, ImageList): batched source images, batched target images.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)

        images_t = [x["image_unlabeled"].to(self.device) for x in batched_inputs]
        images_t = [(x - self.pixel_mean) / self.pixel_std for x in images_t]
        images_t = ImageList.from_tensors(images_t, self.backbone.size_divisibility)

        return images, images_t
    def forward(
        self, batched_inputs, branch="supervised", given_proposals=None, val_mode=False
    ):
        """
        Run one of several training branches, or plain inference.

        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image; for
                the "domain" branch each dict carries both "image" (source)
                and "image_unlabeled" (target) tensors.
            branch (str): "domain", "supervised", "supervised_target",
                "unsup_data_weak"; "unsup_data_strong" and "val_loss" are
                declared but not implemented.
            given_proposals: unused here; kept for interface compatibility.
            val_mode (bool): when True, do not short-circuit to inference even
                though the module is in eval mode.

        Returns:
            In training branches: a 4-tuple (losses, proposals_rpn,
            proposals_roih, ROI_predictions); entries not produced by the
            chosen branch are empty. In inference: the detected instances.
        """
        # The discriminator is built lazily on first use (e.g. after loading
        # from a checkpoint).
        # NOTE(review): `== None` works but `is None` is the idiomatic test.
        if self.D_img == None:
            self.build_discriminator()
        if (not self.training) and (not val_mode):  # only conduct when testing mode
            return self.inference(batched_inputs)

        # Domain labels for the adversarial discriminator loss.
        source_label = 0
        target_label = 1

        if branch == "domain":
            # Adversarial image-level alignment: backbone features pass through
            # a gradient-reversal layer before the discriminator, so lowering
            # the discriminator loss pushes the backbone toward domain-
            # invariant features.
            images_s, images_t = self.preprocess_image_train(batched_inputs)

            features = self.backbone(images_s.tensor)
            features_s = grad_reverse(features[self.dis_type])
            D_img_out_s = self.D_img(features_s)
            loss_D_img_s = F.binary_cross_entropy_with_logits(D_img_out_s, torch.FloatTensor(D_img_out_s.data.size()).fill_(source_label).to(self.device))

            features_t = self.backbone(images_t.tensor)
            features_t = grad_reverse(features_t[self.dis_type])
            D_img_out_t = self.D_img(features_t)
            loss_D_img_t = F.binary_cross_entropy_with_logits(D_img_out_t, torch.FloatTensor(D_img_out_t.data.size()).fill_(target_label).to(self.device))

            losses = {}
            losses["loss_D_img_s"] = loss_D_img_s
            losses["loss_D_img_t"] = loss_D_img_t
            return losses, [], [], None

        images = self.preprocess_image(batched_inputs)

        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        # TODO: remove the usage of if else here. This needs to be re-organized
        if branch == "supervised":
            # Supervised detection on labeled source images, plus a strongly
            # down-weighted source-side discriminator loss.
            features_s = grad_reverse(features[self.dis_type])
            D_img_out_s = self.D_img(features_s)
            loss_D_img_s = F.binary_cross_entropy_with_logits(D_img_out_s, torch.FloatTensor(D_img_out_s.data.size()).fill_(source_label).to(self.device))

            # Region proposal network
            proposals_rpn, proposal_losses = self.proposal_generator(
                images, features, gt_instances
            )

            # roi_head lower branch
            _, detector_losses = self.roi_heads(
                images,
                features,
                proposals_rpn,
                compute_loss=True,
                targets=gt_instances,
                branch=branch,
            )

            # visualization
            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    self.visualize_training(batched_inputs, proposals_rpn, branch)

            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            losses["loss_D_img_s"] = loss_D_img_s*0.001
            return losses, [], [], None

        elif branch == "supervised_target":
            # Supervised detection on target-domain images (pseudo labels);
            # no discriminator loss on this branch.

            # Region proposal network
            proposals_rpn, proposal_losses = self.proposal_generator(
                images, features, gt_instances
            )

            # roi_head lower branch
            _, detector_losses = self.roi_heads(
                images,
                features,
                proposals_rpn,
                compute_loss=True,
                targets=gt_instances,
                branch=branch,
            )

            # visualization
            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    self.visualize_training(batched_inputs, proposals_rpn, branch)

            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            return losses, [], [], None

        elif branch == "unsup_data_weak":
            # unsupervised weak branch: input image without any ground-truth
            # label; output proposals of rpn and roi-head (no losses).

            # Region proposal network
            proposals_rpn, _ = self.proposal_generator(
                images, features, None, compute_loss=False
            )

            # roi_head lower branch (keep this for further production)
            # notice that we do not use any target in ROI head to do inference!
            proposals_roih, ROI_predictions = self.roi_heads(
                images,
                features,
                proposals_rpn,
                targets=None,
                compute_loss=False,
                branch=branch,
            )

            return {}, proposals_rpn, proposals_roih, ROI_predictions

        elif branch == "unsup_data_strong":
            raise NotImplementedError()
        elif branch == "val_loss":
            raise NotImplementedError()
""" from detectron2.utils.visualizer import Visualizer storage = get_event_storage() max_vis_prop = 20 for input, prop in zip(batched_inputs, proposals): img = input["image"] img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) v_gt = Visualizer(img, None) v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) anno_img = v_gt.get_image() box_size = min(len(prop.proposal_boxes), max_vis_prop) v_pred = Visualizer(img, None) v_pred = v_pred.overlay_instances( boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() ) prop_img = v_pred.get_image() vis_img = np.concatenate((anno_img, prop_img), axis=1) vis_img = vis_img.transpose(2, 0, 1) vis_name = ( "Left: GT bounding boxes " + branch + "; Right: Predicted proposals " + branch ) storage.put_image(vis_name, vis_img) break # only visualize one image in a batch @META_ARCH_REGISTRY.register() class TwoStagePseudoLabGeneralizedRCNN(GeneralizedRCNN): def forward( self, batched_inputs, branch="supervised", given_proposals=None, val_mode=False ): if (not self.training) and (not val_mode): return self.inference(batched_inputs) images = self.preprocess_image(batched_inputs) if "instances" in batched_inputs[0]: gt_instances = [x["instances"].to(self.device) for x in batched_inputs] else: gt_instances = None features = self.backbone(images.tensor) if branch == "supervised": # Region proposal network proposals_rpn, proposal_losses = self.proposal_generator( images, features, gt_instances ) # # roi_head lower branch _, detector_losses = self.roi_heads( images, features, proposals_rpn, gt_instances, branch=branch ) losses = {} losses.update(detector_losses) losses.update(proposal_losses) return losses, [], [], None elif branch == "unsup_data_weak": # Region proposal network proposals_rpn, _ = self.proposal_generator( images, features, None, compute_loss=False ) # roi_head lower branch (keep this for further production) # notice that we do not use any target in ROI head to do inference ! 
proposals_roih, ROI_predictions = self.roi_heads( images, features, proposals_rpn, targets=None, compute_loss=False, branch=branch, ) return {}, proposals_rpn, proposals_roih, ROI_predictions elif branch == "val_loss": # Region proposal network proposals_rpn, proposal_losses = self.proposal_generator( images, features, gt_instances, compute_val_loss=True ) # roi_head lower branch _, detector_losses = self.roi_heads( images, features, proposals_rpn, gt_instances, branch=branch, compute_val_loss=True, ) losses = {} losses.update(detector_losses) losses.update(proposal_losses) return losses, [], [], None ================================================ FILE: adapteacher/modeling/meta_arch/ts_ensemble.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from torch.nn.parallel import DataParallel, DistributedDataParallel import torch.nn as nn class EnsembleTSModel(nn.Module): def __init__(self, modelTeacher, modelStudent): super(EnsembleTSModel, self).__init__() if isinstance(modelTeacher, (DistributedDataParallel, DataParallel)): modelTeacher = modelTeacher.module if isinstance(modelStudent, (DistributedDataParallel, DataParallel)): modelStudent = modelStudent.module self.modelTeacher = modelTeacher self.modelStudent = modelStudent ================================================ FILE: adapteacher/modeling/meta_arch/vgg.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved import torch.nn as nn import copy import torch from typing import Union, List, Dict, Any, cast from detectron2.modeling.backbone import ( ResNet, Backbone, build_resnet_backbone, BACKBONE_REGISTRY ) from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool, LastLevelP6P7 def make_layers(cfg: List[Union[str, int]], batch_norm: bool = False) -> nn.Sequential: layers: List[nn.Module] = [] in_channels = 3 for v in cfg: if v == 'M': layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: v = cast(int, v) conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v return nn.Sequential(*layers) cfgs: Dict[str, List[Union[str, int]]] = { 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], } class vgg_backbone(Backbone): """ Backbone (bottom-up) for FBNet. Hierarchy: trunk0: xif0_0 xif0_1 ... trunk1: xif1_0 xif1_1 ... ... Output features: The outputs from each "stage", i.e. trunkX. 
""" def __init__(self, cfg): super().__init__() self.vgg = make_layers(cfgs['vgg16'],batch_norm=True) self._initialize_weights() # self.stage_names_index = {'vgg1':3, 'vgg2':8 , 'vgg3':15, 'vgg4':22, 'vgg5':29} _out_feature_channels = [64, 128, 256, 512, 512] _out_feature_strides = [2, 4, 8, 16, 32] # stages, shape_specs = build_fbnet( # cfg, # name="trunk", # in_channels=cfg.MODEL.FBNET_V2.STEM_IN_CHANNELS # ) # nn.Sequential(*list(self.vgg.features._modules.values())[:14]) self.stages = [nn.Sequential(*list(self.vgg._modules.values())[0:7]),\ nn.Sequential(*list(self.vgg._modules.values())[7:14]),\ nn.Sequential(*list(self.vgg._modules.values())[14:24]),\ nn.Sequential(*list(self.vgg._modules.values())[24:34]),\ nn.Sequential(*list(self.vgg._modules.values())[34:]),] self._out_feature_channels = {} self._out_feature_strides = {} self._stage_names = [] for i, stage in enumerate(self.stages): name = "vgg{}".format(i) self.add_module(name, stage) self._stage_names.append(name) self._out_feature_channels[name] = _out_feature_channels[i] self._out_feature_strides[name] = _out_feature_strides[i] self._out_features = self._stage_names del self.vgg def forward(self, x): features = {} for name, stage in zip(self._stage_names, self.stages): x = stage(x) # if name in self._out_features: # outputs[name] = x features[name] = x # import pdb # pdb.set_trace() return features def _initialize_weights(self) -> None: for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.constant_(m.bias, 0) @BACKBONE_REGISTRY.register() #already register in baseline model def build_vgg_backbone(cfg, _): return vgg_backbone(cfg) @BACKBONE_REGISTRY.register() #already register in baseline model def 
build_vgg_fpn_backbone(cfg, _): # backbone = FPN( # bottom_up=build_vgg_backbone(cfg), # in_features=cfg.MODEL.FPN.IN_FEATURES, # out_channels=cfg.MODEL.FPN.OUT_CHANNELS, # norm=cfg.MODEL.FPN.NORM, # top_block=LastLevelMaxPool(), # ) bottom_up = vgg_backbone(cfg) in_features = cfg.MODEL.FPN.IN_FEATURES out_channels = cfg.MODEL.FPN.OUT_CHANNELS backbone = FPN( bottom_up=bottom_up, in_features=in_features, out_channels=out_channels, norm=cfg.MODEL.FPN.NORM, top_block=LastLevelMaxPool(), # fuse_type=cfg.MODEL.FPN.FUSE_TYPE, ) # return backbone return backbone ================================================ FILE: adapteacher/modeling/proposal_generator/rpn.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from typing import Dict, Optional import torch from detectron2.structures import ImageList, Instances from detectron2.modeling.proposal_generator import RPN from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY @PROPOSAL_GENERATOR_REGISTRY.register() class PseudoLabRPN(RPN): """ Region Proposal Network, introduced by :paper:`Faster R-CNN`. 
""" def forward( self, images: ImageList, features: Dict[str, torch.Tensor], gt_instances: Optional[Instances] = None, compute_loss: bool = True, compute_val_loss: bool = False, ): features = [features[f] for f in self.in_features] anchors = self.anchor_generator(features) pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) pred_objectness_logits = [ # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) score.permute(0, 2, 3, 1).flatten(1) for score in pred_objectness_logits ] pred_anchor_deltas = [ # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) x.view( x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1] ) .permute(0, 3, 4, 1, 2) .flatten(1, -2) for x in pred_anchor_deltas ] if (self.training and compute_loss) or compute_val_loss: gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) losses = self.losses( anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes ) losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} else: # inference losses = {} proposals = self.predict_proposals( anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes ) return proposals, losses ================================================ FILE: adapteacher/modeling/roi_heads/fast_rcnn.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved import torch from torch import nn from torch.nn import functional as F from detectron2.modeling.roi_heads.fast_rcnn import ( FastRCNNOutputLayers, FastRCNNOutputs, ) # focal loss class FastRCNNFocaltLossOutputLayers(FastRCNNOutputLayers): def __init__(self, cfg, input_shape): super(FastRCNNFocaltLossOutputLayers, self).__init__(cfg, input_shape) self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES def losses(self, predictions, proposals): """ Args: predictions: return values of :meth:`forward()`. 
proposals (list[Instances]): proposals that match the features that were used to compute predictions. """ scores, proposal_deltas = predictions losses = FastRCNNFocalLoss( self.box2box_transform, scores, proposal_deltas, proposals, self.smooth_l1_beta, self.box_reg_loss_type, num_classes=self.num_classes, ).losses() return losses class FastRCNNFocalLoss(FastRCNNOutputs): """ A class that stores information about outputs of a Fast R-CNN head. It provides methods that are used to decode the outputs of a Fast R-CNN head. """ def __init__( self, box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta=0.0, box_reg_loss_type="smooth_l1", num_classes=80, ): super(FastRCNNFocalLoss, self).__init__( box2box_transform, pred_class_logits, pred_proposal_deltas, proposals, smooth_l1_beta, box_reg_loss_type, ) self.num_classes = num_classes def losses(self): return { "loss_cls": self.comput_focal_loss(), "loss_box_reg": self.box_reg_loss(), } def comput_focal_loss(self): if self._no_instances: return 0.0 * self.pred_class_logits.sum() else: FC_loss = FocalLoss( gamma=1.5, num_classes=self.num_classes, ) total_loss = FC_loss(input=self.pred_class_logits, target=self.gt_classes) total_loss = total_loss / self.gt_classes.shape[0] return total_loss class FocalLoss(nn.Module): def __init__( self, weight=None, gamma=1.0, num_classes=80, ): super(FocalLoss, self).__init__() assert gamma >= 0 self.gamma = gamma self.weight = weight self.num_classes = num_classes def forward(self, input, target): # focal loss CE = F.cross_entropy(input, target, reduction="none") p = torch.exp(-CE) loss = (1 - p) ** self.gamma * CE return loss.sum() ================================================ FILE: adapteacher/modeling/roi_heads/roi_heads.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved import torch from typing import Dict, List, Optional, Tuple, Union from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou from detectron2.modeling.proposal_generator.proposal_utils import ( add_ground_truth_to_proposals, ) from detectron2.utils.events import get_event_storage from detectron2.modeling.roi_heads.box_head import build_box_head from detectron2.layers import ShapeSpec from detectron2.modeling.roi_heads import ( ROI_HEADS_REGISTRY, StandardROIHeads, ) from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers from adapteacher.modeling.roi_heads.fast_rcnn import FastRCNNFocaltLossOutputLayers import numpy as np from detectron2.modeling.poolers import ROIPooler @ROI_HEADS_REGISTRY.register() class StandardROIHeadsPseudoLab(StandardROIHeads): @classmethod def _init_box_head(cls, cfg, input_shape): # fmt: off in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE # fmt: on in_channels = [input_shape[f].channels for f in in_features] # Check all channel counts are equal assert len(set(in_channels)) == 1, in_channels in_channels = in_channels[0] box_pooler = ROIPooler( output_size=pooler_resolution, scales=pooler_scales, sampling_ratio=sampling_ratio, pooler_type=pooler_type, ) box_head = build_box_head( cfg, ShapeSpec( channels=in_channels, height=pooler_resolution, width=pooler_resolution ), ) if cfg.MODEL.ROI_HEADS.LOSS == "CrossEntropy": box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) elif cfg.MODEL.ROI_HEADS.LOSS == "FocalLoss": box_predictor = FastRCNNFocaltLossOutputLayers(cfg, box_head.output_shape) else: raise ValueError("Unknown ROI head loss.") return { "box_in_features": in_features, "box_pooler": box_pooler, "box_head": box_head, "box_predictor": 
box_predictor, } def forward( self, images: ImageList, features: Dict[str, torch.Tensor], proposals: List[Instances], targets: Optional[List[Instances]] = None, compute_loss=True, branch="", compute_val_loss=False, ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: del images if self.training and compute_loss: # apply if training loss assert targets # 1000 --> 512 proposals = self.label_and_sample_proposals( proposals, targets, branch=branch ) elif compute_val_loss: # apply if val loss assert targets # 1000 --> 512 temp_proposal_append_gt = self.proposal_append_gt self.proposal_append_gt = False proposals = self.label_and_sample_proposals( proposals, targets, branch=branch ) # do not apply target on proposals self.proposal_append_gt = temp_proposal_append_gt del targets if (self.training and compute_loss) or compute_val_loss: losses, _ = self._forward_box( features, proposals, compute_loss, compute_val_loss, branch ) return proposals, losses else: pred_instances, predictions = self._forward_box( features, proposals, compute_loss, compute_val_loss, branch ) return pred_instances, predictions def _forward_box( self, features: Dict[str, torch.Tensor], proposals: List[Instances], compute_loss: bool = True, compute_val_loss: bool = False, branch: str = "", ) -> Union[Dict[str, torch.Tensor], List[Instances]]: features = [features[f] for f in self.box_in_features] box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) box_features = self.box_head(box_features) predictions = self.box_predictor(box_features) del box_features if ( self.training and compute_loss ) or compute_val_loss: # apply if training loss or val loss losses = self.box_predictor.losses(predictions, proposals) if self.train_on_pred_boxes: with torch.no_grad(): pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( predictions, proposals ) for proposals_per_image, pred_boxes_per_image in zip( proposals, pred_boxes ): proposals_per_image.proposal_boxes = 
Boxes(pred_boxes_per_image) return losses, predictions else: pred_instances, _ = self.box_predictor.inference(predictions, proposals) return pred_instances, predictions @torch.no_grad() def label_and_sample_proposals( self, proposals: List[Instances], targets: List[Instances], branch: str = "" ) -> List[Instances]: gt_boxes = [x.gt_boxes for x in targets] if self.proposal_append_gt: proposals = add_ground_truth_to_proposals(gt_boxes, proposals) proposals_with_gt = [] num_fg_samples = [] num_bg_samples = [] for proposals_per_image, targets_per_image in zip(proposals, targets): has_gt = len(targets_per_image) > 0 match_quality_matrix = pairwise_iou( targets_per_image.gt_boxes, proposals_per_image.proposal_boxes ) matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) sampled_idxs, gt_classes = self._sample_proposals( matched_idxs, matched_labels, targets_per_image.gt_classes ) proposals_per_image = proposals_per_image[sampled_idxs] proposals_per_image.gt_classes = gt_classes if has_gt: sampled_targets = matched_idxs[sampled_idxs] for (trg_name, trg_value) in targets_per_image.get_fields().items(): if trg_name.startswith("gt_") and not proposals_per_image.has( trg_name ): proposals_per_image.set(trg_name, trg_value[sampled_targets]) else: gt_boxes = Boxes( targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)) ) proposals_per_image.gt_boxes = gt_boxes num_bg_samples.append((gt_classes == self.num_classes).sum().item()) num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) proposals_with_gt.append(proposals_per_image) storage = get_event_storage() storage.put_scalar( "roi_head/num_target_fg_samples_" + branch, np.mean(num_fg_samples) ) storage.put_scalar( "roi_head/num_target_bg_samples_" + branch, np.mean(num_bg_samples) ) return proposals_with_gt ================================================ FILE: adapteacher/solver/build.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved import torch from detectron2.config import CfgNode from detectron2.solver.lr_scheduler import WarmupCosineLR, WarmupMultiStepLR from .lr_scheduler import WarmupTwoStageMultiStepLR def build_lr_scheduler( cfg: CfgNode, optimizer: torch.optim.Optimizer ) -> torch.optim.lr_scheduler._LRScheduler: """ Build a LR scheduler from config. """ name = cfg.SOLVER.LR_SCHEDULER_NAME if name == "WarmupMultiStepLR": return WarmupMultiStepLR( optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA, warmup_factor=cfg.SOLVER.WARMUP_FACTOR, warmup_iters=cfg.SOLVER.WARMUP_ITERS, warmup_method=cfg.SOLVER.WARMUP_METHOD, ) elif name == "WarmupCosineLR": return WarmupCosineLR( optimizer, cfg.SOLVER.MAX_ITER, warmup_factor=cfg.SOLVER.WARMUP_FACTOR, warmup_iters=cfg.SOLVER.WARMUP_ITERS, warmup_method=cfg.SOLVER.WARMUP_METHOD, ) elif name == "WarmupTwoStageMultiStepLR": return WarmupTwoStageMultiStepLR( optimizer, cfg.SOLVER.STEPS, factor_list=cfg.SOLVER.FACTOR_LIST, gamma=cfg.SOLVER.GAMMA, warmup_factor=cfg.SOLVER.WARMUP_FACTOR, warmup_iters=cfg.SOLVER.WARMUP_ITERS, warmup_method=cfg.SOLVER.WARMUP_METHOD, ) else: raise ValueError("Unknown LR scheduler: {}".format(name)) ================================================ FILE: adapteacher/solver/lr_scheduler.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from bisect import bisect_right from typing import List import torch from detectron2.solver.lr_scheduler import _get_warmup_factor_at_iter class WarmupTwoStageMultiStepLR(torch.optim.lr_scheduler._LRScheduler): def __init__( self, optimizer: torch.optim.Optimizer, milestones: List[int], factor_list: List[int], gamma: float = 0.1, warmup_factor: float = 0.001, warmup_iters: int = 1000, warmup_method: str = "linear", last_epoch: int = -1, ): if not list(milestones) == sorted(milestones): raise ValueError( "Milestones should be a list of" " increasing integers. 
Got {}", milestones, ) if len(milestones) + 1 != len(factor_list): raise ValueError("Length of milestones should match length of factor_list.") self.milestones = milestones self.gamma = gamma self.warmup_factor = warmup_factor self.warmup_iters = warmup_iters self.warmup_method = warmup_method self.factor_list = factor_list super().__init__(optimizer, last_epoch) def get_lr(self) -> List[float]: warmup_factor = _get_warmup_factor_at_iter( self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor ) return [ base_lr * warmup_factor * self.factor_list[bisect_right(self.milestones, self.last_epoch)] for base_lr in self.base_lrs ] def _compute_values(self) -> List[float]: # The new interface return self.get_lr() ================================================ FILE: configs/Base-RCNN-C4.yaml ================================================ MODEL: META_ARCHITECTURE: "GeneralizedRCNN" RPN: PRE_NMS_TOPK_TEST: 6000 POST_NMS_TOPK_TEST: 1000 ROI_HEADS: NAME: "Res5ROIHeads" DATASETS: TRAIN: ("coco_2017_train",) TEST: ("coco_2017_val",) SOLVER: IMS_PER_BATCH: 16 BASE_LR: 0.02 STEPS: (60000, 80000) MAX_ITER: 90000 INPUT: MIN_SIZE_TRAIN: (600,) VERSION: 2 ================================================ FILE: configs/faster_rcnn_R101_cross_clipart.yaml ================================================ _BASE_: "./Base-RCNN-C4.yaml" MODEL: # META_ARCHITECTURE: "TwoStagePseudoLabGeneralizedRCNN" META_ARCHITECTURE: "DAobjTwoStagePseudoLabGeneralizedRCNN" # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" MASK_ON: False RESNETS: DEPTH: 101 PROPOSAL_GENERATOR: NAME: "PseudoLabRPN" # RPN: # POSITIVE_FRACTION: 0.25 ROI_HEADS: NAME: "StandardROIHeadsPseudoLab" LOSS: "CrossEntropy" # variant: "CrossEntropy" NUM_CLASSES: 20 ROI_BOX_HEAD: NAME: "FastRCNNConvFCHead" NUM_FC: 2 POOLER_RESOLUTION: 7 SOLVER: LR_SCHEDULER_NAME: "WarmupTwoStageMultiStepLR" STEPS: (60000, 80000, 90000, 360000) FACTOR_LIST: (1, 1, 1, 
1, 1)
  MAX_ITER: 100000
  IMG_PER_BATCH_LABEL: 16
  IMG_PER_BATCH_UNLABEL: 16
  BASE_LR: 0.01
DATALOADER:
  SUP_PERCENT: 100.0
DATASETS:
  CROSS_DATASET: True
  TRAIN_LABEL: ("voc_2012_trainval",) #voc_2012_train
  # TRAIN_UNLABEL: ("voc_2012_val",) #Clipart1k_train
  # TEST: ("voc_2012_val",) #Clipart1k_test
  TRAIN_UNLABEL: ("Clipart1k_train",) #Clipart1k_train
  TEST: ("Clipart1k_test",) #Clipart1k_test
SEMISUPNET:
  Trainer: "ateacher"
  BBOX_THRESHOLD: 0.8
  TEACHER_UPDATE_ITER: 1
  BURN_UP_STEP: 60000
  EMA_KEEP_RATE: 0.9996
  UNSUP_LOSS_WEIGHT: 0.5 # 1.0 is suboptimal
  SUP_LOSS_WEIGHT: 1.0
  DIS_TYPE: "res4" #["concate","p2","multi"]
  DIS_LOSS_WEIGHT: 0.1
TEST:
  EVAL_PERIOD: 2000
OUTPUT_DIR: ./output/faster_rcnn_R101_cross_clipart_test
================================================
FILE: configs/faster_rcnn_R101_cross_clipart_b4.yaml
================================================
_BASE_: "./Base-RCNN-C4.yaml"
MODEL:
  META_ARCHITECTURE: "DAobjTwoStagePseudoLabGeneralizedRCNN"
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
  PROPOSAL_GENERATOR:
    NAME: "PseudoLabRPN"
  # RPN:
  #   POSITIVE_FRACTION: 0.25
  ROI_HEADS:
    NAME: "StandardROIHeadsPseudoLab"
    LOSS: "CrossEntropy" # variant: "CrossEntropy"
    NUM_CLASSES: 20
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
SOLVER:
  LR_SCHEDULER_NAME: "WarmupTwoStageMultiStepLR"
  STEPS: (240000, 320000, 360000, 1440000)
  FACTOR_LIST: (1, 1, 1, 1, 1)
  MAX_ITER: 400000
  IMG_PER_BATCH_LABEL: 4
  IMG_PER_BATCH_UNLABEL: 4
  IMS_PER_BATCH: 4
  BASE_LR: 0.01
DATALOADER:
  SUP_PERCENT: 100.0
DATASETS:
  CROSS_DATASET: True
  TRAIN_LABEL: ("voc_2012_trainval",) #("voc_2012_trainval","voc_2007_trainval")
  TRAIN_UNLABEL: ("Clipart1k_train",)
  TEST: ("Clipart1k_test",)
SEMISUPNET:
  Trainer: "ateacher"
  BBOX_THRESHOLD: 0.8
  TEACHER_UPDATE_ITER: 1
  BURN_UP_STEP: 80000
  EMA_KEEP_RATE: 0.9996
  UNSUP_LOSS_WEIGHT: 1.0
  SUP_LOSS_WEIGHT: 1.0
  DIS_TYPE: "res4" #["concate","p2","multi"]
TEST:
  EVAL_PERIOD: 4000
OUTPUT_DIR: ./output/faster_rcnn_R101_cross_clipart_mod
================================================
FILE: configs/faster_rcnn_R101_cross_water.yaml
================================================
_BASE_: "./Base-RCNN-C4.yaml"
MODEL:
  # META_ARCHITECTURE: "TwoStagePseudoLabGeneralizedRCNN"
  META_ARCHITECTURE: "DAobjTwoStagePseudoLabGeneralizedRCNN"
  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
  PROPOSAL_GENERATOR:
    NAME: "PseudoLabRPN"
  # RPN:
  #   POSITIVE_FRACTION: 0.25
  ROI_HEADS:
    NAME: "StandardROIHeadsPseudoLab"
    LOSS: "CrossEntropy" # variant: "CrossEntropy"
    NUM_CLASSES: 20
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
SOLVER:
  LR_SCHEDULER_NAME: "WarmupTwoStageMultiStepLR"
  STEPS: (60000, 80000, 90000, 360000)
  FACTOR_LIST: (1, 1, 1, 1, 1)
  MAX_ITER: 360000
  IMG_PER_BATCH_LABEL: 16
  IMG_PER_BATCH_UNLABEL: 16
  BASE_LR: 0.04
DATALOADER:
  SUP_PERCENT: 100.0
DATASETS:
  CROSS_DATASET: True
  TRAIN_LABEL: ("voc_2012_trainval",) #voc_2012_train
  TRAIN_UNLABEL: ("Watercolor_train",) #Clipart1k_train
  TEST: ("Watercolor_test",) #Clipart1k_test
SEMISUPNET:
  Trainer: "ateacher"
  BBOX_THRESHOLD: 0.8
  TEACHER_UPDATE_ITER: 1
  BURN_UP_STEP: 20000
  EMA_KEEP_RATE: 0.9996
  UNSUP_LOSS_WEIGHT: 1.0
  SUP_LOSS_WEIGHT: 1.0
  DIS_TYPE: "res4" #["concate","p2","multi"]
TEST:
  EVAL_PERIOD: 1000
================================================
FILE: configs/faster_rcnn_VGG_cross_city.yaml
================================================
_BASE_: "./Base-RCNN-C4.yaml"
MODEL:
  # META_ARCHITECTURE: "TwoStagePseudoLabGeneralizedRCNN"
  META_ARCHITECTURE: "DAobjTwoStagePseudoLabGeneralizedRCNN"
  BACKBONE:
    NAME: "build_vgg_backbone"
  MASK_ON: False
  RESNETS:
    DEPTH: 101
  PROPOSAL_GENERATOR:
    NAME: "PseudoLabRPN"
  # RPN:
  #   POSITIVE_FRACTION: 0.25
  RPN:
    IN_FEATURES: ["vgg4"]
  ROI_HEADS:
    NAME: "StandardROIHeadsPseudoLab"
    LOSS: "CrossEntropy" # variant: "CrossEntropy"
    NUM_CLASSES: 8
    IN_FEATURES: ["vgg4"]
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
SOLVER:
  LR_SCHEDULER_NAME: "WarmupTwoStageMultiStepLR"
  STEPS: (60000, 80000, 90000, 360000)
  FACTOR_LIST: (1, 1, 1, 1, 1)
  MAX_ITER: 100000
  IMG_PER_BATCH_LABEL: 16
  IMG_PER_BATCH_UNLABEL: 16
  BASE_LR: 0.04
DATALOADER:
  SUP_PERCENT: 100.0
# BUGFIX: removed a duplicated nested "DATASETS:" key ("DATASETS: DATASETS:")
# that made the mapping invalid / shadowed the intended top-level section.
DATASETS:
  CROSS_DATASET: True
  TRAIN_LABEL: ("cityscapes_fine_instance_seg_train",)
  TRAIN_UNLABEL: ("cityscapes_foggy_train",)
  TEST: ("cityscapes_foggy_val",)
SEMISUPNET:
  Trainer: "ateacher"
  BBOX_THRESHOLD: 0.8
  TEACHER_UPDATE_ITER: 1
  BURN_UP_STEP: 20000
  EMA_KEEP_RATE: 0.9996
  UNSUP_LOSS_WEIGHT: 1.0
  SUP_LOSS_WEIGHT: 1.0
  DIS_TYPE: "vgg4" #["concate","p2","multi"]
TEST:
  EVAL_PERIOD: 1000
================================================
FILE: prod_lib/TARGETS
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
================================================
FILE: prod_lib/config/defaults.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# from d2go.config import CfgNode as CN


def add_aut_config(cfg):
    """
    Add config for SemiSupSegRunner.
    """
    _C = cfg

    #New added for discriminator
    _C.UNBIASEDTEACHER.DIS_LOSS_WEIGHT = 0.1
    _C.UNBIASEDTEACHER.DIS_TYPE = "concate" #["concate","p2","multi"]
    _C.UNBIASEDTEACHER.ISAUG = "Yes"
================================================
FILE: prod_lib/data/builtin.py
================================================
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved import contextlib import io import logging import os import json from detectron2.data import DatasetCatalog, MetadataCatalog from d2go.data.utils import CallFuncWithJsonFile from detectron2.utils.file_io import PathManager from fvcore.common.timer import Timer from detectron2.data.datasets.pascal_voc import register_pascal_voc from detectron2.data.datasets.builtin_meta import _get_builtin_metadata from .cityscapes_foggy import load_cityscapes_instances logger = logging.getLogger(__name__) _SPLITS_COCO_FORMAT = {} _SPLITS_COCO_FORMAT["coco"] = { "coco_2017_unlabel": ( "memcache_manifold://mobile_vision_dataset/tree/coco_unlabel2017", "memcache_manifold://mobile_vision_dataset/tree/coco_unlabel2017/coco_jsons/image_info_unlabeled2017.json", ), "goi_v5_unlabel": ( "memcache_manifold://portal_ai_data/tree/goi_v5/train", "memcache_manifold://mobile_vision_dataset/tree/goi/v5/coco_jsons/openimages_v5_train_unlabel.json", ), } def register_coco_unlabel(): for _, splits_per_dataset in _SPLITS_COCO_FORMAT.items(): for key, (image_root, json_file) in splits_per_dataset.items(): meta = {} register_coco_unlabel_instances(key, meta, json_file, image_root) def register_coco_unlabel_instances(name, metadata, json_file, image_root): """ Register a dataset in COCO's json annotation format for instance detection, instance segmentation and keypoint detection. (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. `instances*.json` and `person_keypoints*.json` in the dataset). This is an example of how to register a new dataset. You can do something similar to this function, to register new datasets. Args: name (str): the name that identifies a dataset, e.g. "coco_2014_train". metadata (dict): extra metadata associated with this dataset. You can leave it as an empty dict. json_file (str): path to the json instance annotation file. image_root (str or path-like): directory which contains all the images. 
""" assert isinstance(name, str), name assert isinstance(json_file, (str, os.PathLike)), json_file assert isinstance(image_root, (str, os.PathLike)), image_root # 1. register a function which returns dicts DatasetCatalog.register( name, lambda: load_coco_unlabel_json(json_file, image_root, name) ) # 2. Optionally, add metadata about this dataset, # since they might be useful in evaluation, visualization or logging MetadataCatalog.get(name).set( json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata ) def load_coco_unlabel_json( json_file, image_root, dataset_name=None, extra_annotation_keys=None ): """ Load a json file with COCO's instances annotation format. Currently supports instance detection, instance segmentation, and person keypoints annotations. Args: json_file (str): full path to the json file in COCO instances annotation format. image_root (str or path-like): the directory where the images in this json file exists. dataset_name (str): the name of the dataset (e.g., coco_2017_train). If provided, this function will also put "thing_classes" into the metadata associated with this dataset. extra_annotation_keys (list[str]): list of per-annotation keys that should also be loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", "category_id", "segmentation"). The values for these keys will be returned as-is. For example, the densepose annotations are loaded in this way. Returns: list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See `Using Custom Datasets `_ ) Notes: 1. This function does not read the image files. The results do not have the "image" field. 
""" from pycocotools.coco import COCO timer = Timer() json_file = PathManager.get_local_path(json_file) with contextlib.redirect_stdout(io.StringIO()): coco_api = COCO(json_file) if timer.seconds() > 1: logger.info( "Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()) ) # sort indices for reproducible results img_ids = sorted(coco_api.imgs.keys()) imgs = coco_api.loadImgs(img_ids) logger.info( "Loaded {} unlabeled images in COCO format from {}".format(len(imgs), json_file) ) dataset_dicts = [] for img_dict in imgs: record = {} record["file_name"] = os.path.join(image_root, img_dict["file_name"]) record["height"] = img_dict["height"] record["width"] = img_dict["width"] record["image_id"] = img_dict["id"] dataset_dicts.append(record) return dataset_dicts _UNLABELED_DATASETS = { # 1-2 people images extracted from UGC ig images or ig profiles using fetch_image flow "UGC_unlabel_ig_1M_20210514_1or2people": "manifold://pai_mobile/tree/datasets/semi_supervised/unlabeled_UGC/sweep_4m_20210514_20210515_1or2people.json", # hand non-UGC long range frames extracted from collected videos "hand_nonUGC_long_range_384K_20210521": "manifold://pai_mobile/tree/datasets/hand_unlabeled_nonUGC/long_range.json", # hand non-UGC short range images cropped from the annotated bounding boxes in long-range videos "hand_nonUGC_short_range_183K_20210521": "manifold://pai_mobile/tree/datasets/hand_unlabeled_nonUGC/short_range.json", } def load_json(json_file): """ Simply load and return the json_file """ with PathManager.open(json_file, "r") as f: json_data = json.load(f) return json_data def register_unlabeled(): """ Register the unlabeled datasets The json_file needs to be in D2's format """ for name, json_file in _UNLABELED_DATASETS.items(): # 1. register a function which returns dicts DatasetCatalog.register( name, CallFuncWithJsonFile( func=load_json, json_file=json_file ) ) # 2. 
Optionally, add metadata about this dataset, # since they might be useful in evaluation, visualization or logging MetadataCatalog.get(name).set( json_file=json_file, image_root="", evaluator_type="coco" ) # ==== Predefined splits for raw cityscapes foggy images =========== _RAW_CITYSCAPES_SPLITS = { # "cityscapes_foggy_{task}_train": ("cityscape_foggy/leftImg8bit/train/", "cityscape_foggy/gtFine/train/"), # "cityscapes_foggy_{task}_val": ("cityscape_foggy/leftImg8bit/val/", "cityscape_foggy/gtFine/val/"), # "cityscapes_foggy_{task}_test": ("cityscape_foggy/leftImg8bit/test/", "cityscape_foggy/gtFine/test/"), "cityscapes_foggy_train": ("cityscape_foggy/leftImg8bit/train/", "cityscape_foggy/gtFine/train/"), "cityscapes_foggy_val": ("cityscape_foggy/leftImg8bit/val/", "cityscape_foggy/gtFine/val/"), "cityscapes_foggy_test": ("cityscape_foggy/leftImg8bit/test/", "cityscape_foggy/gtFine/test/"), } def register_all_cityscapes_foggy(): root = "manifold://mobile_vision_dataset/tree/yujheli/dataset" for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): meta = _get_builtin_metadata("cityscapes") image_dir = os.path.join(root, image_dir) gt_dir = os.path.join(root, gt_dir) # inst_key = key.format(task="instance_seg") inst_key = key # DatasetCatalog.register( # inst_key, # lambda x=image_dir, y=gt_dir: load_cityscapes_instances( # x, y, from_json=True, to_polygons=True # ), # ) DatasetCatalog.register( inst_key, lambda x=image_dir, y=gt_dir: load_cityscapes_instances( x, y, from_json=False, to_polygons=False ), ) # MetadataCatalog.get(inst_key).set( # image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta # ) # MetadataCatalog.get(inst_key).set( # image_dir=image_dir, gt_dir=gt_dir, evaluator_type="pascal_voc", **meta # ) MetadataCatalog.get(inst_key).set( image_dir=image_dir, gt_dir=gt_dir, evaluator_type="coco", **meta ) # ==== Predefined splits for Clipart (PASCAL VOC format) =========== def register_all_clipart(): root = 
"manifold://mobile_vision_dataset/tree/yujheli/dataset" SPLITS = [ ("Clipart1k_train", "clipart", "train"), ("Clipart1k_test", "clipart", "test"), ] for name, dirname, split in SPLITS: year = 2012 register_pascal_voc(name, os.path.join(root, dirname), split, year) MetadataCatalog.get(name).evaluator_type = "pascal_voc" # MetadataCatalog.get(name).evaluator_type = "coco" register_all_cityscapes_foggy() register_all_clipart() # register_coco_unlabel() # register_unlabeled() def register_all_water(): root = "manifold://mobile_vision_dataset/tree/yujheli/dataset" #Need to modify to the correct folder containing the dataset. SPLITS = [ ("Watercolor_train", "watercolor", "train"), ("Watercolor_test", "watercolor", "test"), ] for name, dirname, split in SPLITS: year = 2012 # register_pascal_voc(name, os.path.join(root, dirname), split, year, class_names=["person", "dog","bicycle", "bird", "car", "cat"]) register_pascal_voc(name, os.path.join(root, dirname), split, year) MetadataCatalog.get(name).evaluator_type = "pascal_voc_water" register_all_water() def register_all_clipart_ws(): root = "manifold://mobile_vision_dataset/tree/yujheli/dataset" SPLITS = [ ("Clipart1k_train_w", "clipart", "train"), ("Clipart1k_test_w", "clipart", "test"), ] for name, dirname, split in SPLITS: year = 2012 register_pascal_voc(name, os.path.join(root, dirname), split, year) MetadataCatalog.get(name).evaluator_type = "pascal_voc_water" # MetadataCatalog.get(name).evaluator_type = "coco" register_all_clipart_ws() ================================================ FILE: prod_lib/data/cityscapes_foggy.py ================================================ # Copyright (c) Facebook, Inc. and its affiliates. 
import functools import json import logging import multiprocessing as mp import numpy as np import os from itertools import chain import pycocotools.mask as mask_util from PIL import Image from detectron2.structures import BoxMode from detectron2.utils.comm import get_world_size from detectron2.utils.file_io import PathManager from detectron2.utils.logger import setup_logger try: import cv2 # noqa except ImportError: # OpenCV is an optional dependency at the moment pass logger = logging.getLogger(__name__) def _get_cityscapes_files(image_dir, gt_dir): files = [] # scan through the directory cities = PathManager.ls(image_dir) logger.info(f"{len(cities)} cities found in '{image_dir}'.") for city in cities: city_img_dir = os.path.join(image_dir, city) city_gt_dir = os.path.join(gt_dir, city) for basename in PathManager.ls(city_img_dir): image_file = os.path.join(city_img_dir, basename) # suffix = "leftImg8bit.png" # assert basename.endswith(suffix), basename # basename = basename[: -len(suffix)] suffix = 'leftImg8bit_foggy' basename = basename.split(suffix)[0] instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") files.append((image_file, instance_file, label_file, json_file)) assert len(files), "No images found in {}".format(image_dir) for f in files[0]: assert PathManager.isfile(f), f return files def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): """ Args: image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". from_json (bool): whether to read annotations from the raw json file or the png files. to_polygons (bool): whether to represent the segmentation as polygons (COCO's format) instead of masks (cityscapes's format). 
Returns: list[dict]: a list of dicts in Detectron2 standard format. (See `Using Custom Datasets `_ ) """ if from_json: assert to_polygons, ( "Cityscapes's json annotations are in polygon format. " "Converting to mask format is not supported now." ) files = _get_cityscapes_files(image_dir, gt_dir) logger.info("Preprocessing cityscapes annotations ...") # This is still not fast: all workers will execute duplicate works and will # take up to 10m on a 8GPU server. pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) ret = pool.map( functools.partial(_cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), files, ) logger.info("Loaded {} images from {}".format(len(ret), image_dir)) # Map cityscape ids to contiguous ids from cityscapesscripts.helpers.labels import labels labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} for dict_per_image in ret: for anno in dict_per_image["annotations"]: anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] return ret def load_cityscapes_semantic(image_dir, gt_dir): """ Args: image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". Returns: list[dict]: a list of dict, each has "file_name" and "sem_seg_file_name". """ ret = [] # gt_dir is small and contain many small files. make sense to fetch to local first gt_dir = PathManager.get_local_path(gt_dir) for image_file, _, label_file, json_file in _get_cityscapes_files(image_dir, gt_dir): label_file = label_file.replace("labelIds", "labelTrainIds") with PathManager.open(json_file, "r") as f: jsonobj = json.load(f) ret.append( { "file_name": image_file, "sem_seg_file_name": label_file, "height": jsonobj["imgHeight"], "width": jsonobj["imgWidth"], } ) assert len(ret), f"No images found in {image_dir}!" 
assert PathManager.isfile( ret[0]["sem_seg_file_name"] ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa return ret def _cityscapes_files_to_dict(files, from_json, to_polygons): """ Parse cityscapes annotation files to a instance segmentation dataset dict. Args: files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) from_json (bool): whether to read annotations from the raw json file or the png files. to_polygons (bool): whether to represent the segmentation as polygons (COCO's format) instead of masks (cityscapes's format). Returns: A dict in Detectron2 Dataset format. """ from cityscapesscripts.helpers.labels import id2label, name2label image_file, instance_id_file, _, json_file = files annos = [] if from_json: from shapely.geometry import MultiPolygon, Polygon with PathManager.open(json_file, "r") as f: jsonobj = json.load(f) ret = { "file_name": image_file, "image_id": os.path.basename(image_file), "height": jsonobj["imgHeight"], "width": jsonobj["imgWidth"], } # `polygons_union` contains the union of all valid polygons. polygons_union = Polygon() # CityscapesScripts draw the polygons in sequential order # and each polygon *overwrites* existing ones. See # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa # We use reverse order, and each polygon *avoids* early ones. # This will resolve the ploygon overlaps in the same way as CityscapesScripts. 
for obj in jsonobj["objects"][::-1]: if "deleted" in obj: # cityscapes data format specific continue label_name = obj["label"] try: label = name2label[label_name] except KeyError: if label_name.endswith("group"): # crowd area label = name2label[label_name[: -len("group")]] else: raise if label.id < 0: # cityscapes data format continue # Cityscapes's raw annotations uses integer coordinates # Therefore +0.5 here poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 # CityscapesScript uses PIL.ImageDraw.polygon to rasterize # polygons for evaluation. This function operates in integer space # and draws each pixel whose center falls into the polygon. # Therefore it draws a polygon which is 0.5 "fatter" in expectation. # We therefore dilate the input polygon by 0.5 as our input. poly = Polygon(poly_coord).buffer(0.5, resolution=4) if not label.hasInstances or label.ignoreInEval: # even if we won't store the polygon it still contributes to overlaps resolution polygons_union = polygons_union.union(poly) continue # Take non-overlapping part of the polygon poly_wo_overlaps = poly.difference(polygons_union) if poly_wo_overlaps.is_empty: continue polygons_union = polygons_union.union(poly) anno = {} anno["iscrowd"] = label_name.endswith("group") anno["category_id"] = label.id if isinstance(poly_wo_overlaps, Polygon): poly_list = [poly_wo_overlaps] elif isinstance(poly_wo_overlaps, MultiPolygon): poly_list = poly_wo_overlaps.geoms else: raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) poly_coord = [] for poly_el in poly_list: # COCO API can work only with exterior boundaries now, hence we store only them. # TODO: store both exterior and interior boundaries once other parts of the # codebase support holes in polygons. 
poly_coord.append(list(chain(*poly_el.exterior.coords))) anno["segmentation"] = poly_coord (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds anno["bbox"] = (xmin, ymin, xmax, ymax) anno["bbox_mode"] = BoxMode.XYXY_ABS annos.append(anno) else: # See also the official annotation parsing scripts at # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa with PathManager.open(instance_id_file, "rb") as f: inst_image = np.asarray(Image.open(f), order="F") # ids < 24 are stuff labels (filtering them first is about 5% faster) flattened_ids = np.unique(inst_image[inst_image >= 24]) ret = { "file_name": image_file, "image_id": os.path.basename(image_file), "height": inst_image.shape[0], "width": inst_image.shape[1], } for instance_id in flattened_ids: # For non-crowd annotations, instance_id // 1000 is the label_id # Crowd annotations have <1000 instance ids label_id = instance_id // 1000 if instance_id >= 1000 else instance_id label = id2label[label_id] if not label.hasInstances or label.ignoreInEval: continue anno = {} anno["iscrowd"] = instance_id < 1000 anno["category_id"] = label.id mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") inds = np.nonzero(mask) ymin, ymax = inds[0].min(), inds[0].max() xmin, xmax = inds[1].min(), inds[1].max() anno["bbox"] = (xmin, ymin, xmax, ymax) if xmax <= xmin or ymax <= ymin: continue anno["bbox_mode"] = BoxMode.XYXY_ABS if to_polygons: # This conversion comes from D4809743 and D5171122, # when Mask-RCNN was first developed. 
contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ -2 ] polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] # opencv's can produce invalid polygons if len(polygons) == 0: continue anno["segmentation"] = polygons else: anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] annos.append(anno) ret["annotations"] = annos return ret ================================================ FILE: prod_lib/engine/probe.py ================================================ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved from detectron2.structures import pairwise_iou class OpenMatchTrainerProbe: def __init__(self, cfg): self.BOX_AP = 0.5 self.NUM_CLASSES = cfg.MODEL.ROI_HEADS.NUM_CLASSES # self.bbox_stat_list = ['compute_fp_gtoutlier', 'compute_num_box', 'compute_ood_acc'] def bbox_stat(self, unlabel_gt, unlabel_pseudo, name, bbox_stat_list): stats = {} sum_gpu_names = [] for metric in bbox_stat_list: stats_per, sum_gpu_names_per = getattr( self, metric)(unlabel_gt, unlabel_pseudo, name) stats.update(stats_per) sum_gpu_names.extend(sum_gpu_names_per) return stats, sum_gpu_names def compute_fp_gtoutlier(self, unlabel_gt, unlabel_pseudo, name): num_gt_ood_object = 0 num_gt_fp_ood_object = 0 sum_iou = 0.0 sum_gpu_names = [] results = {} if len(unlabel_gt) != 0: for gt, pseudo in zip(unlabel_gt, unlabel_pseudo): # import pdb; pdb. 
set_trace() if name == "pred": pp_boxes = pseudo.pred_boxes elif name == "pseudo_conf" or name == "pseudo_ood": # filter predicted ood box when evaluating this metric pseudo = pseudo[pseudo.gt_classes != -1] pp_boxes = pseudo.gt_boxes else: raise ValueError("Unknown name for probe roi bbox.") if len(gt) != 0 and len(pseudo) != 0: max_iou, max_idx = pairwise_iou( gt.gt_boxes.to('cuda'), pp_boxes).max(1) ood_idx = (gt.gt_classes == -1) num_gt_ood_object += ood_idx.sum().item() num_gt_fp_ood_object += (max_iou[ood_idx] > self.BOX_AP).sum().item() sum_iou += max_iou[ood_idx].sum().item() elif len(gt) != 0 and len(pseudo) == 0: ood_idx = (gt.gt_classes == -1) num_gt_ood_object += ood_idx.shape[0] results = {'Analysis_'+name+'/num_gt_ood_object': num_gt_ood_object, 'Analysis_'+name+'/num_gt_fp_ood_object': num_gt_fp_ood_object, 'Analysis_'+name+'/sum_iou': sum_iou} sum_gpu_names.extend(list(results.keys())) return results, sum_gpu_names def compute_num_box(self, unlabel_gt, unlabel_pseudo, name): num_bbox = 0.0 size_bbox = 0.0 avg_conf = 0.0 # measure in and out box for openset SS-OD num_bbox_in = 0.0 num_bbox_out = 0.0 num_bg = 0.0 # when ground-truth is missing in unlabeled data if len(unlabel_gt) == 0: for pp_roi in unlabel_pseudo: if name == "pred": pp_boxes = pp_roi.pred_boxes pp_classes = pp_roi.pred_classes pp_scores = pp_roi.scores elif name == "pseudo_conf" or name == "pseudo_ood": pp_boxes = pp_roi.gt_boxes pp_classes = pp_roi.gt_classes pp_scores = pp_roi.scores elif name == "gt": pp_boxes = pp_roi.gt_boxes pp_classes = pp_roi.gt_classes else: raise ValueError("Unknown name for probe roi bbox.") # all boxes (in + out boxes) if len(pp_roi) != 0: # bbox number and size num_bbox += len(pp_roi) size_bbox += pp_boxes.area().mean().item() # average box confidence if name != "gt": avg_conf += pp_scores.mean() else: num_bbox += 0 size_bbox += torch.tensor(0).cuda() num_valid_img = len(unlabel_pseudo) else: # with ground-truth num_valid_img = 0 for gt, pp_roi in 
zip(unlabel_gt, unlabel_pseudo): if name == "pred": pp_boxes = pp_roi.pred_boxes pp_classes = pp_roi.pred_classes pp_scores = pp_roi.scores elif name == "pseudo_conf" or name == "pseudo_ood": # filter out ood pseudo-box when doing analysis pp_roi = pp_roi[pp_roi.gt_classes != -1] pp_boxes = pp_roi.gt_boxes pp_classes = pp_roi.gt_classes pp_scores = pp_roi.scores elif name == "gt": pp_boxes = pp_roi.gt_boxes pp_classes = pp_roi.gt_classes else: raise ValueError("Unknown name for probe roi bbox.") # all boxes (in + out boxes) if len(pp_roi) != 0: # bbox number and size num_bbox += len(pp_roi) size_bbox += pp_boxes.area().mean().item() # average box confidence if name != "gt": avg_conf += pp_scores.mean() else: num_bbox += 0 size_bbox += torch.tensor(0).cuda() # in and out class if name == "gt": pp_roi_in = pp_roi[pp_classes != -1] num_bbox_in += len(pp_roi_in) pp_roi_out = pp_roi[pp_classes == -1] num_bbox_out += len(pp_roi_out) num_valid_img += 1 elif name == "pred" or name == "pseudo_conf" or name == "pseudo_ood": if len(gt.gt_boxes.to('cuda'))>0 and len(pp_boxes) > 0: max_iou, max_idx = pairwise_iou(gt.gt_boxes.to('cuda'), pp_boxes).max(0) # for the ground-truth label for each pseudo-box gtclass4pseudo = gt.gt_classes[max_idx] matchgtbox = max_iou > 0.5 # compute the number of boxes (background, inlier, outlier) num_bg += (~matchgtbox).sum().item() num_bbox_in += (gtclass4pseudo[matchgtbox] != -1).sum().item() num_bbox_out += (gtclass4pseudo[matchgtbox] == -1).sum().item() num_valid_img += 1 else: raise ValueError("Unknown name for probe roi bbox.") box_probe = {} if num_valid_img >0 : box_probe["Analysis_" + name + "/Num_bbox"] = num_bbox / \ num_valid_img box_probe["Analysis_" + name + "/Size_bbox"] = size_bbox / \ num_valid_img box_probe["Analysis_" + name + "/Num_bbox_inlier"] = num_bbox_in / num_valid_img box_probe["Analysis_" + name + "/Num_bbox_outlier"] = num_bbox_out / num_valid_img if name != "gt": # prediciton, background number box_probe["Analysis_" + 
name + "/Conf"] = avg_conf / \ num_valid_img box_probe["Analysis_" + name + "/Num_bbox_background"] = num_bg / num_valid_img box_probe["Analysis_" + name + "/background_fp_ratio"] = num_bg / num_bbox box_probe["Analysis_" + name + "/background_tp_ratio"] = num_bbox_in / num_bbox else: box_probe["Analysis_" + name + "/Num_bbox"] = 0.0 box_probe["Analysis_" + name + "/Size_bbox"] = 0.0 box_probe["Analysis_" + name + "/Num_bbox_inlier"] = 0.0 box_probe["Analysis_" + name + "/Num_bbox_outlier"] = 0.0 if name != "gt": # prediciton, background number box_probe["Analysis_" + name + "/Conf"] = 0.0 box_probe["Analysis_" + name + "/Num_bbox_background"] = 0.0 box_probe["Analysis_" + name + "/background_fp_ratio"] = num_bg / num_bbox box_probe["Analysis_" + name + "/background_tp_ratio"] = num_bbox_in / num_bbox return box_probe, [] def compute_ood_acc(self, unlabel_gt, unlabel_pseudo, name, BOX_IOU=0.5): results = {} sum_gpu_names = [] if len(unlabel_gt) != 0: for metric in ['acc_outlier', 'recall_outlier']: for samples in ['_fg', '_all']: for fraction_part in ['_nume', '_deno']: results[metric+samples+fraction_part] = 0.0 for gt, pred in zip(unlabel_gt, unlabel_pseudo): if name == "pred": pp_boxes = pred.pred_boxes pp_ood_scores = pred.ood_scores elif name == "pseudo_conf" or name == "pseudo_ood": # assume these outlier are suppressed pred = pred[pred.gt_classes != -1] pp_boxes = pred.gt_boxes pp_ood_scores = pred.ood_scores else: raise ValueError("Unknown name for probe roi bbox.") if len(gt) != 0 and len(pred) != 0: # find the most overlapped ground-truth box for each pseudo-box max_iou, max_idx = pairwise_iou( gt.gt_boxes.to('cuda'), pp_boxes).max(0) # ignore background instances find_fg_mask = max_iou > BOX_IOU if find_fg_mask.sum() > 0: gt_corres = gt[max_idx].gt_classes.to("cuda") gt_outlier = (gt_corres[find_fg_mask] == -1) pred_outlier = pp_ood_scores[find_fg_mask][:, 0] > 0.5 # accurcay of ood detection (foreground) # acc_outlier_fg = (pred_outlier == 
gt_outlier).sum() /find_fg_mask.sum() results['acc_outlier_fg_nume'] += ( pred_outlier == gt_outlier).sum() results['acc_outlier_fg_deno'] += find_fg_mask.sum() # recall of ood detection (foreground) # recall_outlier_fg = (pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() /gt_outlier.sum() results['recall_outlier_fg_nume'] += ( pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() results['recall_outlier_fg_deno'] += gt_outlier.sum() # Regard backgound gt as outlier gt_corres = gt[max_idx].gt_classes.to("cuda") # convert all background gt as outlier gt_corres[~find_fg_mask] = -1 gt_outlier = gt_corres == -1 pred_outlier = pp_ood_scores[:, 0] > 0.5 # accurcay of ood detection (all) # acc_outlier_all = (pred_outlier == gt_outlier).sum() /len(pred) results['acc_outlier_all_nume'] += ( pred_outlier == gt_outlier).sum() results['acc_outlier_all_deno'] += len(pred) # recall of ood detection (all) # recall_outlier_all = (pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() /gt_outlier.sum() results['recall_outlier_all_nume'] += ( pred_outlier[gt_outlier] == gt_outlier[gt_outlier]).sum() results['recall_outlier_all_deno'] += gt_outlier.sum() results = {'Analysis_'+name+'/'+k: v for k, v in results.items()} sum_gpu_names.extend(list(results.keys())) return results, sum_gpu_names ================================================ FILE: prod_lib/engine/trainer.py ================================================ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. 
# All Rights Reserved

import logging
import time
from collections import OrderedDict
from typing import Dict

import detectron2.utils.comm as comm
import numpy as np
import torch
from detectron2.engine import SimpleTrainer
from detectron2.structures import BitMasks, Boxes, Instances, Keypoints
from detectron2.utils.events import get_event_storage
from d2go.projects.unbiased_teacher.engine.trainer import UnbiasedTeacherTrainer
from d2go.projects.unbiased_teacher.utils.probe import probe
import copy

logger = logging.getLogger(__name__)


class DAobjTrainer(UnbiasedTeacherTrainer):
    """
    A trainer for Teacher-Student mutual learning following this paper:
    "Unbiased Teacher for Semi-Supervised Object Detection"

    It assumes that every step, you:

    For Teacher:
    1. Perform a forward pass on a weakly augmented unlabeled data from the data_loader.
    2. Generate pseudo-labels on the weakly augmented unlabeled data

    For Student:
    1. Perform a forward pass on a strongly augmented unlabeled data from the data_loader.
    2. Perform a forward pass on a labeled data from the data_loader.
    1. Use pseudo-labels generated from the Teacher as target and compute the loss
       on a strongly augmented unlabeled data
    2. Compute the gradients with the above losses on labeled and unlabeled data.
    3. Update the Student model with the optimizer.
    4. EMA update the Teacher model
    """

    # def __init__(self, cfg, model, model_teacher, data_loader, optimizer):
    #     """
    #     Args:
    #         model: a torch Module. Takes a data from data_loader and returns a
    #             dict of losses.
    #         data_loader: an iterable. Contains data to be used to call model.
    #         optimizer: a torch optimizer.
    #     """
    #     super().__init__(model, data_loader, optimizer)
    #     self.cfg = cfg
    #     self.model_teacher = model_teacher

    def run_step(self):
        """One optimization step: burn-in (supervised only) before
        BURN_IN_STEP iterations, teacher-student mutual learning after."""
        assert (
            self.model.training
        ), "Student model was changed to eval mode during training"

        start = time.perf_counter()
        data = next(self._data_loader_iter)
        # q (queue): strongly augmented, k (key): weakly augmented
        # TODO Need to further use the weak samples for domain adaptation
        label_data_q, label_data_k, unlabel_data_q, unlabel_data_k = data
        data_time = time.perf_counter() - start

        if (
            self.cfg.UNBIASEDTEACHER.BURN_IN_STEP != 0
            and self.iter < self.cfg.UNBIASEDTEACHER.BURN_IN_STEP
        ):
            # Burn-In stage. Supervisedly train the Student model.
            losses, loss_dict, record_dict = self.burn_in(label_data_q, label_data_k)
        else:
            # Copy the Student model to the Teacher (using keep_rate = 0)
            # exactly once, at the end of burn-in; afterwards EMA-update the
            # Teacher every TEACHER_UPDATE_ITER iterations.
            if self.iter == self.cfg.UNBIASEDTEACHER.BURN_IN_STEP:
                logger.info("Copying Student weights to the Teacher .....")
                self._update_teacher_model(keep_rate=0.0)
            elif (
                self.iter - self.cfg.UNBIASEDTEACHER.BURN_IN_STEP
            ) % self.cfg.UNBIASEDTEACHER.TEACHER_UPDATE_ITER == 0:
                self._update_teacher_model(
                    keep_rate=self.cfg.UNBIASEDTEACHER.EMA.KEEP_RATE
                )

            # Teacher-Student Mutual Learning
            losses, loss_dict, record_dict = self.teacher_student_learning(
                label_data_q, label_data_k, unlabel_data_q, unlabel_data_k
            )

        self.optimizer.zero_grad()
        losses.backward()
        self._write_metrics(record_dict, data_time)

        """
        If you need gradient clipping/scaling or other processing, you can
        wrap the optimizer with your custom `step()` method. But it is
        suboptimal as explained in https://arxiv.org/abs/2006.15704 Sec 3.2.4
        """
        self.optimizer.step()

    def burn_in(self, label_data_q, label_data_k):
        """
        Perform Burn-In stage with labeled data
        """
        # combine label_data_q + label_data_k
        label_data_q.extend(label_data_k)
        record_dict, _, _, _ = self.model(label_data_q, branch="supervised")

        # weight losses
        loss_dict = self.weight_losses(record_dict)
        losses = sum(loss_dict.values())
        return losses, loss_dict, record_dict

    def teacher_student_learning(
        self, label_data_q, label_data_k, unlabel_data_q, unlabel_data_k
    ):
        """
        Perform Teacher-Student Mutual Learning with labeled and unlabeled data
        """
        # q (queue): strongly augmented, k (key): weakly augmented
        record_dict = {}

        ######################## For probe #################################
        # import pdb; pdb.set_trace()
        gt_unlabel_k = self.get_label(unlabel_data_k)

        #  0. remove potential ground-truth labels in the unlabeled data
        unlabel_data_q = self.remove_label(unlabel_data_q)
        unlabel_data_k = self.remove_label(unlabel_data_k)

        #  1. generate the pseudo-label using teacher model
        # TODO: why is the Teacher not in .eval() mode?
        with torch.no_grad():
            (
                _,
                proposals_rpn_unsup_k,
                proposals_roih_unsup_k,
                _,
            ) = self.model_teacher(unlabel_data_k, branch="unsup_data_weak")

        ######################## For probe #################################
        # import pdb; pdb.set_trace()
        # analysis_pred, _ = self.probe.compute_num_box(gt_unlabel_k,proposals_roih_unsup_k,'pred')
        # record_dict.update(analysis_pred)

        #  2. Pseudo-labeling
        # Pseudo-labeling for RPN head (bbox location/objectness)
        joint_proposal_dict = {}

        ## No need this
        joint_proposal_dict["proposals_rpn"] = proposals_rpn_unsup_k
        (
            pesudo_proposals_rpn_unsup_k,
            nun_pseudo_bbox_rpn,
        ) = self.process_pseudo_label(
            proposals_rpn_unsup_k,
            self.cfg.UNBIASEDTEACHER.BBOX_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.MASK_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.KEYPOINT_THRESHOLD,
            "rpn",
            "thresholding",
        )
        joint_proposal_dict["proposals_pseudo_rpn"] = pesudo_proposals_rpn_unsup_k
        ## No need this end

        # Pseudo-labeling for ROI head (bbox location/objectness)
        pesudo_proposals_roih_unsup_k, _ = self.process_pseudo_label(
            proposals_roih_unsup_k,
            self.cfg.UNBIASEDTEACHER.BBOX_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.MASK_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.KEYPOINT_THRESHOLD,
            "roih",
            "thresholding",
        )
        joint_proposal_dict["proposals_pseudo_roih"] = pesudo_proposals_roih_unsup_k

        ######################## For probe #################################
        analysis_pred, _ = self.probe.compute_num_box(gt_unlabel_k,pesudo_proposals_roih_unsup_k,'pred')
        record_dict.update(analysis_pred)

        # Probe for analysis (usually for research development)
        if self.cfg.UNBIASEDTEACHER.PROBE:
            record_dict = probe(
                self.cfg,
                proposals_roih_unsup_k,
                unlabel_data_k,
                pesudo_proposals_roih_unsup_k,
                record_dict,
            )

        #  3. add pseudo-label to unlabeled data
        unlabel_data_q = self.add_label(
            unlabel_data_q, joint_proposal_dict["proposals_pseudo_roih"]
        )
        unlabel_data_k = self.add_label(
            unlabel_data_k, joint_proposal_dict["proposals_pseudo_roih"]
        )

        # all_label_data = label_data_q + label_data_k
        # ISAUG == "No" trains on the weakly augmented views only.
        if self.cfg.UNBIASEDTEACHER.ISAUG == "No":
            all_label_data = label_data_k
            all_unlabel_data = unlabel_data_k
        else:
            all_label_data = label_data_q + label_data_k
            all_unlabel_data = unlabel_data_q

        #  4. input both strongly and weakly augmented labeled data into student model
        # all_unlabel_data = unlabel_data_q
        record_all_label_data, _, _, _ = self.model(all_label_data, branch="supervised")
        record_dict.update(record_all_label_data)

        #  5. input strongly augmented unlabeled data into model
        record_all_unlabel_data, _, _, _ = self.model(
            all_unlabel_data, branch="supervised-pseudo"
        )

        # rename unsupervised loss
        # NOTE: names of the recorded output from model are hard-coded
        #   we rename them accordingly for unlabeled data
        new_record_all_unlabel_data = {}
        for key in record_all_unlabel_data.keys():
            new_record_all_unlabel_data[key + "_pseudo"] = record_all_unlabel_data[key]
        record_dict.update(new_record_all_unlabel_data)

        #  6. input weakly labeled data (source) and weakly unlabeled data (target) to student model
        # give sign to the target data: copy each target sample's fields into
        # the paired source sample under "<key>_unlabeled" so the "domain"
        # branch sees both domains in one batch.
        for i_index in range(len(unlabel_data_k)):
            # unlabel_data_item = {}
            for k, v in unlabel_data_k[i_index].items():
                # label_data_k[i_index][k + "_unlabeled"] = v
                label_data_k[i_index][k + "_unlabeled"] = v
            # unlabel_data_k[i_index] = unlabel_data_item

        all_domain_data = label_data_k
        # all_domain_data = label_data_k + unlabel_data_k
        record_all_domain_data, _, _, _ = self.model(all_domain_data, branch="domain")
        record_dict.update(record_all_domain_data)

        #  7. distill teacher
        # for distill back to teacher: the Student predicts on the weak
        # unlabeled view, its thresholded outputs become targets for the
        # Teacher, and those losses are recorded under "<key>_distill".
        with torch.no_grad():
            (
                _,
                proposals_rpn_unsup_dis,
                proposals_roih_unsup_dis,
                _,
            ) = self.model(unlabel_data_k, branch="unsup_data_weak")

        pesudo_proposals_roih_unsup_k, _ = self.process_pseudo_label(
            proposals_roih_unsup_dis,
            self.cfg.UNBIASEDTEACHER.BBOX_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.MASK_THRESHOLD,
            self.cfg.UNBIASEDTEACHER.KEYPOINT_THRESHOLD,
            "roih",
            "thresholding",
        )

        unlabel_data_k = self.remove_label(unlabel_data_k)
        unlabel_data_k = self.add_label(
            unlabel_data_k, pesudo_proposals_roih_unsup_k
        )

        record_distill_data, _, _, _ = self.model_teacher(
            unlabel_data_k, branch="supervised-pseudo"
        )
        new_record_all_distill_data = {}
        for key in record_distill_data.keys():
            new_record_all_distill_data[key + "_distill"] = record_distill_data[key]
        record_dict.update(new_record_all_distill_data)

        # weighting losses
        loss_dict = self.weight_losses(record_dict)
        # Add discriminator loss here
        # loss_dict.update(...)
        losses = sum(loss_dict.values())

        return losses, loss_dict, record_dict

    def weight_losses(self, record_dict):
        """Scale every "loss*" entry of record_dict by its configured weight.

        Pseudo/distill box-regression losses are zeroed (weight 0); other
        "_pseudo" losses use the UNSUP_* weights; discriminator losses use
        DIS_LOSS_WEIGHT; everything else keeps weight 1.
        """
        loss_dict = {}
        # Regression on pseudo boxes is unreliable, so it is weighted to 0.
        REGRESSION_LOSS_WEIGHT = 0
        for key in record_dict.keys():
            if key.startswith("loss"):
                if key == "loss_rpn_cls_pseudo":
                    loss_dict[key] = (
                        record_dict[key]
                        * self.cfg.UNBIASEDTEACHER.UNSUP_LOSS_WEIGHT_RPN_CLS
                    )
                elif (
                    key == "loss_rpn_loc_pseudo" or key == "loss_box_reg_pseudo"
                ):  # set pseudo bbox regression to 0
                    loss_dict[key] = record_dict[key] * REGRESSION_LOSS_WEIGHT
                elif (
                    key == "loss_rpn_loc_distill" or key == "loss_box_reg_distill"
                ):  # set pseudo bbox regression to 0
                    loss_dict[key] = record_dict[key] * REGRESSION_LOSS_WEIGHT
                elif key.endswith("mask_pseudo"):  # unsupervised loss for segmentation
                    loss_dict[key] = (
                        record_dict[key]
                        * self.cfg.UNBIASEDTEACHER.UNSUP_LOSS_WEIGHT_MASK
                    )
                elif key.endswith("keypoint_pseudo"):  # unsupervised loss for keypoint
                    loss_dict[key] = (
                        record_dict[key]
                        * self.cfg.UNBIASEDTEACHER.UNSUP_LOSS_WEIGHT_KEYPOINT
                    )
                elif key.endswith("pseudo"):  # unsupervised loss
                    loss_dict[key] = (
                        record_dict[key] * self.cfg.UNBIASEDTEACHER.UNSUP_LOSS_WEIGHT
                    )
                elif (
                    key == "loss_D_img_s" or key == "loss_D_img_t"
                ):  # set weight for discriminator
                    # import pdb
                    # pdb.set_trace()
                    loss_dict[key] = record_dict[key] * self.cfg.UNBIASEDTEACHER.DIS_LOSS_WEIGHT  # Need to modify defaults and yaml
                else:  # supervised loss
                    loss_dict[key] = record_dict[key] * 1

        return loss_dict

    def threshold_bbox(
        self,
        proposal_bbox_inst,
        thres=0.7,
        mask_thres=0.5,
        keypoint_thres=0.5,
        proposal_type="roih",
    ):
        """Keep only proposals above `thres` and repackage them as a new
        Instances object usable as pseudo ground truth (gt_* fields set).

        "rpn" filters on objectness_logits; "roih" filters on scores and also
        carries classes (and masks/keypoints when those heads are enabled).
        """
        if proposal_type == "rpn":
            valid_map = proposal_bbox_inst.objectness_logits > thres

            # create instances containing boxes and gt_classes
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.proposal_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.pred_boxes = new_boxes
            new_proposal_inst.objectness_logits = proposal_bbox_inst.objectness_logits[
                valid_map
            ]
        elif proposal_type == "roih":
            valid_map = proposal_bbox_inst.scores > thres

            # create instances containing boxes and gt_classes
            image_shape = proposal_bbox_inst.image_size
            new_proposal_inst = Instances(image_shape)

            # create box
            new_bbox_loc = proposal_bbox_inst.pred_boxes.tensor[valid_map, :]
            new_boxes = Boxes(new_bbox_loc)

            # add boxes to instances
            new_proposal_inst.gt_boxes = new_boxes
            new_proposal_inst.pred_boxes = new_boxes
            new_proposal_inst.gt_classes = proposal_bbox_inst.pred_classes[valid_map]
            new_proposal_inst.pred_classes = proposal_bbox_inst.pred_classes[valid_map]
            new_proposal_inst.scores = proposal_bbox_inst.scores[valid_map]

            if self.cfg.MODEL.MASK_ON and new_boxes:
                # put predicted output into gt_masks with thresholding
                new_masks = proposal_bbox_inst.pred_masks[valid_map].squeeze(1)
                new_masks = new_masks >= mask_thres
                new_proposal_inst.gt_masks = BitMasks(new_masks)

            if self.cfg.MODEL.KEYPOINT_ON and new_boxes:
                # we use the keypoint score as the basis for thresholding
                new_keypoints = proposal_bbox_inst.pred_keypoints[valid_map, :]
                invalid_keypoints = new_keypoints[:, :, 2] < keypoint_thres
                # (x, y, visibility): visibility flag = 0 -> not labeled (in which case x=y=0)
                new_keypoints[invalid_keypoints] = torch.FloatTensor([0, 0, 0]).to(
                    new_keypoints.device
                )
                new_proposal_inst.gt_keypoints = Keypoints(new_keypoints)

        return new_proposal_inst


================================================
FILE: prod_lib/evaluation/__init__.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
from .coco_evaluation import COCOEvaluator
from .pascal_voc_evaluation import PascalVOCDetectionEvaluator

# __all__ = [k for k in globals().keys() if not k.startswith("_")]
__all__ = [
    "COCOEvaluator",
    "PascalVOCDetectionEvaluator"
]


================================================
FILE: prod_lib/evaluation/coco_evaluation.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
def convert_to_coco_json(dataset_name, output_file, allow_cached=True):
    """
    Converts dataset into COCO format and saves it to a json file.
    dataset_name must be registered in DatasetCatalog and in detectron2's standard format.

    Args:
        dataset_name: reference from the config file to the catalogs
            must be registered in DatasetCatalog and in detectron2's standard format
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """
    # TODO: The dataset or the conversion script *may* change,
    # a checksum would be useful for validating the cached data
    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        if PathManager.exists(output_file) and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
        else:
            # FIX: removed a stray ')' that was embedded inside this log message.
            logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...")
            coco_dict = convert_to_coco_dict(dataset_name)

            logger.info(f"Caching COCO format annotations at '{output_file}' ...")
            # Write directly to the final path; the surrounding file_lock already
            # guards against concurrent writers (the original tmp-file + move
            # dance was disabled upstream and is dropped here).
            with PathManager.open(output_file, "w") as f:
                json.dump(coco_dict, f)
"instances_predictions.pth" a file that can be loaded with `torch.load` and contains all the results in the format they are produced by the model. 2. "coco_instances_results.json" a json file in COCO's result format. use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP. Although the results should be very close to the official implementation in COCO API, it is still recommended to compute results with the official API for use in papers. The faster implementation also uses more RAM. kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval When empty, it will use the defaults in COCO. Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. """ self._logger = logging.getLogger(__name__) self._distributed = distributed self._output_dir = output_dir self._use_fast_impl = use_fast_impl if tasks is not None and isinstance(tasks, CfgNode): kpt_oks_sigmas = ( tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas ) self._logger.warn( "COCO Evaluator instantiated using config, this is deprecated behavior." " Please pass in explicit arguments instead." ) self._tasks = None # Infering it from predictions should be better else: self._tasks = tasks self._cpu_device = torch.device("cpu") self._metadata = MetadataCatalog.get(dataset_name) if not hasattr(self._metadata, "json_file"): self._logger.info( f"'{dataset_name}' is not registered by `register_coco_instances`." " Therefore trying to convert it to COCO format ..." ) cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") self._metadata.json_file = cache_path convert_to_coco_json(dataset_name, cache_path) json_file = PathManager.get_local_path(self._metadata.json_file) with contextlib.redirect_stdout(io.StringIO()): self._coco_api = COCO(json_file) # Test set json files do not contain annotations (evaluation must be # performed using the COCO evaluation server). 
self._do_evaluation = "annotations" in self._coco_api.dataset if self._do_evaluation: self._kpt_oks_sigmas = kpt_oks_sigmas def reset(self): self._predictions = [] def process(self, inputs, outputs): """ Args: inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). It is a list of dict. Each dict corresponds to an image and contains keys like "height", "width", "file_name", "image_id". outputs: the outputs of a COCO model. It is a list of dicts with key "instances" that contains :class:`Instances`. """ for input, output in zip(inputs, outputs): prediction = {"image_id": input["image_id"]} if "instances" in output: instances = output["instances"].to(self._cpu_device) prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) if "proposals" in output: prediction["proposals"] = output["proposals"].to(self._cpu_device) if len(prediction) > 1: self._predictions.append(prediction) def evaluate(self, img_ids=None): """ Args: img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset """ if self._distributed: comm.synchronize() predictions = comm.gather(self._predictions, dst=0) predictions = list(itertools.chain(*predictions)) if not comm.is_main_process(): return {} else: predictions = self._predictions if len(predictions) == 0: self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") return {} if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "instances_predictions.pth") with PathManager.open(file_path, "wb") as f: torch.save(predictions, f) self._results = OrderedDict() if "proposals" in predictions[0]: self._eval_box_proposals(predictions) if "instances" in predictions[0]: self._eval_predictions(predictions, img_ids=img_ids) # Copy so the caller can do whatever with results return copy.deepcopy(self._results) def _tasks_from_predictions(self, predictions): """ Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions. 
""" tasks = {"bbox"} for pred in predictions: if "segmentation" in pred: tasks.add("segm") if "keypoints" in pred: tasks.add("keypoints") return sorted(tasks) def _eval_predictions(self, predictions, img_ids=None): """ Evaluate predictions. Fill self._results with the metrics of the tasks. """ self._logger.info("Preparing results for COCO format ...") coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) tasks = self._tasks or self._tasks_from_predictions(coco_results) # unmap the category ids for COCO if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) num_classes = len(all_contiguous_ids) assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} for result in coco_results: category_id = result["category_id"] assert category_id < num_classes, ( f"A prediction has class={category_id}, " f"but the dataset only has {num_classes} classes and " f"predicted class id should be in [0, {num_classes - 1}]." ) result["category_id"] = reverse_id_mapping[category_id] if self._output_dir: file_path = os.path.join(self._output_dir, "coco_instances_results.json") self._logger.info("Saving results to {}".format(file_path)) with PathManager.open(file_path, "w") as f: f.write(json.dumps(coco_results)) f.flush() if not self._do_evaluation: self._logger.info("Annotations are not available for evaluation.") return self._logger.info( "Evaluating predictions with {} COCO API...".format( "unofficial" if self._use_fast_impl else "official" ) ) for task in sorted(tasks): assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" 
    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            # Plain pickle dump of all proposals, one entry per image.
            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        # Average Recall at two proposal budgets, broken out by object area.
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
iou_type (str): class_names (None or list[str]): if provided, will use it to predict per-category AP. Returns: a dict of {metric name: score} """ metrics = { "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], }[iou_type] if coco_eval is None: self._logger.warn("No predictions from the model!") return {metric: float("nan") for metric in metrics} # the standard metrics results = { metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") for idx, metric in enumerate(metrics) } self._logger.info( "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) ) if not np.isfinite(sum(results.values())): self._logger.info("Some metrics cannot be computed and is shown as NaN.") if class_names is None or len(class_names) <= 1: return results # Compute per-category AP # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa precisions = coco_eval.eval["precision"] # precision has dims (iou, recall, cls, area range, max dets) assert len(class_names) == precisions.shape[2] results_per_category = [] for idx, name in enumerate(class_names): # area range index 0: all area ranges # max dets index -1: typically 100 per image precision = precisions[:, :, idx, 0, -1] precision = precision[precision > -1] ap = np.mean(precision) if precision.size else float("nan") results_per_category.append(("{}".format(name), float(ap * 100))) # tabulate it N_COLS = min(6, len(results_per_category) * 2) results_flatten = list(itertools.chain(*results_per_category)) results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) table = tabulate( results_2d, tablefmt="pipe", floatfmt=".3f", headers=["category", "AP"] * (N_COLS // 2), numalign="left", ) self._logger.info("Per-category {} AP: \n".format(iou_type) + table) # 
def instances_to_coco_json(instances, img_id):
    """
    Dump an "Instances" object to a COCO-format json that's used for evaluation.

    Args:
        instances (Instances):
        img_id (int): the image id

    Returns:
        list[dict]: list of json annotations in COCO format.
    """
    if len(instances) == 0:
        return []

    # XYXY -> XYWH, as required by the COCO results format.
    xywh = BoxMode.convert(
        instances.pred_boxes.tensor.numpy(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
    ).tolist()
    conf = instances.scores.tolist()
    labels = instances.pred_classes.tolist()

    rles = None
    if instances.has("pred_masks"):
        # use RLE to encode the masks, because they are too large and takes memory
        # since this evaluator stores outputs of the entire dataset
        rles = [
            mask_util.encode(np.array(m[:, :, None], order="F", dtype="uint8"))[0]
            for m in instances.pred_masks
        ]
        for rle in rles:
            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
            # json writer which always produces strings cannot serialize a bytestream
            # unless you decode it. Thankfully, utf-8 works out (which is also what
            # the pycocotools/_mask.pyx does).
            rle["counts"] = rle["counts"].decode("utf-8")

    kpts = instances.pred_keypoints if instances.has("pred_keypoints") else None

    results = []
    for k in range(len(instances)):
        entry = {
            "image_id": img_id,
            "category_id": labels[k],
            "bbox": xywh[k],
            "score": conf[k],
        }
        if rles is not None:
            entry["segmentation"] = rles[k]
        if kpts is not None:
            # In COCO annotations,
            # keypoints coordinates are pixel indices.
            # However our predictions are floating point coordinates.
            # Therefore we subtract 0.5 to be consistent with the annotation format.
            # This is the inverse of data loading logic in `datasets/coco.py`.
            kpts[k][:, :2] -= 0.5
            entry["keypoints"] = kpts[k].flatten().tolist()
        results.append(entry)
    return results
""" # Record max overlap value for each gt box # Return vector of overlap values areas = { "all": 0, "small": 1, "medium": 2, "large": 3, "96-128": 4, "128-256": 5, "256-512": 6, "512-inf": 7, } area_ranges = [ [0 ** 2, 1e5 ** 2], # all [0 ** 2, 32 ** 2], # small [32 ** 2, 96 ** 2], # medium [96 ** 2, 1e5 ** 2], # large [96 ** 2, 128 ** 2], # 96-128 [128 ** 2, 256 ** 2], # 128-256 [256 ** 2, 512 ** 2], # 256-512 [512 ** 2, 1e5 ** 2], ] # 512-inf assert area in areas, "Unknown area range: {}".format(area) area_range = area_ranges[areas[area]] gt_overlaps = [] num_pos = 0 for prediction_dict in dataset_predictions: predictions = prediction_dict["proposals"] # sort predictions in descending order # TODO maybe remove this and make it explicit in the documentation inds = predictions.objectness_logits.sort(descending=True)[1] predictions = predictions[inds] ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) anno = coco_api.loadAnns(ann_ids) gt_boxes = [ BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno if obj["iscrowd"] == 0 ] gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes gt_boxes = Boxes(gt_boxes) gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) if len(gt_boxes) == 0 or len(predictions) == 0: continue valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) gt_boxes = gt_boxes[valid_gt_inds] num_pos += len(gt_boxes) if len(gt_boxes) == 0: continue if limit is not None and len(predictions) > limit: predictions = predictions[:limit] overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) _gt_overlaps = torch.zeros(len(gt_boxes)) for j in range(min(len(predictions), len(gt_boxes))): # find which proposal box maximally covers each gt box # and get the iou amount of coverage for each gt box max_overlaps, argmax_overlaps = overlaps.max(dim=0) # find which gt box is 'best' covered (i.e. 
'best' = most iou) gt_ovr, gt_ind = max_overlaps.max(dim=0) assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps.append(_gt_overlaps) gt_overlaps = ( torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) ) gt_overlaps, _ = torch.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) recalls = torch.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return { "ar": ar, "recalls": recalls, "thresholds": thresholds, "gt_overlaps": gt_overlaps, "num_pos": num_pos, } def _evaluate_predictions_on_coco( coco_gt, coco_results, iou_type, kpt_oks_sigmas=None, use_fast_impl=True, img_ids=None ): """ Evaluate the coco results using COCOEval API. """ assert len(coco_results) > 0 if iou_type == "segm": coco_results = copy.deepcopy(coco_results) # When evaluating mask AP, if the results contain bbox, cocoapi will # use the box area as the area of the instance, instead of the mask area. # This leads to a different definition of small/medium/large. # We remove the bbox field to let mask AP use mask area. 
for c in coco_results: c.pop("bbox", None) coco_dt = coco_gt.loadRes(coco_results) coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type) if img_ids is not None: coco_eval.params.imgIds = img_ids if iou_type == "keypoints": # Use the COCO default keypoint OKS sigmas unless overrides are specified if kpt_oks_sigmas: assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!" coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) # COCOAPI requires every detection and every gt to have keypoints, so # we just take the first entry from both num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3 num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3 num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas) assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, ( f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. " f"Ground truth contains {num_keypoints_gt} keypoints. " f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. " "They have to agree with each other. For meaning of OKS, please refer to " "http://cocodataset.org/#keypoints-eval." ) coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() return coco_eval ================================================ FILE: prod_lib/evaluation/pascal_voc_evaluation.py ================================================ # -*- coding: utf-8 -*- # Copyright (c) Facebook, Inc. and its affiliates. import logging import numpy as np import os import tempfile import xml.etree.ElementTree as ET from collections import OrderedDict, defaultdict from functools import lru_cache import torch from detectron2.data import MetadataCatalog from detectron2.utils import comm from detectron2.utils.file_io import PathManager from detectron2.evaluation import DatasetEvaluator class PascalVOCDetectionEvaluator(DatasetEvaluator): """ Evaluate Pascal VOC style AP for Pascal VOC dataset. 
It contains a synchronization, therefore has to be called from all ranks. Note that the concept of AP can be implemented in different ways and may not produce identical results. This class mimics the implementation of the official Pascal VOC Matlab API, and should produce similar but not identical results to the official API. """ def __init__(self, dataset_name, target_classnames=None): """ Args: dataset_name (str): name of the dataset, e.g., "voc_2007_test" """ self._dataset_name = dataset_name meta = MetadataCatalog.get(dataset_name) # Too many tiny files, download all to local for speed. annotation_dir_local = PathManager.get_local_path( os.path.join(meta.dirname, "Annotations/") ) self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml") self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") self._class_names = meta.thing_classes assert meta.year in [2007, 2012], meta.year self._is_2007 = meta.year == 2007 self._cpu_device = torch.device("cpu") self._logger = logging.getLogger(__name__) if target_classnames == None: self.target_classnames = self._class_names else: self.target_classnames = target_classnames def reset(self): self._predictions = defaultdict(list) # class name -> list of prediction strings def process(self, inputs, outputs): for input, output in zip(inputs, outputs): image_id = input["image_id"] instances = output["instances"].to(self._cpu_device) boxes = instances.pred_boxes.tensor.numpy() scores = instances.scores.tolist() classes = instances.pred_classes.tolist() for box, score, cls in zip(boxes, scores, classes): xmin, ymin, xmax, ymax = box # The inverse of data loading logic in `datasets/pascal_voc.py` xmin += 1 ymin += 1 self._predictions[cls].append( f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" ) def evaluate(self): """ Returns: dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". 
""" all_predictions = comm.gather(self._predictions, dst=0) if not comm.is_main_process(): return predictions = defaultdict(list) for predictions_per_rank in all_predictions: for clsid, lines in predictions_per_rank.items(): predictions[clsid].extend(lines) del all_predictions self._logger.info( "Evaluating {} using {} metric. " "Note that results do not use the official Matlab API.".format( self._dataset_name, 2007 if self._is_2007 else 2012 ) ) with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: res_file_template = os.path.join(dirname, "{}.txt") aps = defaultdict(list) # iou -> ap per class for cls_id, cls_name in enumerate(self._class_names): if cls_name not in self.target_classnames: continue lines = predictions.get(cls_id, [""]) with open(res_file_template.format(cls_name), "w") as f: f.write("\n".join(lines)) for thresh in range(50, 100, 5): rec, prec, ap = voc_eval( res_file_template, self._anno_file_template, self._image_set_path, cls_name, ovthresh=thresh / 100.0, use_07_metric=self._is_2007, ) aps[thresh].append(ap * 100) ret = OrderedDict() mAP = {iou: np.mean(x) for iou, x in aps.items()} ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} #Add the codes for AP50 for idx, name in enumerate(self.target_classnames): ret["bbox"].update({"AP50-" + name: aps[50][idx]}) return ret ############################################################################## # # Below code is modified from # https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py # -------------------------------------------------------- # Fast/er R-CNN # Licensed under The MIT License [see LICENSE for details] # Written by Bharath Hariharan # -------------------------------------------------------- """Python implementation of the PASCAL VOC devkit's AP evaluation code.""" @lru_cache(maxsize=None) def parse_rec(filename): """Parse a PASCAL VOC xml file.""" with PathManager.open(filename) as f: tree = ET.parse(f) 
def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11-point method (default:False).
    """
    if use_07_metric:
        # VOC07: average the max precision at 11 evenly spaced recall points.
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            above = rec >= t
            p = np.max(prec[above]) if np.sum(above) != 0 else 0
            ap = ap + p / 11.0
        return ap

    # Exact AP: pad with sentinel values at both ends of the curve.
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))

    # Precision envelope: make precision non-increasing from right to left.
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # Area under the PR curve: sum precision over recall steps only at the
    # points where recall actually changes value.
    steps = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[steps + 1] - mrec[steps]) * mpre[steps + 1])
def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath, annopath, imagesetfile, classname,
    [ovthresh], [use_07_metric])

    Top level function that does the PASCAL VOC evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name

    # first load gt: read list of images
    with PathManager.open(imagesetfile, "r") as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    # load annots
    recs = {}
    for imagename in imagenames:
        recs[imagename] = parse_rec(annopath.format(imagename))

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        # FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin bool is the documented replacement.
        difficult = np.array([x["difficult"] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, "r") as f:
        lines = f.readlines()

    splitlines = [x.strip().split(" ") for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # A detection matching a "difficult" GT counts as neither TP nor FP;
            # a second match to an already-claimed GT counts as FP.
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
################ Gradient reverse function
class GradReverse(torch.autograd.Function):
    """Gradient reversal layer: identity forward, negated gradient backward.

    Lets the feature extractor be trained adversarially against the domain
    discriminator through ordinary backprop.
    """

    @staticmethod
    def forward(ctx, tensor):
        # view_as is an aliasing identity, so autograd records the op.
        return tensor.view_as(tensor)

    @staticmethod
    def backward(ctx, grad):
        # Flip the sign of the incoming gradient.
        return grad.neg()


def grad_reverse(x):
    """Apply the gradient-reversal identity to ``x``."""
    return GradReverse.apply(x)
    @classmethod
    def from_config(cls, cfg):
        """Translate a detectron2 config node into constructor kwargs."""
        backbone = build_backbone(cfg)
        return {
            "backbone": backbone,
            "proposal_generator": build_proposal_generator(cfg, backbone.output_shape()),
            "roi_heads": build_roi_heads(cfg, backbone.output_shape()),
            "input_format": cfg.INPUT.FORMAT,
            "vis_period": cfg.VIS_PERIOD,
            "pixel_mean": cfg.MODEL.PIXEL_MEAN,
            "pixel_std": cfg.MODEL.PIXEL_STD,
            # Which backbone feature level feeds the image-level domain
            # discriminator; "multi" means one discriminator per level.
            "dis_type": cfg.UNBIASEDTEACHER.DIS_TYPE,
            # "dis_loss_ratio": cfg.xxx,
        }

    def preprocess_image_train(self, batched_inputs: List[Dict[str, torch.Tensor]]):
        """
        Normalize, pad and batch the input images.

        Unlike the stock ``preprocess_image``, every input dict here carries
        both a labeled source image ("image") and an unlabeled target-domain
        image ("image_unlabeled"); both are normalized with the same
        pixel_mean/pixel_std and padded to the backbone's size divisibility.

        Returns:
            (images, images_t): a pair of ImageList objects for the source
            and target batches, in the same order as ``batched_inputs``.
        """
        images = [x["image"].to(self.device) for x in batched_inputs]
        images = [(x - self.pixel_mean) / self.pixel_std for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)

        images_t = [x["image_unlabeled"].to(self.device) for x in batched_inputs]
        images_t = [(x - self.pixel_mean) / self.pixel_std for x in images_t]
        images_t = ImageList.from_tensors(images_t, self.backbone.size_divisibility)

        return images, images_t
    def forward(
        self, batched_inputs, branch="supervised", given_proposals=None, val_mode=False
    ):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.
            branch: which training branch to run ("domain", "supervised*",
                "unsup_data_weak", "unsup_data_strong", "val_loss").
            given_proposals: unused here; kept for interface compatibility.
            val_mode: when True, run the training-style forward even in eval.

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "instances" whose value is a :class:`Instances`.
                The :class:`Instances` object has the following keys:
                "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"

            In training branches the return is a 4-tuple
            ``(losses, proposals_rpn, proposals_roih, ROI_predictions)``.
        """
        if (not self.training) and (not val_mode):  # only conduct when testing mode
            return self.inference(batched_inputs)

        if branch == "domain":
            # Adversarial image-level alignment: the discriminator tries to
            # tell source (label 0) from target (label 1) feature maps while
            # grad_reverse flips the gradient for the backbone.
            source_label = 0
            target_label = 1
            # images = self.preprocess_image(batched_inputs)
            images_s, images_t = self.preprocess_image_train(batched_inputs)

            features = self.backbone(images_s.tensor)
            if self.dis_type == "multi":
                # One discriminator per feature level; average the BCE losses.
                loss_D_img_s = 0
                for k, v in features.items():
                    features_s = grad_reverse(v)
                    D_img_out_s = self.D_img_dict[k](features_s)
                    loss_D_img_s += F.binary_cross_entropy_with_logits(
                        D_img_out_s,
                        torch.FloatTensor(D_img_out_s.data.size()).fill_(source_label).to(self.device),
                    )
                loss_D_img_s /= len(features)
                # features_s = grad_reverse(torch.cat((features['p2'],features['p3'],features['p4'],features['p5']),dim=1))
            else:
                # Single discriminator on the configured feature level.
                features_s = grad_reverse(features[self.dis_type])
                D_img_out_s = self.D_img(features_s)
                loss_D_img_s = F.binary_cross_entropy_with_logits(
                    D_img_out_s,
                    torch.FloatTensor(D_img_out_s.data.size()).fill_(source_label).to(self.device),
                )

            features_t = self.backbone(images_t.tensor)
            if self.dis_type == "multi":
                loss_D_img_t = 0
                for k, v in features_t.items():
                    features_tt = grad_reverse(v)
                    D_img_out_t = self.D_img_dict[k](features_tt)
                    loss_D_img_t += F.binary_cross_entropy_with_logits(
                        D_img_out_t,
                        torch.FloatTensor(D_img_out_t.data.size()).fill_(target_label).to(self.device),
                    )
                loss_D_img_t /= len(features_t)
            else:
                features_t = grad_reverse(features_t[self.dis_type])
                # features_t = grad_reverse(features_t['p2'])
                D_img_out_t = self.D_img(features_t)
                loss_D_img_t = F.binary_cross_entropy_with_logits(
                    D_img_out_t,
                    torch.FloatTensor(D_img_out_t.data.size()).fill_(target_label).to(self.device),
                )

            losses = {}
            losses["loss_D_img_s"] = loss_D_img_s
            losses["loss_D_img_t"] = loss_D_img_t
            # Empty proposal lists: the domain branch produces no detections.
            return losses, [], [], None

        images = self.preprocess_image(batched_inputs)

        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        # TODO: remove the usage of if else here. This needs to be re-organized
        if branch.startswith("supervised"):
            # Region proposal network
            proposals_rpn, proposal_losses = self.proposal_generator(
                images, features, gt_instances
            )

            # roi_head lower branch
            _, detector_losses = self.roi_heads(
                images,
                features,
                proposals_rpn,
                compute_loss=True,
                targets=gt_instances,
                branch=branch,
            )

            # visualization
            if self.vis_period > 0:
                storage = get_event_storage()
                if storage.iter % self.vis_period == 0:
                    self.visualize_training(batched_inputs, proposals_rpn, branch)

            losses = {}
            losses.update(detector_losses)
            losses.update(proposal_losses)
            return losses, [], [], None

        elif branch == "unsup_data_weak":
            """
            unsupervised weak branch: input image without any ground-truth label; output proposals of rpn and roi-head
            """
            # Region proposal network
            proposals_rpn, _ = self.proposal_generator(
                images, features, None, compute_loss=False
            )

            # roi_head lower branch (keep this for further production)
            # notice that we do not use any target in ROI head to do inference!
            proposals_roih, ROI_predictions = self.roi_heads(
                images,
                features,
                proposals_rpn,
                targets=None,
                compute_loss=False,
                branch=branch,
            )

            # if self.vis_period > 0:
            #     storage = get_event_storage()
            #     if storage.iter % self.vis_period == 0:
            #         self.visualize_training(batched_inputs, proposals_rpn, branch)

            return {}, proposals_rpn, proposals_roih, ROI_predictions

        elif branch == "unsup_data_strong":
            raise NotImplementedError()
        elif branch == "val_loss":
            raise NotImplementedError()
It shows ground truth bounding boxes on the original image and up to 20 predicted object proposals on the original image. Users can implement different visualization functions for different models. Args: batched_inputs (list): a list that contains input to the model. proposals (list): a list that contains predicted proposals. Both batched_inputs and proposals should have the same length. """ from detectron2.utils.visualizer import Visualizer storage = get_event_storage() max_vis_prop = 20 for input, prop in zip(batched_inputs, proposals): img = input["image"] img = convert_image_to_rgb(img.permute(1, 2, 0), self.input_format) v_gt = Visualizer(img, None) v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) anno_img = v_gt.get_image() box_size = min(len(prop.proposal_boxes), max_vis_prop) v_pred = Visualizer(img, None) v_pred = v_pred.overlay_instances( boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() ) prop_img = v_pred.get_image() vis_img = np.concatenate((anno_img, prop_img), axis=1) vis_img = vis_img.transpose(2, 0, 1) vis_name = ( "Left: GT bounding boxes " + branch + "; Right: Predicted proposals " + branch ) storage.put_image(vis_name, vis_img) break # only visualize one image in a batch ================================================ FILE: prod_lib/modeling/vgg.py ================================================ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved import torch.nn as nn import copy import torch from typing import Union, List, Dict, Any, cast from detectron2.modeling.backbone import ( ResNet, Backbone, build_resnet_backbone, BACKBONE_REGISTRY ) from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool, LastLevelP6P7 def make_layers(cfg: List[Union[str, int]], batch_norm: bool = False) -> nn.Sequential: layers: List[nn.Module] = [] in_channels = 3 for v in cfg: if v == 'M': layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: v = cast(int, v) conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] else: layers += [conv2d, nn.ReLU(inplace=True)] in_channels = v return nn.Sequential(*layers) cfgs: Dict[str, List[Union[str, int]]] = { 'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 'vgg19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], } class vgg_backbone(Backbone): """ Backbone (bottom-up) for FBNet. Hierarchy: trunk0: xif0_0 xif0_1 ... trunk1: xif1_0 xif1_1 ... ... Output features: The outputs from each "stage", i.e. trunkX. 
""" def __init__(self, cfg): super().__init__() self.vgg = make_layers(cfgs['vgg16'],batch_norm=True) self._initialize_weights() # self.stage_names_index = {'vgg1':3, 'vgg2':8 , 'vgg3':15, 'vgg4':22, 'vgg5':29} _out_feature_channels = [64, 128, 256, 512, 512] _out_feature_strides = [2, 4, 8, 16, 32] # stages, shape_specs = build_fbnet( # cfg, # name="trunk", # in_channels=cfg.MODEL.FBNET_V2.STEM_IN_CHANNELS # ) # nn.Sequential(*list(self.vgg.features._modules.values())[:14]) self.stages = [nn.Sequential(*list(self.vgg._modules.values())[0:7]),\ nn.Sequential(*list(self.vgg._modules.values())[7:14]),\ nn.Sequential(*list(self.vgg._modules.values())[14:24]),\ nn.Sequential(*list(self.vgg._modules.values())[24:34]),\ nn.Sequential(*list(self.vgg._modules.values())[34:]),] self._out_feature_channels = {} self._out_feature_strides = {} self._stage_names = [] for i, stage in enumerate(self.stages): name = "vgg{}".format(i) self.add_module(name, stage) self._stage_names.append(name) self._out_feature_channels[name] = _out_feature_channels[i] self._out_feature_strides[name] = _out_feature_strides[i] self._out_features = self._stage_names del self.vgg def forward(self, x): features = {} for name, stage in zip(self._stage_names, self.stages): x = stage(x) # if name in self._out_features: # outputs[name] = x features[name] = x # import pdb # pdb.set_trace() return features def _initialize_weights(self) -> None: for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) nn.init.constant_(m.bias, 0) @BACKBONE_REGISTRY.register() #already register in baseline model def build_vgg_backbone(cfg, _): return vgg_backbone(cfg) @BACKBONE_REGISTRY.register() #already register in baseline model def 
build_vgg_fpn_backbone(cfg, _): # backbone = FPN( # bottom_up=build_vgg_backbone(cfg), # in_features=cfg.MODEL.FPN.IN_FEATURES, # out_channels=cfg.MODEL.FPN.OUT_CHANNELS, # norm=cfg.MODEL.FPN.NORM, # top_block=LastLevelMaxPool(), # ) bottom_up = vgg_backbone(cfg) in_features = cfg.MODEL.FPN.IN_FEATURES out_channels = cfg.MODEL.FPN.OUT_CHANNELS backbone = FPN( bottom_up=bottom_up, in_features=in_features, out_channels=out_channels, norm=cfg.MODEL.FPN.NORM, top_block=LastLevelMaxPool(), # fuse_type=cfg.MODEL.FPN.FUSE_TYPE, ) # return backbone return backbone ================================================ FILE: prod_lib/runner/__init__.py ================================================ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # from .runner import SemiSupSegRunner, SemiSupHandTrackingRunner # noqa from .runner import BaseUnbiasedTeacherRunner # noqa from .runner import DAobjUnbiasedTeacherRunner # noqa ================================================ FILE: prod_lib/runner/runner.py ================================================ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved import logging import os from collections import OrderedDict from functools import lru_cache import d2go.utils.abnormal_checker as abnormal_checker import detectron2.utils.comm as comm from d2go.config import CONFIG_SCALING_METHOD_REGISTRY, temp_defrost from d2go.data.dataset_mappers import D2GoDatasetMapper, build_dataset_mapper from d2go.data.transforms.build import build_transform_gen from d2go.data.utils import maybe_subsample_n_images from d2go.modeling import build_model, kmeans_anchors, model_ema from d2go.runner import GeneralizedRCNNRunner from d2go.utils.flop_calculator import add_print_flops_callback from d2go.utils.misc import get_tensorboard_log_dir from d2go.utils.helper import TensorboardXWriter, D2Trainer from detectron2.checkpoint import PeriodicCheckpointer from detectron2.engine import hooks from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter from torch.nn.parallel import DataParallel, DistributedDataParallel from detectron2.evaluation import ( DatasetEvaluators, ) from detectron2.data import ( MetadataCatalog, ) from ..evaluation import ( COCOEvaluator, PascalVOCDetectionEvaluator, ) from d2go.projects.unbiased_teacher.checkpoint import EnsembleTSModel from ..config.defaults import add_aut_config # from ..config.defaults import add_ut_config # from ..data.build import ( # build_detection_semisup_train_loader_two_crops, # build_uru_detection_semisup_train_loader, # inject_uru_dataset, # ) from d2go.projects.unbiased_teacher.data.build import ( build_detection_semisup_train_loader_two_crops, build_uru_detection_semisup_train_loader, ) from d2go.projects.unbiased_teacher.runner.runner import UnbiasedTeacherRunner from d2go.projects.unbiased_teacher.data.dataset_mapper import DatasetMapperTwoCropSeparate # noqa from ..data import builtin # noqa; for registering COCO unlabel dataset from d2go.projects.unbiased_teacher.engine.trainer import UnbiasedTeacherTrainer from 
d2go.projects.unbiased_teacher.modeling.meta_arch.rcnn import TwoStagePseudoLabGeneralizedRCNN # noqa from d2go.projects.unbiased_teacher.modeling.proposal_generator.rpn import PseudoLabRPN # noqa from d2go.projects.unbiased_teacher.modeling.roi_heads.roi_heads import StandardROIHeadsPseudoLab # noqa from d2go.projects.unbiased_teacher.solver.build import ut_build_lr_scheduler #For DA object detection from ..engine.trainer import DAobjTrainer from ..modeling.meta_arch.daobj_rcnn import DAobjTwoStagePseudoLabGeneralizedRCNN # noqa #For VGG model architecture from ..modeling.meta_arch.vgg import build_vgg_backbone,build_vgg_fpn_backbone # noqa ALL_TB_WRITERS = [] @lru_cache() def _get_tbx_writer(log_dir): ret = TensorboardXWriter(log_dir) ALL_TB_WRITERS.append(ret) return ret class BaseUnbiasedTeacherRunner(UnbiasedTeacherRunner): def get_default_cfg(self): cfg = super().get_default_cfg() add_aut_config(cfg) # add_pointrend_config(cfg) # cfg = CN(cfg) # upgrade from D2's CfgNode to D2Go's CfgNode return cfg @staticmethod def get_evaluator(cfg, dataset_name, output_folder): evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type if evaluator_type in ["coco"]: # D2 is in the process of reducing the use of cfg. 
dataset_evaluators = COCOEvaluator( dataset_name, output_dir=output_folder, kpt_oks_sigmas=cfg.TEST.KEYPOINT_OKS_SIGMAS, ) elif evaluator_type in ["pascal_voc"]: dataset_evaluators = PascalVOCDetectionEvaluator(dataset_name) elif evaluator_type in ["pascal_voc_water"]: dataset_evaluators = PascalVOCDetectionEvaluator(dataset_name, target_classnames=["bicycle", "bird", "car", "cat", "dog", "person"]) else: dataset_evaluators = D2Trainer.build_evaluator( cfg, dataset_name, output_folder ) if not isinstance(dataset_evaluators, DatasetEvaluators): dataset_evaluators = DatasetEvaluators([dataset_evaluators]) return dataset_evaluators # class DAobjUnbiasedTeacherRunner(UnbiasedTeacherRunner): class DAobjUnbiasedTeacherRunner(BaseUnbiasedTeacherRunner): def get_default_cfg(self): cfg = super().get_default_cfg() # add_aut_config(cfg) # add_pointrend_config(cfg) # cfg = CN(cfg) # upgrade from D2's CfgNode to D2Go's CfgNode return cfg def build_model(self, cfg, eval_only=False): """ Build both Student and Teacher models Student: regular model Teacher: model that is updated by EMA """ # build_model might modify the cfg, thus clone cfg = cfg.clone() model = build_model(cfg) model_teacher = build_model(cfg) if cfg.MODEL.FROZEN_LAYER_REG_EXP: raise NotImplementedError() if cfg.QUANTIZATION.QAT.ENABLED: raise NotImplementedError() if eval_only: raise NotImplementedError() return EnsembleTSModel(model_teacher, model) def do_train(self, cfg, model, resume): # NOTE: d2go's train_net applies DDP layer by default # we need to strip it away and only put DDP on model_student if isinstance(model, (DistributedDataParallel, DataParallel)): model = model.module model_teacher, model_student = model.model_teacher, model.model_student if comm.get_world_size() > 1: model_student = DistributedDataParallel( model_student, device_ids=None if cfg.MODEL.DEVICE == "cpu" else [comm.get_local_rank()], broadcast_buffers=False, find_unused_parameters=cfg.MODEL.DDP_FIND_UNUSED_PARAMETERS, ) 
    def do_train(self, cfg, model, resume):
        """Train the Student/Teacher ensemble with the DAobj trainer.

        ``model`` is the EnsembleTSModel (possibly DDP-wrapped); only the
        Student is wrapped in DDP and optimized, the Teacher is updated by
        EMA inside the trainer. Returns {"model_final": cfg-with-weights}.
        """
        # NOTE: d2go's train_net applies DDP layer by default
        # we need to strip it away and only put DDP on model_student
        if isinstance(model, (DistributedDataParallel, DataParallel)):
            model = model.module
        model_teacher, model_student = model.model_teacher, model.model_student
        if comm.get_world_size() > 1:
            model_student = DistributedDataParallel(
                model_student,
                device_ids=None
                if cfg.MODEL.DEVICE == "cpu"
                else [comm.get_local_rank()],
                broadcast_buffers=False,
                find_unused_parameters=cfg.MODEL.DDP_FIND_UNUSED_PARAMETERS,
            )

        add_print_flops_callback(cfg, model_student, disable_after_callback=True)

        # Optimizer/scheduler act on the Student only; the checkpointer saves
        # the full ensemble so both models are captured.
        optimizer = self.build_optimizer(cfg, model_student)
        scheduler = self.build_lr_scheduler(cfg, optimizer)

        checkpointer = self.build_checkpointer(
            cfg,
            model,
            save_dir=cfg.OUTPUT_DIR,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        checkpoint = checkpointer.resume_or_load(
            cfg.MODEL.WEIGHTS, resume=resume or cfg.UNBIASEDTEACHER.RESUME_FROM_ANOTHER
        )
        # NOTE(review): `and` binds tighter than `or` here, i.e. this reads
        # (resume and has_checkpoint) or RESUME_FROM_ANOTHER — so resuming
        # from another run always reuses the stored iteration; confirm
        # that is the intent.
        start_iter = (
            checkpoint.get("iteration", -1)
            if resume
            and checkpointer.has_checkpoint()
            or cfg.UNBIASEDTEACHER.RESUME_FROM_ANOTHER
            else -1
        )
        # The checkpoint stores the training iteration that just finished, thus we start
        # at the next iteration (or iter zero if there's no checkpoint).
        start_iter += 1
        max_iter = cfg.SOLVER.MAX_ITER
        periodic_checkpointer = PeriodicCheckpointer(
            checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
        )

        # if resume from a pre-trained checkpoint, we modify the BURN_IN_STEP
        # so that the weights of the Student will be copied to the Teacher
        # at the 1st iteration when the training started
        if cfg.UNBIASEDTEACHER.RESUME_FROM_ANOTHER:
            cfg.defrost()
            cfg.UNBIASEDTEACHER.BURN_IN_STEP = start_iter
            cfg.freeze()

        data_loader = self.build_detection_train_loader(cfg)

        def _get_model_with_abnormal_checker(model):
            # Optionally wrap a model so abnormal losses are detected/logged.
            if not cfg.ABNORMAL_CHECKER.ENABLED:
                return model

            tbx_writer = _get_tbx_writer(get_tensorboard_log_dir(cfg.OUTPUT_DIR))
            writers = abnormal_checker.get_writers(cfg, tbx_writer)
            checker = abnormal_checker.AbnormalLossChecker(start_iter, writers)
            ret = abnormal_checker.AbnormalLossCheckerWrapper(model, checker)
            return ret

        trainer = DAobjTrainer(
            cfg,
            _get_model_with_abnormal_checker(model_student),
            _get_model_with_abnormal_checker(model_teacher),
            data_loader,
            optimizer,
        )
        trainer_hooks = [
            hooks.IterationTimer(),
            self._create_after_step_hook(
                cfg, model_student, optimizer, scheduler, periodic_checkpointer
            ),
            hooks.EvalHook(
                cfg.TEST.EVAL_PERIOD,
                lambda: self.do_test(cfg, model, train_iter=trainer.iter),
            ),
            kmeans_anchors.compute_kmeans_anchors_hook(self, cfg),
            self._create_qat_hook(cfg) if cfg.QUANTIZATION.QAT.ENABLED else None,
        ]
        if comm.is_main_process():
            # Metric writers only run on the main process.
            tbx_writer = _get_tbx_writer(get_tensorboard_log_dir(cfg.OUTPUT_DIR))
            writers = [
                CommonMetricPrinter(max_iter),
                JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")),
                tbx_writer,
            ]
            trainer_hooks.append(
                hooks.PeriodicWriter(writers, period=cfg.WRITER_PERIOD)
            )
        trainer.register_hooks(trainer_hooks)
        trainer.train(start_iter, max_iter)

        # Hand back a config pointing at the final checkpoint for eval/export.
        trained_cfg = cfg.clone()
        with temp_defrost(trained_cfg):
            trained_cfg.MODEL.WEIGHTS = checkpointer.get_checkpoint_file()
        return {"model_final": trained_cfg}
""" cfg = get_cfg() add_ateacher_config(cfg) cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() default_setup(cfg, args) return cfg def main(args): cfg = setup(args) if cfg.SEMISUPNET.Trainer == "ateacher": Trainer = ATeacherTrainer elif cfg.SEMISUPNET.Trainer == "baseline": Trainer = BaselineTrainer else: raise ValueError("Trainer Name is not found.") if args.eval_only: if cfg.SEMISUPNET.Trainer == "ateacher": model = Trainer.build_model(cfg) model_teacher = Trainer.build_model(cfg) ensem_ts_model = EnsembleTSModel(model_teacher, model) DetectionCheckpointer( ensem_ts_model, save_dir=cfg.OUTPUT_DIR ).resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume) res = Trainer.test(cfg, ensem_ts_model.modelTeacher) else: model = Trainer.build_model(cfg) DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( cfg.MODEL.WEIGHTS, resume=args.resume ) res = Trainer.test(cfg, model) return res trainer = Trainer(cfg) trainer.resume_or_load(resume=args.resume) return trainer.train() if __name__ == "__main__": args = default_argument_parser().parse_args() print("Command Line Args:", args) launch( main, args.num_gpus, num_machines=args.num_machines, machine_rank=args.machine_rank, dist_url=args.dist_url, args=(args,), )