Repository: VlSomers/bpbreid Branch: main Commit: a2dc43042847 Files: 171 Total size: 1009.1 KB Directory structure: gitextract_fsax4gel/ ├── .flake8 ├── .gitignore ├── .isort.cfg ├── .style.yapf ├── LICENSE ├── README.md ├── Torchreid_original_README.rst ├── configs/ │ └── bpbreid/ │ ├── bpbreid_dukemtmc_test.yaml │ ├── bpbreid_dukemtmc_train.yaml │ ├── bpbreid_market1501_test.yaml │ ├── bpbreid_market1501_train.yaml │ ├── bpbreid_occ_duke_test.yaml │ ├── bpbreid_occ_duke_train.yaml │ ├── bpbreid_occ_reid_test.yaml │ ├── bpbreid_occ_reid_train.yaml │ ├── bpbreid_p_dukemtmc_test.yaml │ ├── bpbreid_p_dukemtmc_train.yaml │ ├── pcb_market1501_train.yaml │ └── pcb_occ_duke_train.yaml ├── docs/ │ ├── AWESOME_REID.md │ ├── MODEL_ZOO.md │ ├── Makefile │ ├── conf.py │ ├── datasets.rst │ ├── evaluation.rst │ ├── index.rst │ ├── pkg/ │ │ ├── data.rst │ │ ├── engine.rst │ │ ├── losses.rst │ │ ├── metrics.rst │ │ ├── models.rst │ │ ├── optim.rst │ │ └── utils.rst │ ├── requirements.txt │ └── user_guide.rst ├── linter.sh ├── pyproject.toml ├── requirements.txt ├── requirements_labels.txt ├── setup.py └── torchreid/ ├── __init__.py ├── data/ │ ├── __init__.py │ ├── data_augmentation/ │ │ ├── __init__.py │ │ └── random_occlusion.py │ ├── datamanager.py │ ├── datasets/ │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── image/ │ │ │ ├── __init__.py │ │ │ ├── cuhk01.py │ │ │ ├── cuhk02.py │ │ │ ├── cuhk03.py │ │ │ ├── dukemtmcreid.py │ │ │ ├── grid.py │ │ │ ├── ilids.py │ │ │ ├── market1501.py │ │ │ ├── msmt17.py │ │ │ ├── occluded_dukemtmc.py │ │ │ ├── occluded_reid.py │ │ │ ├── p_ETHZ.py │ │ │ ├── p_dukemtmc_reid.py │ │ │ ├── partial_ilids.py │ │ │ ├── partial_reid.py │ │ │ ├── prid.py │ │ │ ├── sensereid.py │ │ │ └── viper.py │ │ └── video/ │ │ ├── __init__.py │ │ ├── dukemtmcvidreid.py │ │ ├── ilidsvid.py │ │ ├── mars.py │ │ └── prid2011.py │ ├── masks_transforms/ │ │ ├── __init__.py │ │ ├── coco_keypoints_transforms.py │ │ ├── mask_transform.py │ │ ├── pcb_transforms.py │ │ └── 
pifpaf_mask_transform.py │ ├── sampler.py │ └── transforms.py ├── engine/ │ ├── __init__.py │ ├── engine.py │ ├── image/ │ │ ├── __init__.py │ │ ├── part_based_engine.py │ │ ├── softmax.py │ │ └── triplet.py │ └── video/ │ ├── __init__.py │ ├── softmax.py │ └── triplet.py ├── hyperparameter/ │ ├── custom_hyperparameter_optimizer.py │ ├── hyperparameter_optimizer.py │ └── optuna_hyperparameter_optimizer.py ├── losses/ │ ├── GiLt_loss.py │ ├── __init__.py │ ├── body_part_attention_loss.py │ ├── cross_entropy_loss.py │ ├── hard_mine_triplet_loss.py │ ├── inter_parts_triplet_loss.py │ ├── part_averaged_triplet_loss.py │ ├── part_individual_triplet_loss.py │ ├── part_max_min_triplet_loss.py │ ├── part_max_triplet_loss.py │ ├── part_min_triplet_loss.py │ └── part_random_max_min_triplet_loss.py ├── metrics/ │ ├── __init__.py │ ├── accuracy.py │ ├── distance.py │ ├── rank.py │ └── rank_cylib/ │ ├── Makefile │ ├── __init__.py │ ├── rank_cy.pyx │ ├── setup.py │ └── test_cython.py ├── models/ │ ├── __init__.py │ ├── bpbreid.py │ ├── compact_bilinear_pooling.py │ ├── densenet.py │ ├── hacnn.py │ ├── hrnet.py │ ├── inceptionresnetv2.py │ ├── inceptionv4.py │ ├── mlfn.py │ ├── mobilenetv2.py │ ├── mudeep.py │ ├── nasnet.py │ ├── osnet.py │ ├── osnet_ain.py │ ├── pcb.py │ ├── pvpm.py │ ├── resnet.py │ ├── resnet_fastreid.py │ ├── resnet_ibn_a.py │ ├── resnet_ibn_b.py │ ├── resnetmid.py │ ├── senet.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ ├── squeezenet.py │ └── xception.py ├── optim/ │ ├── __init__.py │ ├── lr_scheduler.py │ ├── optimizer.py │ └── radam.py ├── scripts/ │ ├── __init__.py │ ├── default_config.py │ ├── get_labels.py │ └── main.py ├── tools/ │ ├── __init__.py │ ├── compute_mean_std.py │ ├── extract_part_based_features.py │ └── feature_extractor.py └── utils/ ├── __init__.py ├── avgmeter.py ├── constants.py ├── distribution.py ├── engine_state.py ├── imagetools.py ├── logging/ │ ├── __init__.py │ ├── deprecated_loggers.py │ └── logger.py ├── model_complexity.py 
├── reidtools.py ├── rerank.py ├── tensortools.py ├── tools.py ├── torch_receptive_field/ │ ├── __init__.py │ └── receptive_field.py ├── torchtools.py ├── visualization/ │ ├── __init__.py │ ├── display_batch_triplets.py │ ├── embeddings_projection.py │ ├── feature_map_visualization.py │ └── visualize_query_gallery_rankings.py └── writer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .flake8 ================================================ [flake8] ignore = # At least two spaces before inline comment E261, # Line lengths are recommended to be no greater than 79 characters E501, # Missing whitespace around arithmetic operator E226, # Blank line contains whitespace W293, # Do not use bare 'except' E722, # Line break after binary operator W504, # isort found an import in the wrong position I001 max-line-length = 79 exclude = __init__.py, build, torchreid/metrics/rank_cylib/ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/f lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Cython eval code *.c *.html # OS X .DS_Store .Spotlight-V100 .Trashes ._* # ReID reid-data/ log/ saved-models/ model-zoo/ pretrained_models/ debug* /.idea/ /configs_user/ ================================================ FILE: .isort.cfg ================================================ [isort] line_length=79 multi_line_output=3 length_sort=true known_standard_library=numpy,setuptools known_myself=torchreid known_third_party=matplotlib,cv2,torch,torchvision,PIL,yacs no_lines_before=STDLIB,THIRDPARTY sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER default_section=FIRSTPARTY ================================================ FILE: .style.yapf ================================================ [style] BASED_ON_STYLE = pep8 BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true DEDENT_CLOSING_BRACKETS = true SPACES_BEFORE_COMMENT = 1 ARITHMETIC_PRECEDENCE_INDICATION = true ================================================ FILE: LICENSE ================================================ **HIPPOCRATIC LICENSE** **Version 3.0, October 2021** **TERMS AND CONDITIONS** TERMS AND CONDITIONS FOR USE, COPY, MODIFICATION, PREPARATION OF DERIVATIVE WORK, REPRODUCTION, AND DISTRIBUTION: **[1.](#1) DEFINITIONS:** _This section defines certain terms used throughout this license agreement._ [1.1.](#1.1) “License” means the terms and conditions, as stated herein, for use, copy, modification, preparation of derivative work, reproduction, and distribution of Software (as defined below). [1.2.](#1.2) “Licensor” means the copyright and/or patent owner or entity authorized by the copyright and/or patent owner that is granting the License. [1.3.](#1.3) “Licensee” means the individual or entity exercising permissions granted by this License, including the use, copy, modification, preparation of derivative work, reproduction, and distribution of Software (as defined below). [1.4.](#1.4) “Software” means any copyrighted work, including but not limited to software code, authored by Licensor and made available under this License. [1.5.](#1.5) “Supply Chain” means the sequence of processes involved in the production and/or distribution of a commodity, good, or service offered by the Licensee. [1.6.](#1.6) “Supply Chain Impacted Party” or “Supply Chain Impacted Parties” means any person(s) directly impacted by any of Licensee’s Supply Chain, including the practices of all persons or entities within the Supply Chain prior to a good or service reaching the Licensee. 
[1.7.](#1.7) “Duty of Care” is defined by its use in tort law, delict law, and/or similar bodies of law closely related to tort and/or delict law, including without limitation, a requirement to act with the watchfulness, attention, caution, and prudence that a reasonable person in the same or similar circumstances would use towards any Supply Chain Impacted Party. [1.8.](#1.8) “Worker” is defined to include any and all permanent, temporary, and agency workers, as well as piece-rate, salaried, hourly paid, legal young (minors), part-time, night, and migrant workers. **[2.](#2) INTELLECTUAL PROPERTY GRANTS:** _This section identifies intellectual property rights granted to a Licensee_. [2.1.](#2.1) _Grant of Copyright License_: Subject to the terms and conditions of this License, Licensor hereby grants to Licensee a worldwide, non-exclusive, no-charge, royalty-free copyright license to use, copy, modify, prepare derivative work, reproduce, or distribute the Software, Licensor authored modified software, or other work derived from the Software. [2.2.](#2.2) _Grant of Patent License_: Subject to the terms and conditions of this License, Licensor hereby grants Licensee a worldwide, non-exclusive, no-charge, royalty-free patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer Software. 
**[3.](#3) ETHICAL STANDARDS:** _This section lists conditions the Licensee must comply with in order to have rights under this License._ The rights granted to the Licensee by this License are expressly made subject to the Licensee’s ongoing compliance with the following conditions: * [3.1.](#3.1) The Licensee SHALL NOT, whether directly or indirectly, through agents or assigns: * [3.1.1.](#3.1.1) Infringe upon any person’s right to life or security of person, engage in extrajudicial killings, or commit murder, without lawful cause (See Article 3, _United Nations Universal Declaration of Human Rights_; Article 6, _International Covenant on Civil and Political Rights_) * [3.1.2.](#3.1.2) Hold any person in slavery, servitude, or forced labor (See Article 4, _United Nations Universal Declaration of Human Rights_; Article 8, _International Covenant on Civil and Political Rights_); * [3.1.3.](#3.1.3) Contribute to the institution of slavery, slave trading, forced labor, or unlawful child labor (See Article 4, _United Nations Universal Declaration of Human Rights_; Article 8, _International Covenant on Civil and Political Rights_); * [3.1.4.](#3.1.4) Torture or subject any person to cruel, inhumane, or degrading treatment or punishment (See Article 5, _United Nations Universal Declaration of Human Rights_; Article 7, _International Covenant on Civil and Political Rights_); * [3.1.5.](#3.1.5) Discriminate on the basis of sex, gender, sexual orientation, race, ethnicity, nationality, religion, caste, age, medical disability or impairment, and/or any other like circumstances (See Article 7, _United Nations Universal Declaration of Human Rights_; Article 2, _International Covenant on Economic, Social and Cultural Rights_; Article 26, _International Covenant on Civil and Political Rights_); * [3.1.6.](#3.1.6) Prevent any person from exercising his/her/their right to seek an effective remedy by a competent court or national tribunal (including domestic judicial systems, 
international courts, arbitration bodies, and other adjudicating bodies) for actions violating the fundamental rights granted to him/her/them by applicable constitutions, applicable laws, or by this License (See Article 8, _United Nations Universal Declaration of Human Rights_; Articles 9 and 14, _International Covenant on Civil and Political Rights_); * [3.1.7.](#3.1.7) Subject any person to arbitrary arrest, detention, or exile (See Article 9, _United Nations Universal Declaration of Human Rights_; Article 9, _International Covenant on Civil and Political Rights_); * [3.1.8.](#3.1.8) Subject any person to arbitrary interference with a person’s privacy, family, home, or correspondence without the express written consent of the person (See Article 12, _United Nations Universal Declaration of Human Rights_; Article 17, _International Covenant on Civil and Political Rights_); * [3.1.9.](#3.1.9) Arbitrarily deprive any person of his/her/their property (See Article 17, _United Nations Universal Declaration of Human Rights_); * [3.1.10.](#3.1.10) Forcibly remove indigenous peoples from their lands or territories or take any action with the aim or effect of dispossessing indigenous peoples from their lands, territories, or resources, including without limitation the intellectual property or traditional knowledge of indigenous peoples, without the free, prior, and informed consent of indigenous peoples concerned (See Articles 8 and 10, _United Nations Declaration on the Rights of Indigenous Peoples_); * [3.1.11.](#3.1.11) _Mass Surveillance_: Be a government agency or multinational corporation, or a representative, agent, affiliate, successor, attorney, or assign of a government or multinational corporation, which participates in mass surveillance programs; * [3.1.12.](#3.1.12) _Military Activities_: Be an entity or a representative, agent, affiliate, successor, attorney, or assign of an entity which conducts military activities; * [3.1.13.](#3.1.13) _Law Enforcement_: Be 
an individual or entity, or a or a representative, agent, affiliate, successor, attorney, or assign of an individual or entity, that provides good or services to, or otherwise enters into any commercial contracts with, any local, state, or federal law enforcement agency; * [3.1.14.](#3.1.14) _Media_: Be an individual or entity, or a or a representative, agent, affiliate, successor, attorney, or assign of an individual or entity, that broadcasts messages promoting killing, torture, or other forms of extreme violence; * [3.1.15.](#3.1.15) Interfere with Workers' free exercise of the right to organize and associate (See Article 20, United Nations Universal Declaration of Human Rights; C087 - Freedom of Association and Protection of the Right to Organise Convention, 1948 (No. 87), International Labour Organization; Article 8, International Covenant on Economic, Social and Cultural Rights); and * [3.1.16.](#3.1.16) Harm the environment in a manner inconsistent with local, state, national, or international law. 
* [3.2.](#3.2) The Licensee SHALL: * [3.2.1.](#3.2.1) _Social Auditing_: Only use social auditing mechanisms that adhere to Worker-Driven Social Responsibility Network’s Statement of Principles () over traditional social auditing mechanisms, to the extent the Licensee uses any social auditing mechanisms at all; * [3.2.2.](#3.2.2) Provide equal pay for equal work where the performance of such work requires equal skill, effort, and responsibility, and which are performed under similar working conditions, except where such payment is made pursuant to: * [3.2.2.1.](#3.2.2.1) A seniority system; * [3.2.2.2.](#3.2.2.2) A merit system; * [3.2.2.3.](#3.2.2.3) A system which measures earnings by quantity or quality of production; or * [3.2.2.4.](#3.2.2.4) A differential based on any other factor other than sex, gender, sexual orientation, race, ethnicity, nationality, religion, caste, age, medical disability or impairment, and/or any other like circumstances (See 29 U.S.C.A. § 206(d)(1); Article 23, _United Nations Universal Declaration of Human Rights_; Article 7, _International Covenant on Economic, Social and Cultural Rights_; Article 26, _International Covenant on Civil and Political Rights_); and * [3.2.3.](#3.2.3) Allow for reasonable limitation of working hours and periodic holidays with pay (See Article 24, _United Nations Universal Declaration of Human Rights_; Article 7, _International Covenant on Economic, Social and Cultural Rights_). 
**[4.](#4) SUPPLY CHAIN IMPACTED PARTIES:** _This section identifies additional individuals or entities that a Licensee could harm as a result of violating the Ethical Standards section, the condition that the Licensee must voluntarily accept a Duty of Care for those individuals or entities, and the right to a private right of action that those individuals or entities possess as a result of violations of the Ethical Standards section._ [4.1.](#4.1) In addition to the above Ethical Standards, Licensee voluntarily accepts a Duty of Care for Supply Chain Impacted Parties of this License, including individuals and communities impacted by violations of the Ethical Standards. The Duty of Care is breached when a provision within the Ethical Standards section is violated by a Licensee, one of its successors or assigns, or by an individual or entity that exists within the Supply Chain prior to a good or service reaching the Licensee. [4.2.](#4.2) Breaches of the Duty of Care, as stated within this section, shall create a private right of action, allowing any Supply Chain Impacted Party harmed by the Licensee to take legal action against the Licensee in accordance with applicable negligence laws, whether they be in tort law, delict law, and/or similar bodies of law closely related to tort and/or delict law, regardless if Licensee is directly responsible for the harms suffered by a Supply Chain Impacted Party. Nothing in this section shall be interpreted to include acts committed by individuals outside of the scope of his/her/their employment. [5.](#5) **NOTICE:** _This section explains when a Licensee must notify others of the License._ [5.1.](#5.1) _Distribution of Notice_: Licensee must ensure that everyone who receives a copy of or uses any part of Software from Licensee, with or without changes, also receives the License and the copyright notice included with Software (and if included by the Licensor, patent, trademark, and attribution notice). 
Licensee must ensure that License is prominently displayed so that any individual or entity seeking to download, copy, use, or otherwise receive any part of Software from Licensee is notified of this License and its terms and conditions. Licensee must cause any modified versions of the Software to carry prominent notices stating that Licensee changed the Software. [5.2.](#5.2) _Modified Software_: Licensee is free to create modifications of the Software and distribute only the modified portion created by Licensee, however, any derivative work stemming from the Software or its code must be distributed pursuant to this License, including this Notice provision. [5.3.](#5.3) _Recipients as Licensees_: Any individual or entity that uses, copies, modifies, reproduces, distributes, or prepares derivative work based upon the Software, all or part of the Software’s code, or a derivative work developed by using the Software, including a portion of its code, is a Licensee as defined above and is subject to the terms and conditions of this License. **[6.](#6) REPRESENTATIONS AND WARRANTIES:** [6.1.](#6.1) _Disclaimer of Warranty_: TO THE FULL EXTENT ALLOWED BY LAW, THIS SOFTWARE COMES “AS IS,” WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED, AND LICENSOR SHALL NOT BE LIABLE TO ANY PERSON OR ENTITY FOR ANY DAMAGES OR OTHER LIABILITY ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THIS LICENSE, UNDER ANY LEGAL CLAIM. [6.2.](#6.2) _Limitation of Liability_: LICENSEE SHALL HOLD LICENSOR HARMLESS AGAINST ANY AND ALL CLAIMS, DEBTS, DUES, LIABILITIES, LIENS, CAUSES OF ACTION, DEMANDS, OBLIGATIONS, DISPUTES, DAMAGES, LOSSES, EXPENSES, ATTORNEYS' FEES, COSTS, LIABILITIES, AND ALL OTHER CLAIMS OF EVERY KIND AND NATURE WHATSOEVER, WHETHER KNOWN OR UNKNOWN, ANTICIPATED OR UNANTICIPATED, FORESEEN OR UNFORESEEN, ACCRUED OR UNACCRUED, DISCLOSED OR UNDISCLOSED, ARISING OUT OF OR RELATING TO LICENSEE’S USE OF THE SOFTWARE. 
NOTHING IN THIS SECTION SHOULD BE INTERPRETED TO REQUIRE LICENSEE TO INDEMNIFY LICENSOR, NOR REQUIRE LICENSOR TO INDEMNIFY LICENSEE. **[7.](#7) TERMINATION** [7.1.](#7.1) _Violations of Ethical Standards or Breaching Duty of Care_: If Licensee violates the Ethical Standards section or Licensee, or any other person or entity within the Supply Chain prior to a good or service reaching the Licensee, breaches its Duty of Care to Supply Chain Impacted Parties, Licensee must remedy the violation or harm caused by Licensee within 30 days of being notified of the violation or harm. If Licensee fails to remedy the violation or harm within 30 days, all rights in the Software granted to Licensee by License will be null and void as between Licensor and Licensee. [7.2.](#7.2) _Failure of Notice_: If any person or entity notifies Licensee in writing that Licensee has not complied with the Notice section of this License, Licensee can keep this License by taking all practical steps to comply within 30 days after the notice of noncompliance. If Licensee does not do so, Licensee’s License (and all rights licensed hereunder) will end immediately. [7.3.](#7.3) _Judicial Findings_: In the event Licensee is found by a civil, criminal, administrative, or other court of competent jurisdiction, or some other adjudicating body with legal authority, to have committed actions which are in violation of the Ethical Standards or Supply Chain Impacted Party sections of this License, all rights granted to Licensee by this License will terminate immediately. 
[7.4.](#7.4) _Patent Litigation_: If Licensee institutes patent litigation against any entity (including a cross-claim or counterclaim in a suit) alleging that the Software, all or part of the Software’s code, or a derivative work developed using the Software, including a portion of its code, constitutes direct or contributory patent infringement, then any patent license, along with all other rights, granted to Licensee under this License will terminate as of the date such litigation is filed. [7.5.](#7.5) _Additional Remedies_: Termination of the License by failing to remedy harms in no way prevents Licensor or Supply Chain Impacted Party from seeking appropriate remedies at law or in equity. **[8.](#8) MISCELLANEOUS:** [8.1.](#8.1) _Conditions_: Sections 3, 4.1, 5.1, 5.2, 7.1, 7.2, 7.3, and 7.4 are conditions of the rights granted to Licensee in the License. [8.2.](#8.2) _Equitable Relief_: Licensor and any Supply Chain Impacted Party shall be entitled to equitable relief, including injunctive relief or specific performance of the terms hereof, in addition to any other remedy to which they are entitled at law or in equity. [8.3.](#8.3) _Severability_: If any term or provision of this License is determined to be invalid, illegal, or unenforceable by a court of competent jurisdiction, any such determination of invalidity, illegality, or unenforceability shall not affect any other term or provision of this License or invalidate or render unenforceable such term or provision in any other jurisdiction. If the determination of invalidity, illegality, or unenforceability by a court of competent jurisdiction pertains to the terms or provisions contained in the Ethical Standards section of this License, all rights in the Software granted to Licensee shall be deemed null and void as between Licensor and Licensee. [8.4.](#8.4) _Section Titles_: Section titles are solely written for organizational purposes and should not be used to interpret the language within each section. 
[8.5.](#8.5) _Citations_: Citations are solely written to provide context for the source of the provisions in the Ethical Standards. [8.6.](#8.6) _Section Summaries_: Some sections have a brief _italicized description_ which is provided for the sole purpose of briefly describing the section and should not be used to interpret the terms of the License. [8.7.](#8.7) _Entire License_: This is the entire License between the Licensor and Licensee with respect to the claims released herein and that the consideration stated herein is the only consideration or compensation to be paid or exchanged between them for this License. This License cannot be modified or amended except in a writing signed by Licensor and Licensee. [8.8.](#8.8) _Successors and Assigns_: This License shall be binding upon and inure to the benefit of the Licensor’s and Licensee’s respective heirs, successors, and assigns. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # BPBReID: Body Part-based (Occluded) Re-Identification **A strong baseline for body part-based person re-identification** 🔥 *Our new work on [Keypoint Promptable ReID](https://github.com/VlSomers/keypoint_promptable_reidentification) was accepted at ECCV24* 🔥 [[Paper](https://arxiv.org/abs/2211.03679)] [[Video](https://www.youtube.com/watch?v=4NQump-vg_A&ab_channel=VladimirSomers)] [[Poster](docs/figures/bpbreid/wacv23_poster_bpbreid.pdf)] [![arXiv](https://img.shields.io/badge/arXiv-2211.03679-.svg)](https://arxiv.org/abs/2211.03679) [![Hippocratic License HL3-LAW-MEDIA-MIL-SOC-SV](https://img.shields.io/static/v1?label=Hippocratic%20License&message=HL3-LAW-MEDIA-MIL-SOC-SV&labelColor=5e2751&color=bc8c3d)](https://firstdonoharm.dev/version/3/0/law-media-mil-soc-sv.html) >**[Body Part-Based Representation Learning for Occluded Person Re-Identification, WACV23](https://arxiv.org/abs/2211.03679)** > >Vladimir Somers, Christophe De Vleeschouwer, Alexandre Alahi > >[*arxiv 2211.03679*](https://arxiv.org/abs/2211.03679) > ### State-of-the-art performance on 5 datasets: Occluded-Duke: [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/body-part-based-representation-learning-for/person-re-identification-on-occluded-dukemtmc)](https://paperswithcode.com/sota/person-re-identification-on-occluded-dukemtmc?p=body-part-based-representation-learning-for) Occluded ReID: [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/body-part-based-representation-learning-for/person-re-identification-on-occluded-reid-1)](https://paperswithcode.com/sota/person-re-identification-on-occluded-reid-1?p=body-part-based-representation-learning-for) P-DukeMTMC: 
[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/body-part-based-representation-learning-for/person-re-identification-on-p-dukemtmc-reid)](https://paperswithcode.com/sota/person-re-identification-on-p-dukemtmc-reid?p=body-part-based-representation-learning-for) DukeMTMC-ReID: [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/body-part-based-representation-learning-for/person-re-identification-on-dukemtmc-reid)](https://paperswithcode.com/sota/person-re-identification-on-dukemtmc-reid?p=body-part-based-representation-learning-for) Market1501: [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/body-part-based-representation-learning-for/person-re-identification-on-market-1501)](https://paperswithcode.com/sota/person-re-identification-on-market-1501?p=body-part-based-representation-learning-for)  

  ## News - [2025.10.25] Added a discussion on the Theoretical Limitations of Global Embeddings in ReID Under Partial Observations - [2024.08.23] 🚀🔥 Our new work on [Keypoint Promptable ReID](https://arxiv.org/abs/2407.18112) was accepted to ECCV24, full codebase available [here](https://github.com/VlSomers/keypoint_promptable_reidentification). - [2023.09.20] New paper and big update coming soon 🚀 ... - [2023.07.26] The Python script from @samihormi to generate human parsing labels based on PifPaf and MaskRCNN has been released, have a look at the "Generate human parsing labels" section below. This script is different from the one used by the authors (especially when facing multiple pedestrians in a single image): resulting human parsing labels will not be exactly the same. - [2023.06.28] Please find a non-official script to generate human parsing labels from PifPaf and MaskRCNN in this [Pull Request](https://github.com/VlSomers/bpbreid/pull/18). The PR will be merged soon. - [2022.12.02] We release the first version of our codebase. Please update frequently as we will add more documentation during the next few weeks. ## What's next We plan on extending BPBReID in the near future, put a star and stay updated for future changes: - part-based video/tracklet reid - part-based reid for multi-object tracking - ... 
## Table of content - [BPBReID: Body Part-based Re-Identification](#bpbreid--body-part-based-re-identification) * [News](#news) * [What's next](#what-s-next) * [Table of content](#table-of-content) * [Introduction](#introduction) * [What to find in this repository](#what-to-find-in-this-repository) * [Discussion on the Theoretical Limitations of Global Embeddings in ReID Under Partial Observations](#discussion-on-the-theoretical-limitations-of-global-embeddings-in-reid-under-partial-observations-occlusions) * [Instructions](#instructions) + [Installation](#installation) + [Download human parsing labels](#download-human-parsing-labels) + [Generate human parsing labels](#generate-human-parsing-labels) + [Download the pre-trained models](#download-the-pre-trained-models) + [Inference](#inference) + [Training](#training) + [Visualization tools](#visualization-tools) * [Other works](#other-works) * [Questions and suggestions](#questions-and-suggestions) * [Citation](#citation) * [Acknowledgement](#acknowledgement) ## Introduction Welcome to the official repository for our WACV23 paper "_Body Part-Based Representation Learning for Occluded Person Re-Identification_". In this work, we propose BPBReID, a part-based method for person re-identification using body part feature representations to compute the similarity between two samples. As illustrated in the figure below, **part-based** ReID methods output multiple features per input sample, i.e. one for each part, whereas standard global methods only output a single feature. Compared to global methods, part-based ones come with some advantages: 1. They achieve explicit appearance feature alignment for better ReID accuracy. 2. They are robust to occlusions, since only mutually visible parts are used when comparing two samples. Our model BPBreID uses pseudo human parsing labels at training time to learn an attention mechanism. 
This attention mechanism has K branches to pool the global spatial feature map into K body part-based embeddings. Based on the attention map activations, visibility scores are computed for each part. At test time, no human parsing labels are required. The final similarity score between two samples is computed using the average distance of all mutually visible part-based embeddings. Please refer to [our paper](https://arxiv.org/abs/2211.03679) for more information. ## What to find in this repository In this repository, we propose a framework and a strong baseline to support further research on part-based ReID methods. Our code is based on the popular [Torchreid](https://github.com/KaiyangZhou/deep-person-reid) framework for person re-identification. In this codebase, we provide several adaptations to the original framework to support part-based ReID methods: - The [ImagePartBasedEngine](torchreid/engine/image/part_based_engine.py) to train/test part-based models, compute query-gallery distance matrix using multiple features per test sample with support for visibility scores. - The fully configurable [GiLt loss](/torchreid/losses/GiLt_loss.py) to selectively apply id/triplet loss on holistics (global) and part-based features. - The [BodyPartAttentionLoss](torchreid/losses/body_part_attention_loss.py) to train the attention mechanism. - The [BPBreID](torchreid/models/bpbreid.py) part-based model to compute part-based features with support for body-part learnable attention, fixed attention heatmaps from an external model, PCB-like horizontal stripes, etc. - The [Albumentation](https://albumentations.ai/) data augmentation library used for data augmentation, with support for external heatmaps/masks transforms. - Support for [Weights & Biases](https://wandb.ai/site) and other logging tools in the [Logger](torchreid/utils/logging/logger.py) class. - An [EngineState](torchreid/utils/engine_state.py) class to keep track of training epoch, etc. 
- A new [ranking visualization](torchreid/utils/visualization/visualize_query_gallery_rankings.py) tool to display part heatmaps, local distance for each part and other metrics. - For more information about all available configuration and parameters, please have a look at the [default config file](torchreid/scripts/default_config.py). You can also have a look at the original [Torchreid README](Torchreid_original_README.rst) for additional information, such as documentation, how-to instructions, etc. Be aware that some of the original Torchreid functionality and models might be broken (for example, we don't support video re-id yet). ## Discussion on the Theoretical Limitations of Global Embeddings in ReID Under Partial Observations (Occlusions) Current ReID models aim to learn a single global embedding where images of the same person cluster together. However, this paradigm faces an inherent paradox when dealing with partial observations. Consider three images of the same person: a full-body image (A) and two occluded variations - an upper-body-only image (B) and a lower-body-only image (C). This becomes particularly problematic when comparing B with C - they share no common visible features despite representing the same person. The global embedding approach implicitly assumes transitivity (if A=B and B=C, then A=C), but this property breaks down under partial observations, revealing a fundamental flaw in the single embedding space paradigm. This observation suggests that the traditional approach of mapping all images to a unified embedding space may be fundamentally flawed. ReID models do not learn truly identity-centric representations, but rather appearance-centric representations that serve as a proxy to model identity. These appearance-centric representations are learned to be invariant to pose, lighting, and viewpoint changes, but when faced with partial observations, expecting such models to produce consistent embeddings becomes theoretically questionable. 
This theoretical limitation is reflected in practice, where part-based representation learning, which compares only mutually visible regions, offers a more principled solution to this ambiguity. Rather than being a technical hack to improve image retrieval performance, it may represent a fundamental requirement for representation learning under partial observation constraints. Nevertheless, this apparent limitation of deep metric learning deserves further investigation. We identify two promising research directions. First, developing generic part-based architectures for representation learning that extend beyond human ReID and dynamically align comparable features while discarding missing information across object pairs. Such generic part-based methods would learn to build representations under partial observability without priors on the input object type, making them truly universal solutions. Second, moving beyond representation learning entirely by designing ReID models that directly process image pairs to compute similarity scores, thus avoiding the ambiguity of intermediate representation in an embedding space under partial observations. In this paradigm, the network would inherently learn to compare only mutually visible parts of the input image pair. However, this approach faces two key limitations compared to the first one: the need to run the neural network for every possible image pair, leading to quadratic computational complexity with gallery size (versus running the network once per image in representation learning), and the challenge of quantifying the confidence in the output similarity score when comparing partially visible objects. ## Instructions ### Installation Make sure [conda](https://www.anaconda.com/distribution/) is installed. 
# clone this repository git clone https://github.com/VlSomers/bpbreid # create conda environment cd bpbreid/ # enter project folder conda create --name bpbreid python=3.10 conda activate bpbreid # install dependencies # make sure `which python` and `which pip` point to the correct path pip install -r requirements.txt # install torch and torchvision (select the proper cuda version to suit your machine) conda install pytorch torchvision cudatoolkit=9.0 -c pytorch # install torchreid (don't need to re-build it if you modify the source code) python setup.py develop ### Download human parsing labels You can download the human parsing labels on [GDrive](https://drive.google.com/drive/folders/1IbCAbjj3XtV3_tFOsCuqBi79ZiDqNc1H?usp=sharing). These labels were generated using the [PifPaf](https://github.com/openpifpaf/openpifpaf) pose estimation model and then filtered using segmentation masks from [Mask-RCNN](https://github.com/facebookresearch/detectron2). We provide the labels for five datasets: **Market-1501**, **DukeMTMC-reID**, **Occluded-Duke**, **Occluded-ReID** and **P-DukeMTMC**. After downloading, unzip the file and put the `masks` folder under the corresponding dataset directory. For instance, Market-1501 should look like this: Market-1501-v15.09.15 ├── bounding_box_test ├── bounding_box_train ├── masks │   └── pifpaf_maskrcnn_filtering │   ├── bounding_box_test │   ├── bounding_box_train │   └── query └── query Make also sure to set `data.root` config to your dataset root directory path, i.e., all your datasets folders (`Market-1501-v15.09.15`, `DukeMTMC-reID`, `Occluded_Duke`, `P-DukeMTMC-reID`, `Occluded_REID`) should be under this path. ### Generate human parsing labels You can create human parsing labels for your own dataset using the following command: conda activate bpbreid python torchreid/scripts/get_labels --source [Dataset Path] The labels will be saved under the source directory in the *masks* folder as per the code convention. 
### Download the pre-trained models We also provide some [state-of-the-art pre-trained models](https://drive.google.com/drive/folders/1aUjpSXXVGtAh2nzV0RVsCq0tTXuDZWoH?usp=sharing) based on the HRNet-W32 backbone. You can put the downloaded weights under a 'pretrained_models/' directory or specify the path to the pre-trained weights using the `model.load_weights` parameter in the `yaml` config. The configuration used to obtain the pre-trained weights is also saved within the `.pth` file: make sure to set `model.load_config` to `True` so that the parameters under the `model.bpbreid` part of the configuration tree will be loaded from this file. ### Inference You can test the above downloaded models using the following command: conda activate bpbreid python torchreid/scripts/main.py --config-file configs/bpbreid/bpbreid_<dataset>_test.yaml For instance, for the Market-1501 dataset: conda activate bpbreid python torchreid/scripts/main.py --config-file configs/bpbreid/bpbreid_market1501_test.yaml Configuration files for other datasets are available under `configs/bpbreid/`. Make sure the `model.load_weights` in these `yaml` config files points to the pre-trained weights you just downloaded. ### Training Training configs for five datasets (Market-1501, DukeMTMC-reID, Occluded-Duke, Occluded-ReID and P-DukeMTMC) are provided in the `configs/bpbreid/` folder. A training procedure can be launched with: conda activate bpbreid python ./torchreid/scripts/main.py --config-file configs/bpbreid/bpbreid_<dataset>_train.yaml For instance, for the Occluded-Duke dataset: conda activate bpbreid python torchreid/scripts/main.py --config-file configs/bpbreid/bpbreid_occ_duke_train.yaml Make sure to download and install the human parsing labels for your training dataset before running this command. ### Visualization tools The ranking visualization tool can be activated by setting the `test.visrank` config to `True`. 
As illustrated below, this tool displays the Top-K ranked samples as rows (K can be set via `test.visrank_topk`). The first row with blue background is the query, and the following green/red rows indicate correct/incorrect matches. The attention maps for each test embedding (foreground, parts, etc) are displayed in the row. An attention map has a green/red border when it is visible/invisible. The first number under each attention map indicates the visibility score and the second number indicates the distance of the embedding to the corresponding query embedding. The distances under the images in the first column on the left are the global distances of that sample to the query, which is usually computed as the average of all other distances weighted by the visibility score. If you need more information about the visualization tool, feel free to open an issue.  

  ## Other works If you are looking for datasets to evaluate your re-identification models, please have a look at our other works on player re-identification for team sport events: - [CAMELTrack: Context-Aware Multi-cue ExpLoitation for Online Multi-Object Tracking](https://github.com/TrackingLaboratory/CAMELTrack) - The [SoccerNet Player Re-Identification](https://github.com/SoccerNet/sn-reid) dataset - The [DeepSportRadar Player Re-Identification](https://github.com/DeepSportRadar/player-reidentification-challenge) dataset

## Questions and suggestions If you have any question/suggestion, or find any bug/issue with the code, please raise a GitHub issue in this repository, I'll be glad to help you as much as I can! I'll try to update the documentation regularly based on your questions. ## Citation If you use this repository for your research or wish to refer to our method [BPBReID](https://arxiv.org/abs/2211.03679), please use the following BibTeX entry: ``` @article{bpbreid, archivePrefix = {arXiv}, arxivId = {2211.03679}, author = {Somers, Vladimir and {De Vleeschouwer}, Christophe and Alahi, Alexandre}, doi = {10.48550/arxiv.2211.03679}, eprint = {2211.03679}, isbn = {2211.03679v1}, journal = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV23)}, month = {nov}, title = {{Body Part-Based Representation Learning for Occluded Person Re-Identification}}, url = {https://arxiv.org/abs/2211.03679v1 http://arxiv.org/abs/2211.03679}, year = {2023} } ``` ## Acknowledgement This codebase is a fork from [Torchreid](https://github.com/KaiyangZhou/deep-person-reid) ================================================ FILE: Torchreid_original_README.rst ================================================ Torchreid =========== Torchreid is a library for deep-learning person re-identification, written in `PyTorch `_. It features: - multi-GPU training - support both image- and video-reid - end-to-end training and evaluation - incredibly easy preparation of reid datasets - multi-dataset training - cross-dataset evaluation - standard protocol used by most research papers - highly extensible (easy to add models, datasets, training methods, etc.) - implementations of state-of-the-art deep reid models - access to pretrained reid models - advanced training techniques - visualization tools (tensorboard, ranks, etc.) Code: https://github.com/KaiyangZhou/deep-person-reid. Documentation: https://kaiyangzhou.github.io/deep-person-reid/. 
How-to instructions: https://kaiyangzhou.github.io/deep-person-reid/user_guide. Model zoo: https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO. Tech report: https://arxiv.org/abs/1910.10093. You can find some research projects that are built on top of Torchreid `here `_. What's new ------------ - [May 2020] Added the person attribute recognition code used in [Omni-Scale Feature Learning for Person Re-Identification (ICCV'19)](https://arxiv.org/abs/1905.00953). - [May 2020] ``1.2.1``: Added a simple API for feature extraction (``torchreid/utils/feature_extractor.py``). See the `documentation `_ for the instruction. - [Apr 2020] Code for reproducing the experiments of `deep mutual learning `_ in the `OSNet paper `__ (Supp. B) has been released at ``projects/DML``. - [Apr 2020] Upgraded to ``1.2.0``. The engine class has been made more model-agnostic to improve extensibility. See `Engine `_ and `ImageSoftmaxEngine `_ for more details. Credit to `Dassl.pytorch `_. - [Dec 2019] Our `OSNet paper `_ has been updated, with additional experiments (in section B of the supplementary) showing some useful techniques for improving OSNet's performance in practice. - [Nov 2019] ``ImageDataManager`` can load training data from target datasets by setting ``load_train_targets=True``, and the train-loader can be accessed with ``train_loader_t = datamanager.train_loader_t``. This feature is useful for domain adaptation research. Installation --------------- Make sure `conda `_ is installed. .. 
code-block:: bash # cd to your preferred directory and clone this repo git clone https://github.com/KaiyangZhou/deep-person-reid.git # create environment cd deep-person-reid/ conda create --name torchreid python=3.7 conda activate torchreid # install dependencies # make sure `which python` and `which pip` point to the correct path pip install -r requirements.txt # install torch and torchvision (select the proper cuda version to suit your machine) conda install pytorch torchvision cudatoolkit=9.0 -c pytorch # install torchreid (don't need to re-build it if you modify the source code) python setup.py develop Get started: 30 seconds to Torchreid ------------------------------------- 1. Import ``torchreid`` .. code-block:: python import torchreid 2. Load data manager .. code-block:: python datamanager = torchreid.data.ImageDataManager( root='reid-data', sources='market1501', targets='market1501', height=256, width=128, batch_size_train=32, batch_size_test=100, transforms=['random_flip', 'random_crop'] ) 3 Build model, optimizer and lr_scheduler .. code-block:: python model = torchreid.models.build_model( name='resnet50', num_classes=datamanager.num_train_pids, loss='softmax', pretrained=True ) model = model.cuda() optimizer = torchreid.optim.build_optimizer( model, optim='adam', lr=0.0003 ) scheduler = torchreid.optim.build_lr_scheduler( optimizer, lr_scheduler='single_step', stepsize=20 ) 4. Build engine .. code-block:: python engine = torchreid.engine.ImageSoftmaxEngine( datamanager, model, optimizer=optimizer, scheduler=scheduler, label_smooth=True ) 5. Run training and test .. code-block:: python engine.run( save_dir='log/resnet50', max_epoch=60, eval_freq=10, test_only=False ) A unified interface ----------------------- In "deep-person-reid/scripts/", we provide a unified interface to train and test a model. See "scripts/main.py" and "scripts/default_config.py" for more details. 
The folder "configs/" contains some predefined configs which you can use as a starting point. Below we provide an example to train and test `OSNet (Zhou et al. ICCV'19) `_. Assume :code:`PATH_TO_DATA` is the directory containing reid datasets. The environmental variable :code:`CUDA_VISIBLE_DEVICES` is omitted, which you need to specify if you have a pool of gpus and want to use a specific set of them. Conventional setting ^^^^^^^^^^^^^^^^^^^^^ To train OSNet on Market1501, do .. code-block:: bash python scripts/main.py \ --config-file configs/im_osnet_x1_0_softmax_256x128_amsgrad_cosine.yaml \ --transforms random_flip random_erase \ --root $PATH_TO_DATA The config file sets Market1501 as the default dataset. If you wanna use DukeMTMC-reID, do .. code-block:: bash python scripts/main.py \ --config-file configs/im_osnet_x1_0_softmax_256x128_amsgrad_cosine.yaml \ -s dukemtmcreid \ -t dukemtmcreid \ --transforms random_flip random_erase \ --root $PATH_TO_DATA \ data.save_dir log/osnet_x1_0_dukemtmcreid_softmax_cosinelr The code will automatically (download and) load the ImageNet pretrained weights. After the training is done, the model will be saved as "log/osnet_x1_0_market1501_softmax_cosinelr/model.pth.tar-250". Under the same folder, you can find the `tensorboard `_ file. To visualize the learning curves using tensorboard, you can run :code:`tensorboard --logdir=log/osnet_x1_0_market1501_softmax_cosinelr` in the terminal and visit :code:`http://localhost:6006/` in your web browser. Evaluation is automatically performed at the end of training. To run the test again using the trained model, do .. 
code-block:: bash python scripts/main.py \ --config-file configs/im_osnet_x1_0_softmax_256x128_amsgrad_cosine.yaml \ --root $PATH_TO_DATA \ model.load_weights log/osnet_x1_0_market1501_softmax_cosinelr/model.pth.tar-250 \ test.evaluate True Cross-domain setting ^^^^^^^^^^^^^^^^^^^^^ Suppose you wanna train OSNet on DukeMTMC-reID and test its performance on Market1501, you can do .. code-block:: bash python scripts/main.py \ --config-file configs/im_osnet_x1_0_softmax_256x128_amsgrad.yaml \ -s dukemtmcreid \ -t market1501 \ --transforms random_flip color_jitter \ --root $PATH_TO_DATA Here we only test the cross-domain performance. However, if you also want to test the performance on the source dataset, i.e. DukeMTMC-reID, you can set :code:`-t dukemtmcreid market1501`, which will evaluate the model on the two datasets separately. Different from the same-domain setting, here we replace :code:`random_erase` with :code:`color_jitter`. This can improve the generalization performance on the unseen target dataset. Pretrained models are available in the `Model Zoo `_. 
Datasets -------- Image-reid datasets ^^^^^^^^^^^^^^^^^^^^^ - `Market1501 `_ - `CUHK03 `_ - `DukeMTMC-reID `_ - `MSMT17 `_ - `VIPeR `_ - `GRID `_ - `CUHK01 `_ - `SenseReID `_ - `QMUL-iLIDS `_ - `PRID `_ Video-reid datasets ^^^^^^^^^^^^^^^^^^^^^^^ - `MARS `_ - `iLIDS-VID `_ - `PRID2011 `_ - `DukeMTMC-VideoReID `_ Models ------- ImageNet classification models ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - `ResNet `_ - `ResNeXt `_ - `SENet `_ - `DenseNet `_ - `Inception-ResNet-V2 `_ - `Inception-V4 `_ - `Xception `_ - `IBN-Net `_ Lightweight models ^^^^^^^^^^^^^^^^^^^ - `NASNet `_ - `MobileNetV2 `_ - `ShuffleNet `_ - `ShuffleNetV2 `_ - `SqueezeNet `_ ReID-specific models ^^^^^^^^^^^^^^^^^^^^^^ - `MuDeep `_ - `ResNet-mid `_ - `HACNN `_ - `PCB `_ - `MLFN `_ - `OSNet `_ - `OSNet-AIN `_ Useful links ------------- - `OSNet-IBN1-Lite (test-only code with lite docker container) `_ - `Deep Learning for Person Re-identification: A Survey and Outlook `_ Citation --------- If you find this code useful to your research, please cite the following papers. .. 
code-block:: bash @article{torchreid, title={Torchreid: A Library for Deep Learning Person Re-Identification in Pytorch}, author={Zhou, Kaiyang and Xiang, Tao}, journal={arXiv preprint arXiv:1910.10093}, year={2019} } @inproceedings{zhou2019osnet, title={Omni-Scale Feature Learning for Person Re-Identification}, author={Zhou, Kaiyang and Yang, Yongxin and Cavallaro, Andrea and Xiang, Tao}, booktitle={ICCV}, year={2019} } @article{zhou2019learning, title={Learning Generalisable Omni-Scale Representations for Person Re-Identification}, author={Zhou, Kaiyang and Yang, Yongxin and Cavallaro, Andrea and Xiang, Tao}, journal={arXiv preprint arXiv:1910.06827}, year={2019} } ================================================ FILE: configs/bpbreid/bpbreid_dukemtmc_test.yaml ================================================ data: root: '~/datasets/reid' sources: ['dukemtmcreid'] targets: ['dukemtmcreid'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' load_weights: 'pretrained_models/bpbreid_dukemtmcreid_hrnet32_10669.pth' load_config: True bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' test: evaluate: True batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_dukemtmc_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['dukemtmcreid'] targets: ['dukemtmcreid'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: # SOTA weights for GiLt loss globl: id: 1. 
tr: 0. foreg: id: 1. tr: 1. conct: id: 1. tr: 0. parts: id: 0. tr: 1. pixls: ce: 0.35 train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_market1501_test.yaml ================================================ data: root: '~/datasets/reid' sources: ['market1501'] targets: ['market1501'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' load_weights: 'pretrained_models/bpbreid_market1501_hrnet32_10642.pth' load_config: True bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' test: evaluate: True batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_market1501_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['market1501'] targets: ['market1501'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: # SOTA weights for GiLt loss globl: id: 1. tr: 0. foreg: id: 1. tr: 1. conct: id: 1. tr: 0. parts: id: 0. tr: 1. 
pixls: ce: 0.35 train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_occ_duke_test.yaml ================================================ data: root: '~/datasets/reid' sources: ['occluded_duke'] targets: ['occluded_duke'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' load_weights: 'pretrained_models/bpbreid_occluded_duke_hrnet32_10670.pth' load_config: True bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' test: evaluate: True batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_occ_duke_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['occluded_duke'] targets: ['occluded_duke'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'eight' loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: # SOTA weights for GiLt loss globl: id: 1. tr: 0. foreg: id: 1. tr: 0. conct: id: 1. tr: 0. parts: id: 0. tr: 1. 
pixls: ce: 0.35 train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_occ_reid_test.yaml ================================================ data: root: '~/datasets/reid' sources: ['market1501'] targets: ['occluded_reid'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' load_weights: 'pretrained_models/' load_config: True bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' test: evaluate: True batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_occ_reid_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['market1501'] targets: ['market1501', 'occluded_reid'] height: 384 width: 128 transforms: ['rc', 're', 'rf', 'cj'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'eight' loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: # SOTA weights for GiLt loss globl: id: 1. tr: 0. foreg: id: 1. tr: 1. conct: id: 1. tr: 0. parts: id: 0. tr: 1. 
pixls: ce: 0.7 train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_p_dukemtmc_test.yaml ================================================ data: root: '~/datasets/reid' sources: ['p_dukemtmc_reid'] targets: ['p_dukemtmc_reid'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' load_weights: 'pretrained_models/bpbreid_p_dukemtmc_hrnet32_10672.pth' load_config: True bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'five_v' test: evaluate: True batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/bpbreid_p_dukemtmc_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['p_dukemtmc_reid'] targets: ['p_dukemtmc_reid'] height: 384 width: 128 transforms: ['rc', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: True learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['bn_foreg', 'parts'] masks: dir: 'pifpaf_maskrcnn_filtering' preprocess: 'eight' loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: # SOTA weights for GiLt loss globl: id: 1. tr: 0. foreg: id: 1. tr: 0. conct: id: 1. tr: 0. parts: id: 0. tr: 1. 
pixls: ce: 0.35 train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/pcb_market1501_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['market1501'] targets: ['market1501'] height: 384 width: 128 transforms: ['rc', 'rf', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: False # no visibility scores used at training for PCB learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['conct'] # the holistic concatenated embedding alone is used to compute query-gallery distances masks: type: 'stripes' # PCB uses horizontal stripes parts_num: 6 # we use 6 horizontal stripes here loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: globl: id: 0. tr: 0. foreg: id: 0. tr: 0. conct: id: 0. tr: 0. parts: id: 1. # For PCB, we use the cross-entropy (identity) loss on each of the 6 horizontal stripes tr: 0. pixls: ce: 0. # no body part prediction loss for PCB train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: configs/bpbreid/pcb_occ_duke_train.yaml ================================================ data: root: '~/datasets/reid' sources: ['occluded_duke'] targets: ['occluded_duke'] height: 384 width: 128 transforms: ['rc', 'rf', 're'] model: name: 'bpbreid' bpbreid: mask_filtering_training: False mask_filtering_testing: False # no visibility scores used at training for PCB learnable_attention_enabled: True backbone: 'hrnet32' test_embeddings: ['conct'] # the holistic concatenated embedding alone is used to compute query-gallery distances masks: type: 'stripes' # PCB uses horizontal stripes parts_num: 6 # we use 6 horizontal stripes here loss: name: 'part_based' part_based: name: 'part_averaged_triplet_loss' ppl: 'cl' weights: globl: id: 0. tr: 0. foreg: id: 0. tr: 0. 
conct: id: 0. tr: 0. parts: id: 1. # For PCB, we use the cross-entropy (identity) loss on each of the 6 horizontal stripes tr: 0. pixls: ce: 0. # no body part prediction loss for PCB train: batch_size: 64 test: evaluate: False batch_size: 64 visrank: True ================================================ FILE: docs/AWESOME_REID.md ================================================ # Awesome-ReID Here is a collection of ReID-related research with links to papers and code. You are welcome to submit [PR](https://help.github.com/articles/creating-a-pull-request/)s if you find something missing. - [ArXiv20] Deep Learning for Person Re-identification: A Survey and Outlook [[paper](https://arxiv.org/abs/2001.04193)] [[code](https://github.com/mangye16/ReID-Survey)] - [ArXiv19] Learning Generalisable Omni-Scale Representations for Person Re-Identification [[paper](https://arxiv.org/abs/1910.06827)][[code](https://github.com/KaiyangZhou/deep-person-reid)] - [ICCV19] RGB-Infrared Cross-Modality Person Re-Identification via Joint Pixel and Feature Alignment. [[paper](http://openaccess.thecvf.com/content_ICCV_2019/papers/Wang_RGB-Infrared_Cross-Modality_Person_Re-Identification_via_Joint_Pixel_and_Feature_Alignment_ICCV_2019_paper.pdf)] [[code](https://github.com/wangguanan/AlignGAN)] - [ICCV19] Unsupervised Graph Association for Person Re-identification. [[paper](https://github.com/yichuan9527/Unsupervised-Graph-Association-for-Person-Re-identification)] [[code](https://github.com/yichuan9527/Unsupervised-Graph-Association-for-Person-Re-identification)] - [ICCV19] Self-similarity Grouping: A Simple Unsupervised Cross Domain Adaptation Approach for Person Re-identification. [[paper](http://openaccess.thecvf.com/content_ICCV_2019/papers/Fu_Self-Similarity_Grouping_A_Simple_Unsupervised_Cross_Domain_Adaptation_Approach_for_ICCV_2019_paper.pdf)] [[code](https://github.com/OasisYang/SSG)] - [ICCV19] Spectral Feature Transformation for Person Re-Identification. 
[[paper](http://openaccess.thecvf.com/content_ICCV_2019/papers/Luo_Spectral_Feature_Transformation_for_Person_Re-Identification_ICCV_2019_paper.pdf)] [[code](https://github.com/LuckyDC/SFT_REID)] - [ICCV19] Beyond Human Parts: Dual Part-Aligned Representations for Person Re-Identification. [[paper](http://openaccess.thecvf.com/content_ICCV_2019/papers/Guo_Beyond_Human_Parts_Dual_Part-Aligned_Representations_for_Person_Re-Identification_ICCV_2019_paper.pdf)] [[code](https://github.com/ggjy/P2Net.pytorch)] - [ICCV19] Co-segmentation Inspired Attention Networks for Video-based Person Re-identification. [[paper](http://openaccess.thecvf.com/content_ICCV_2019/papers/Subramaniam_Co-Segmentation_Inspired_Attention_Networks_for_Video-Based_Person_Re-Identification_ICCV_2019_paper.pdf)][[code](https://github.com/InnovArul/vidreid_cosegmentation)] - [ICCV19] Mixed High-Order Attention Network for Person Re-Identification. [[paper](https://arxiv.org/abs/1908.05819)][[code](https://github.com/chenbinghui1/MHN)] - [ICCV19] ABD-Net: Attentive but Diverse Person Re-Identification. [[paper](https://arxiv.org/abs/1908.01114)] [[code](https://github.com/TAMU-VITA/ABD-Net)] - [ICCV19] Omni-Scale Feature Learning for Person Re-Identification. [[paper](https://arxiv.org/abs/1905.00953)] [[code](https://github.com/KaiyangZhou/deep-person-reid)] - [CVPR19] Joint Discriminative and Generative Learning for Person Re-identification. [[paper](https://arxiv.org/abs/1904.07223)][[code](https://github.com/NVlabs/DG-Net)] - [CVPR19] Invariance Matters: Exemplar Memory for Domain Adaptive Person Re-identification. [[paper](https://arxiv.org/abs/1904.01990)][[code](https://github.com/zhunzhong07/ECN)] - [CVPR19] Dissecting Person Re-identification from the Viewpoint of Viewpoint. [[paper](https://arxiv.org/abs/1812.02162)][[code](https://github.com/sxzrt/Dissecting-Person-Re-ID-from-the-Viewpoint-of-Viewpoint)] - [CVPR19] Unsupervised Person Re-identification by Soft Multilabel Learning. 
[[paper](https://arxiv.org/abs/1903.06325)][[code](https://github.com/KovenYu/MAR)] - [CVPR19] Patch-based Discriminative Feature Learning for Unsupervised Person Re-identification. [[paper](https://kovenyu.com/publication/2019-cvpr-pedal/)][[code](https://github.com/QizeYang/PAUL)] - [AAAI19] Spatial and Temporal Mutual Promotion for Video-based Person Re-identification. [[paper](https://arxiv.org/abs/1812.10305)][[code](https://github.com/yolomax/person-reid-lib)] - [AAAI19] Spatial-Temporal Person Re-identification. [[paper](https://arxiv.org/abs/1812.03282)][[code](https://github.com/Wanggcong/Spatial-Temporal-Re-identification)] - [AAAI19] Horizontal Pyramid Matching for Person Re-identification. [[paper](https://arxiv.org/abs/1804.05275)][[code](https://github.com/OasisYang/HPM)] - [AAAI19] Backbone Can Not be Trained at Once: Rolling Back to Pre-trained Network for Person Re-identification. [[paper](https://arxiv.org/abs/1901.06140)][[code](https://github.com/youngminPIL/rollback)] - [AAAI19] A Bottom-Up Clustering Approach to Unsupervised Person Re-identification. [[paper](https://vana77.github.io/vana77.github.io/images/AAAI19.pdf)][[code](https://github.com/vana77/Bottom-up-Clustering-Person-Re-identification)] - [NIPS18] FD-GAN: Pose-guided Feature Distilling GAN for Robust Person Re-identification. [[paper](https://arxiv.org/abs/1810.02936)][[code](https://github.com/yxgeee/FD-GAN)] - [ECCV18] Generalizing A Person Retrieval Model Hetero- and Homogeneously. [[paper](http://openaccess.thecvf.com/content_ECCV_2018/papers/Zhun_Zhong_Generalizing_A_Person_ECCV_2018_paper.pdf)][[code](https://github.com/zhunzhong07/HHL)] - [ECCV18] Pose-Normalized Image Generation for Person Re-identification. [[paper](https://arxiv.org/abs/1712.02225)][[code](https://github.com/naiq/PN_GAN)] - [CVPR18] Camera Style Adaptation for Person Re-Identification. 
[[paper](https://arxiv.org/abs/1711.10295)][[code](https://github.com/zhunzhong07/CamStyle)] - [CVPR18] Deep Group-Shuffling Random Walk for Person Re-Identification. [[paper](https://arxiv.org/abs/1807.11178)][[code](https://github.com/YantaoShen/kpm_rw_person_reid)] - [CVPR18] End-to-End Deep Kronecker-Product Matching for Person Re-identification. [[paper](https://arxiv.org/abs/1807.11182)][[code](https://github.com/YantaoShen/kpm_rw_person_reid)] - [CVPR18] Features for Multi-Target Multi-Camera Tracking and Re-Identification. [[paper](https://arxiv.org/abs/1803.10859)][[code](https://github.com/ergysr/DeepCC)] - [CVPR18] Group Consistent Similarity Learning via Deep CRF for Person Re-Identification. [[paper](http://openaccess.thecvf.com/content_cvpr_2018/papers/Chen_Group_Consistent_Similarity_CVPR_2018_paper.pdf)][[code](https://github.com/dapengchen123/crf_affinity)] - [CVPR18] Harmonious Attention Network for Person Re-Identification. [[paper](https://arxiv.org/abs/1802.08122)][[code](https://github.com/KaiyangZhou/deep-person-reid)] - [CVPR18] Human Semantic Parsing for Person Re-Identification. [[paper](https://arxiv.org/abs/1804.00216)][[code](https://github.com/emrahbasaran/SPReID)] - [CVPR18] Multi-Level Factorisation Net for Person Re-Identification. [[paper](https://arxiv.org/abs/1803.09132)][[code](https://github.com/KaiyangZhou/deep-person-reid)] - [CVPR18] Resource Aware Person Re-identification across Multiple Resolutions. [[paper](https://arxiv.org/abs/1805.08805)][[code](https://github.com/mileyan/DARENet)] - [CVPR18] Exploit the Unknown Gradually: One-Shot Video-Based Person Re-Identification by Stepwise Learning. [[paper](https://yu-wu.net/pdf/CVPR2018_Exploit-Unknown-Gradually.pdf)][[code](https://github.com/Yu-Wu/Exploit-Unknown-Gradually)] - [ArXiv18] Revisiting Temporal Modeling for Video-based Person ReID. 
[[paper](https://arxiv.org/abs/1805.02104)][[code](https://github.com/jiyanggao/Video-Person-ReID)] ================================================ FILE: docs/MODEL_ZOO.md ================================================ # Model Zoo - Results are presented in the format of **. - When computing model size and FLOPs, only layers that are used at test time are considered (see `torchreid.utils.compute_model_complexity`). - Asterisk (\*) means the model is trained from scratch. - `combineall=True` means all images in the dataset are used for model training. ## ImageNet pretrained models | Model | Download | | :--- | :---: | | shufflenet | [model](https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM) | | mobilenetv2_x1_0 | [model](https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c) | | mobilenetv2_x1_4 | [model](https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk) | | mlfn | [model](https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk) | | osnet_x1_0 | [model](https://mega.nz/#!YK5GRARL!F90NsNB2XHjXGZFC3Lrw1GMic0oMw4fnfuDUnSrPAYM) | | osnet_x0_75 | [model](https://mega.nz/#!NPxilYBA!Se414Wtts__7eY6J5FIrowynvjUUG7a8Z5zUPfJN33s) | | osnet_x0_5 | [model](https://mega.nz/#!NO4ihQSJ!oMIRSZ0HlJF_8FKUbXT8Ei0vzH0xUYs5tWaf_KLrODg) | | osnet_x0_25 | [model](https://mega.nz/#!IDwQwaxT!TbQ_33gPK-ZchPFTf43UMc45rlNKWiWMqH4rTXB1T7k) | | osnet_ibn_x1_0 | [model](https://mega.nz/#!8Wo2kSDR!bNvgu4V0VkCQp_L2ZUDaudYKYRCkkSNdzcA1CcZGZTE) | | osnet_ain_x1_0 | [model](https://drive.google.com/open?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo) | ## Same-domain ReID | Model | # Param (10^6) | GFLOPs | Loss | Input | Transforms | Distance | market1501 | dukemtmcreid | msmt17 | | :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | | resnet50 | 23.5 | 2.7 | softmax | (256, 128) | `random_flip`, `random_crop` | `euclidean` | [87.9 
(70.4)](https://mega.nz/#!FKZjVKaZ!4v_FR8pTvuHoMQIKdstJ_YCsRrtZW2hwWxc-T0JIlHE) | [78.3 (58.9)](https://mega.nz/#!JPZjCYhK!YVJbE_4vTc8DX19Rt_FB77YY4BaEA1P6Xb5sNJGep2M) | [63.2 (33.9)](https://mega.nz/#!APAxDY4Z!Iou9x8s3ATdYS2SlK2oiJbHrhvlzH7F1gE2qjM-GJGw) | | resnet50_fc512 | 24.6 | 4.1 | softmax | (256, 128) | `random_flip`, `random_crop` | `euclidean` | [90.8 (75.3)](https://mega.nz/#!EaZjhKyS!lBvD3vAJ4DOmElZkNa7gyPM1RE661GUd2v9kK84gSZE) | [81.0 (64.0)](https://mega.nz/#!lXYDSKZa!lumiXkY2H5Sm8gEgTWPBdWKv3ujy4zjrffjERaXkc9I) | [69.6 (38.4)](https://mega.nz/#!9PQTXIpL!iI5wgieTCn0Jm-pyg9RCu0RkH43pV3ntHhr1PeqSyT4) | | mlfn | 32.5 | 2.8 | softmax | (256, 128) | `random_flip`, `random_crop` | `euclidean` | [90.1 (74.3)](https://mega.nz/#!kHQ3ESLT!NoGc8eHEBZOJZM19THh3DFfRBXIPXzM-sdLmF1mvTXA) | [81.1 (63.2)](https://mega.nz/#!8PQXUCaI!mJO1vD9tI739hkNBj2QWUt0VPcZ-s89fSMMGPPP1msc) | [66.4 (37.2)](https://mega.nz/#!paIXFQCS!W3ZGkxyF1idwvQzTRDE2p0DhNDki2SBJRfp7S_Cwphk) | | hacnn* | 4.5 | 0.5 | softmax | (160, 64) | `random_flip`, `random_crop` | `euclidean` | [90.9 (75.6)](https://mega.nz/#!ULQXUQBK!S-8v_pR2xBD3ZpuY0I7Bqift-eX_V84gajHMDG6zUac) | [80.1 (63.2)](https://mega.nz/#!wPJTkAQR!XkKd39lsmBZMrCh3JjF6vnNafBZkouVIVdeBqQKdSzA) | [64.7 (37.2)](https://mega.nz/#!AXAziKjL!JtMwHz2UYy58gDMQLGakSmF3JOr72o8zmkqlQA-LIpQ) | | mobilenetv2_x1_0 | 2.2 | 0.2 | softmax | (256, 128) | `random_flip`, `random_crop` | `euclidean` | [85.6 (67.3)](https://mega.nz/#!8KYTFAIB!3dL35WQLxSoTSClDTv0kxa81k3fh5hXmAWA4_a3qiOI) | [74.2 (54.7)](https://mega.nz/#!hbRXDSCL!YYgqJ6PVUf4clgtUuK2s5FRhYJdU3yTibLscwOTNnDk) | [57.4 (29.3)](https://mega.nz/#!5SJTmCYb!ZQ8O2MN9JF4-WDAeX04Xex1KyuBYQ_o2aoMIsTgQ748) | | mobilenetv2_x1_4 | 4.3 | 0.4 | softmax | (256, 128) | `random_flip`, `random_crop` | `euclidean` | [87.0 (68.5)](https://mega.nz/#!4XZhEKCS!6lTuTRbHIWU5nzJzTPDGykA7sPME8_1ISGsUYFJXZWA) | [76.2 (55.8)](https://mega.nz/#!JbQVDIYQ!-7pnjIfpIDt1EoQOvpvuIEcTj3Qg8SE6o_3ZPGWrIcw) | [60.1 
(31.5)](https://mega.nz/#!gOYDAQrK!sMJO7c_X4iIxoVfV_tXYdzeDJByPo5XkUjEN7Z2JTmM) | | osnet_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip` | `euclidean` | [94.2 (82.6)](https://mega.nz/#!hLoyTSba!fqt7GcKrHJhwe9BtuK0ozgVAQcrlMG8Pm6JsSfr5HEI) | [87.0 (70.2)](https://mega.nz/#!ETwGhQYB!h2gHN-H3J4X4WqcJXy2b0pPKl28paydkiS-PDHsEgPM) | [74.9 (43.8)](https://mega.nz/#!hWxE2aJA!NGcxu5uYH1qI6DfBTu0KFoi_NfoA0TJcBFW-g43pC0I) | | osnet_x0_75 | 1.3 | 0.57 | softmax | (256, 128) | `random_flip` | `euclidean` | [93.7 (81.2)](https://mega.nz/#!JO4WAaJa!nQuoqZnYfy0xu7vs2mp28AFceya-ZhrXTry837jvoDQ) | [85.8 (69.8)](https://mega.nz/#!lOgkEIoI!fQ5vuYIABIOcRxF-OK-6YxtEufWhyVkYkGB4qPoRYJ4) | [72.8 (41.4)](https://mega.nz/#!0exGXI5a!rxtzBayyRK0on0HFq9XO0UtWEBhbV86dFitljhjeWcs) | | osnet_x0_5 | 0.6 | 0.27 | softmax | (256, 128) | `random_flip` | `euclidean` | [92.5 (79.8)](https://mega.nz/#!QCx0RArD!hqz3Mh0Iif5d8PpQW0frxa-Tepn2a2g24aei7du4MFs) | [85.1 (67.4)](https://mega.nz/#!QTxCDIbT!eOZxj4dHl0uFnjKEB-J3YBY98blXZvppgWGA3CGa-tk) | [69.7 (37.5)](https://mega.nz/#!ETpiECDa!CCkq4JryztHqgw7spL5zDw0usJpAfEsSd5gPlkMufCc) | | osnet_x0_25 | 0.2 | 0.08 | softmax | (256, 128) | `random_flip` | `euclidean` | [91.2 (75.0)](https://mega.nz/#!VWxCgSqY!Q4WaQ3j9D7HMhK3jsbvMuwaZ7yBY80T2Zj5V8JAlAKU) | [82.0 (61.4)](https://mega.nz/#!5TpwnATK!UvU_Asdy_aJ9SNzuvqhEFoemxSSB8vm_Gm8Xe03jqiA) | [61.4 (29.5)](https://mega.nz/#!AWgE3SzD!DngUaNyA7VIqOd2gq10Aty_-ER0CmG0xTJLHLj6_36g) | ## Cross-domain ReID #### Market1501 -> DukeMTMC-reID | Model | # Param (10^6) | GFLOPs | Loss | Input | Transforms | Distance | Rank-1 | Rank-5 | Rank-10 | mAP | Download | | :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | | osnet_ibn_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 48.5 | 62.3 | 67.4 | 26.7 | [model](https://mega.nz/#!wXwGxKxK!f8EMk8hBt6AjxU3JIPGMFSMvX7j-Nt5Lp1Gpbqso1Ts) | | osnet_ain_x1_0 | 2.2 | 0.98 | softmax | (256, 128) 
| `random_flip`, `color_jitter` | `cosine` | 52.4 | 66.1 | 71.2 | 30.5 | [model](https://mega.nz/#!QLJE2CRI!FXYc3Vm6Y5Scwx0xvRwBJxId56kf06fIXNLwA_b_1FE) | #### DukeMTMC-reID -> Market1501 | Model | # Param (10^6) | GFLOPs | Loss | Input | Transforms | Distance | Rank-1 | Rank-5 | Rank-10 | mAP | Download | | :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | | osnet_ibn_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 57.7 | 73.7 | 80.0 | 26.1 | [model](https://mega.nz/#!FD4WEKJS!ZGgI-2IwVuX6re09xylChR03o6Dkjpi6KSebrbS0fAA) | | osnet_ain_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `cosine` | 61.0 | 77.0 | 82.5 | 30.6 | [model](https://mega.nz/#!4PBQlCCL!9yMHu1WyyBVxqssubLAEyoEfHUiNP4Ggg5On0nCX2S4) | #### MSMT17 (`combineall=True`) -> Market1501 & DukeMTMC-reID | Model | # Param (10^6) | GFLOPs | Loss | Input | Transforms | Distance | msmt17 -> market1501 | msmt17 -> dukemtmcreid | Download | | :--- | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | | resnet50 | 23.5 | 2.7 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 46.3 (22.8) | 52.3 (32.1) | [model](https://mega.nz/#!VTpkWSbS!Y8gDnmg7u-sPwnZDhWXrtZNYOj7UYL4QzZkhDf1qWW4) | | osnet_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 66.6 (37.5) | 66.0 (45.3) | [model](https://mega.nz/#!MepG3QRC!Lb-C9d7rdS_YJjGSoJ5cRlzjYcP28P_1Cm5S5WSslW0) | | osnet_x0_75 | 1.3 | 0.57 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 63.6 (35.5) | 65.3 (44.5) | [model](https://mega.nz/#!tO4WDagL!8Tl6kdJWRXRHQb16GeUHR008tJqW3N7_3fyVMu-LcKM) | | osnet_x0_5 | 0.6 | 0.27 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 64.3 (34.9) | 65.2 (43.3) | [model](https://mega.nz/#!papSWQhY!IId-QfcHj7nXQ_muUubgv9_n0SsnZzarmb5mQgcMv74) | | osnet_x0_25 | 0.2 | 0.08 | softmax | (256, 128) | `random_flip`, 
`color_jitter` | `euclidean` | 59.9 (31.0) | 61.5 (39.6) | [model](https://mega.nz/#!QCoE0Kpa!BITLANumgjiR68TUFteL__N_RIoDKkL0M5Bl3Q8LC3U) | | osnet_ibn_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `euclidean` | 66.5 (37.2) | 67.4 (45.6) | [model](https://mega.nz/#!dL4Q2K5B!ZdHQ_X_rs2T-xmggigM5YvzJhmT1orkr6aQ1_fHgunM) | | osnet_ain_x1_0 | 2.2 | 0.98 | softmax | (256, 128) | `random_flip`, `color_jitter` | `cosine` | 70.1 (43.3) | 71.1 (52.7) | [model](https://mega.nz/#!YTZFnSJY!wlbo_5oa2TpDAGyWCTKTX1hh4d6DvJhh_RUA2z6i_so) | ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# import os import sys sys.path.insert(0, os.path.abspath('..')) # -- Project information ----------------------------------------------------- project = u'torchreid' copyright = u'2019, Kaiyang Zhou' author = u'Kaiyang Zhou' version_file = '../torchreid/__init__.py' with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) __version__ = locals()['__version__'] # The short X.Y version version = __version__ # The full version, including alpha/beta/rc tags release = __version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinxcontrib.napoleon', 'sphinx.ext.viewcode', 'sphinx.ext.githubpages', 'sphinx_markdown_tables', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] # source_suffix = '.rst' source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'} # The master toctree document. master_doc = 'index' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. 
pygments_style = None # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # The default sidebars (for documents that don't match any pattern) are # defined by theme itself. Builtin themes are using these templates by # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', # 'searchbox.html']``. # # html_sidebars = {} # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. htmlhelp_basename = 'torchreiddoc' # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ( master_doc, 'torchreid.tex', u'torchreid Documentation', u'Kaiyang Zhou', 'manual' ), ] # -- Options for manual page output ------------------------------------------ # One entry per manual page. 
List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'torchreid', u'torchreid Documentation', [author], 1) ] # -- Options for Texinfo output ---------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, 'torchreid', u'torchreid Documentation', author, 'torchreid', 'One line description of project.', 'Miscellaneous' ), ] # -- Options for Epub output ------------------------------------------------- # Bibliographic Dublin Core info. epub_title = project # The unique identifier of the text. This can be an ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] # -- Extension configuration ------------------------------------------------- ================================================ FILE: docs/datasets.rst ================================================ .. _datasets: Datasets ========= Here we provide a guide on how to prepare datasets. Suppose you wanna store the reid data in a directory called "path/to/reid-data/", you need to specify the ``root`` as *root='path/to/reid-data/'* when initializing ``DataManager``. Below we use ``$REID`` to denote "path/to/reid-data". Please refer to :ref:`torchreid_data` for details regarding the arguments. .. note:: Dataset with a :math:`\dagger` superscript means that the process is automated, so you can directly call the dataset in ``DataManager`` (which automatically downloads the dataset and organizes the data structure). However, we also provide a way below to help the manual setup in case the automation fails. .. note:: The keys to use specific datasets are enclosed in the parentheses beside the datasets. ..
note:: You are suggested to use the provided names for dataset folders such as "market1501" for Market1501 and "dukemtmcreid" for DukeMTMC-reID when doing the manual setup, otherwise you need to modify the source code accordingly (i.e. the ``dataset_dir`` attribute). .. contents:: :local: Image Datasets -------------- Market1501 :math:`^\dagger` (``market1501``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create a directory named "market1501" under ``$REID``. - Download the dataset to "market1501" from http://www.liangzheng.org/Project/project_reid.html and extract the files. - The data structure should look like .. code-block:: none market1501/ Market-1501-v15.09.15/ query/ bounding_box_train/ bounding_box_test/ - To use the extra 500K distractors (i.e. Market1501 + 500K), go to the **Market-1501+500k Dataset** section at http://www.liangzheng.org/Project/project_reid.html, download the zip file "distractors_500k.zip" and extract it under "market1501/Market-1501-v15.09.15". The argument to use these 500K distrctors is ``market1501_500k`` in ``ImageDataManager``. CUHK03 (``cuhk03``) ^^^^^^^^^^^^^^^^^^^^^ - Create a folder named "cuhk03" under ``$REID``. - Download the dataset to "cuhk03/" from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html and extract "cuhk03_release.zip", resulting in "cuhk03/cuhk03_release/". - Download the new split (767/700) from `person-re-ranking `_. What you need are "cuhk03_new_protocol_config_detected.mat" and "cuhk03_new_protocol_config_labeled.mat". Put these two mat files under "cuhk03/". - The data structure should look like .. code-block:: none cuhk03/ cuhk03_release/ cuhk03_new_protocol_config_detected.mat cuhk03_new_protocol_config_labeled.mat - In the default mode, we load data using the new split (767/700). If you wanna use the original (20) splits (1367/100), please set ``cuhk03_classic_split`` to True in ``ImageDataManager``. 
As the CMC is computed differently from Market1501 for the 1367/100 split (see `here `_), you need to enable ``use_metric_cuhk03`` in ``ImageDataManager`` to activate the *single-gallery-shot* metric for fair comparison with some methods that adopt the old splits (*do not need to report mAP*). In addition, we support both *labeled* and *detected* modes. The default mode loads *detected* images. Enable ``cuhk03_labeled`` in ``ImageDataManager`` if you wanna train and test on *labeled* images. .. note:: The code will extract images in "cuhk-03.mat" and save them under "cuhk03/images_detected" and "cuhk03/images_labeled". Also, four json files will be automatically generated, i.e. "splits_classic_detected.json", "splits_classic_labeled.json", "splits_new_detected.json" and "splits_new_labeled.json". If the parent path of ``$REID`` is changed, these json files should be manually deleted. The code can automatically generate new json files to match the new path. DukeMTMC-reID :math:`^\dagger` (``dukemtmcreid``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create a directory called "dukemtmc-reid" under ``$REID``. - Download "DukeMTMC-reID" from http://vision.cs.duke.edu/DukeMTMC/ and extract it under "dukemtmc-reid". - The data structure should look like .. code-block:: none dukemtmc-reid/ DukeMTMC-reID/ query/ bounding_box_train/ bounding_box_test/ ... MSMT17 (``msmt17``) ^^^^^^^^^^^^^^^^^^^^^ - Create a directory called "msmt17" under ``$REID``. - Download the dataset from http://www.pkuvmc.com/publications/msmt17.html to "msmt17" and extract the files. - The data structure should look like .. code-block:: none msmt17/ MSMT17_V1/ # or MSMT17_V2 train/ test/ list_train.txt list_query.txt list_gallery.txt list_val.txt VIPeR :math:`^\dagger` (``viper``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - The download link is http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip. - Organize the dataset in a folder named "viper" as follows .. 
code-block:: none viper/ VIPeR/ cam_a/ cam_b/ GRID :math:`^\dagger` (``grid``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - The download link is http://personal.ie.cuhk.edu.hk/~ccloy/files/datasets/underground_reid.zip. - Organize the dataset in a folder named "grid" as follows .. code-block:: none grid/ underground_reid/ probe/ gallery/ ... CUHK01 (``cuhk01``) ^^^^^^^^^^^^^^^^^^^^^^^^ - Create a folder named "cuhk01" under ``$REID``. - Download "CUHK01.zip" from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html and place it under "cuhk01/". - The code can automatically extract the files, or you can do it yourself. - The data structure should look like .. code-block:: none cuhk01/ campus/ SenseReID (``sensereid``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create "sensereid" under ``$REID``. - Download the dataset from this `link `_ and extract it to "sensereid". - Organize the data to be like .. code-block:: none sensereid/ SenseReID/ test_probe/ test_gallery/ QMUL-iLIDS :math:`^\dagger` (``ilids``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create a folder named "ilids" under ``$REID``. - Download the dataset from http://www.eecs.qmul.ac.uk/~jason/data/i-LIDS_Pedestrian.tgz and organize it to look like .. code-block:: none ilids/ i-LIDS_Pedestrian/ Persons/ PRID (``prid``) ^^^^^^^^^^^^^^^^^^^ - Create a directory named "prid2011" under ``$REID``. - Download the dataset from https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/PRID11/ and extract it under "prid2011". - The data structure should end up with .. code-block:: none prid2011/ prid_2011/ single_shot/ multi_shot/ CUHK02 (``cuhk02``) ^^^^^^^^^^^^^^^^^^^^^ - Create a folder named "cuhk02" under ``$REID``. - Download the data from http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html and put it under "cuhk02/". - Extract the file so the data structure looks like .. 
code-block:: none cuhk02/ Dataset/ P1/ P2/ P3/ P4/ P5/ Video Datasets -------------- MARS (``mars``) ^^^^^^^^^^^^^^^^^ - Create "mars/" under ``$REID``. - Download the dataset from http://www.liangzheng.com.cn/Project/project_mars.html and place it in "mars/". - Extract "bbox_train.zip" and "bbox_test.zip". - Download the split metadata from https://github.com/liangzheng06/MARS-evaluation/tree/master/info and put "info/" in "mars/". - The data structure should end up with .. code-block:: none mars/ bbox_test/ bbox_train/ info/ iLIDS-VID :math:`^\dagger` (``ilidsvid``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create "ilids-vid" under ``$REID``. - Download the dataset from http://www.eecs.qmul.ac.uk/~xiatian/downloads_qmul_iLIDS-VID_ReID_dataset.html to "ilids-vid". - Organize the data structure to match .. code-block:: none ilids-vid/ i-LIDS-VID/ train-test people splits/ PRID2011 (``prid2011``) ^^^^^^^^^^^^^^^^^^^^^^^^^ - Create a directory named "prid2011" under ``$REID``. - Download the dataset from https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/PRID11/ and extract it under "prid2011". - Download the split created by *iLIDS-VID* from `this google drive `_ and put it under "prid2011/". Following the standard protocol, only 178 persons whose sequences are more than a threshold are used. - The data structure should end up with .. code-block:: none prid2011/ splits_prid2011.json prid_2011/ single_shot/ multi_shot/ DukeMTMC-VideoReID :math:`^\dagger` (``dukemtmcvidreid``) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Create "dukemtmc-vidreid" under ``$REID``. - Download "DukeMTMC-VideoReID" from http://vision.cs.duke.edu/DukeMTMC/ and unzip the file to "dukemtmc-vidreid/". - The data structure should look like .. 
code-block:: none dukemtmc-vidreid/ DukeMTMC-VideoReID/ train/ query/ gallery/ ================================================ FILE: docs/evaluation.rst ================================================ Evaluation ========== Image ReID ----------- - **Market1501**, **DukeMTMC-reID**, **CUHK03 (767/700 split)** and **MSMT17** have fixed split so keeping ``split_id=0`` is fine. - **CUHK03 (classic split)** has 20 fixed splits, so do ``split_id=0~19``. - **VIPeR** contains 632 identities each with 2 images under two camera views. Evaluation should be done for 10 random splits. Each split randomly divides 632 identities to 316 train ids (632 images) and the other 316 test ids (632 images). Note that, in each random split, there are two sub-splits, one using camera-A as query and camera-B as gallery while the other one using camera-B as query and camera-A as gallery. Thus, there are totally 20 splits generated with ``split_id`` starting from 0 to 19. Models can be trained on ``split_id=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]`` (because ``split_id=0`` and ``split_id=1`` share the same train set, and so on and so forth.). At test time, models trained on ``split_id=0`` can be directly evaluated on ``split_id=1``, models trained on ``split_id=2`` can be directly evaluated on ``split_id=3``, and so on and so forth. - **CUHK01** is similar to VIPeR in the split generation. - **GRID** , **iLIDS** and **PRID** have 10 random splits, so evaluation should be done by varying ``split_id`` from 0 to 9. - **SenseReID** has no training images and is used for evaluation only. .. note:: The ``split_id`` argument is defined in ``ImageDataManager`` and ``VideoDataManager``. Please refer to :ref:`torchreid_data`. Video ReID ----------- - **MARS** and **DukeMTMC-VideoReID** have fixed single split so using ``split_id=0`` is ok. - **iLIDS-VID** and **PRID2011** have 10 predefined splits so evaluation should be done by varying ``split_id`` from 0 to 9. 
================================================ FILE: docs/index.rst ================================================ .. include:: ../README.rst .. toctree:: :hidden: user_guide datasets evaluation .. toctree:: :caption: Package Reference :hidden: pkg/data pkg/engine pkg/losses pkg/metrics pkg/models pkg/optim pkg/utils .. toctree:: :caption: Resources :hidden: AWESOME_REID.md MODEL_ZOO.md Indices and tables ================== * :ref:`genindex` * :ref:`modindex` ================================================ FILE: docs/pkg/data.rst ================================================ .. _torchreid_data: torchreid.data ============== Data Manager --------------------------- .. automodule:: torchreid.data.datamanager :members: Sampler ----------------------- .. automodule:: torchreid.data.sampler :members: Transforms --------------------------- .. automodule:: torchreid.data.transforms :members: Dataset --------------------------- .. automodule:: torchreid.data.datasets.dataset :members: .. automodule:: torchreid.data.datasets.__init__ :members: Image Datasets ------------------------------ .. automodule:: torchreid.data.datasets.image.market1501 :members: .. automodule:: torchreid.data.datasets.image.cuhk03 :members: .. automodule:: torchreid.data.datasets.image.dukemtmcreid :members: .. automodule:: torchreid.data.datasets.image.msmt17 :members: .. automodule:: torchreid.data.datasets.image.viper :members: .. automodule:: torchreid.data.datasets.image.grid :members: .. automodule:: torchreid.data.datasets.image.cuhk01 :members: .. automodule:: torchreid.data.datasets.image.ilids :members: .. automodule:: torchreid.data.datasets.image.sensereid :members: .. automodule:: torchreid.data.datasets.image.prid :members: Video Datasets ------------------------------ .. automodule:: torchreid.data.datasets.video.mars :members: .. automodule:: torchreid.data.datasets.video.ilidsvid :members: .. automodule:: torchreid.data.datasets.video.prid2011 :members: .. 
automodule:: torchreid.data.datasets.video.dukemtmcvidreid :members: ================================================ FILE: docs/pkg/engine.rst ================================================ .. _torchreid_engine: torchreid.engine ================== Base Engine ------------ .. autoclass:: torchreid.engine.engine.Engine :members: Image Engines ------------- .. autoclass:: torchreid.engine.image.softmax.ImageSoftmaxEngine :members: .. autoclass:: torchreid.engine.image.triplet.ImageTripletEngine :members: Video Engines ------------- .. autoclass:: torchreid.engine.video.softmax.VideoSoftmaxEngine .. autoclass:: torchreid.engine.video.triplet.VideoTripletEngine ================================================ FILE: docs/pkg/losses.rst ================================================ .. _torchreid_losses: torchreid.losses ================= Softmax -------- .. automodule:: torchreid.losses.cross_entropy_loss :members: Triplet ------- .. automodule:: torchreid.losses.hard_mine_triplet_loss :members: ================================================ FILE: docs/pkg/metrics.rst ================================================ .. _torchreid_metrics: torchreid.metrics ================= Distance --------- .. automodule:: torchreid.metrics.distance :members: Accuracy -------- .. automodule:: torchreid.metrics.accuracy :members: Rank ----- .. automodule:: torchreid.metrics.rank :members: evaluate_rank ================================================ FILE: docs/pkg/models.rst ================================================ .. _torchreid_models: torchreid.models ================= Interface --------- .. automodule:: torchreid.models.__init__ :members: ImageNet Classification Models ------------------------------- .. autoclass:: torchreid.models.resnet.ResNet .. autoclass:: torchreid.models.senet.SENet .. autoclass:: torchreid.models.densenet.DenseNet .. autoclass:: torchreid.models.inceptionresnetv2.InceptionResNetV2 .. autoclass:: torchreid.models.inceptionv4.InceptionV4 .. 
autoclass:: torchreid.models.xception.Xception Lightweight Models ------------------ .. autoclass:: torchreid.models.nasnet.NASNetAMobile .. autoclass:: torchreid.models.mobilenetv2.MobileNetV2 .. autoclass:: torchreid.models.shufflenet.ShuffleNet .. autoclass:: torchreid.models.squeezenet.SqueezeNet .. autoclass:: torchreid.models.shufflenetv2.ShuffleNetV2 ReID-specific Models -------------------- .. autoclass:: torchreid.models.mudeep.MuDeep .. autoclass:: torchreid.models.resnetmid.ResNetMid .. autoclass:: torchreid.models.hacnn.HACNN .. autoclass:: torchreid.models.pcb.PCB .. autoclass:: torchreid.models.mlfn.MLFN .. autoclass:: torchreid.models.osnet.OSNet .. autoclass:: torchreid.models.osnet_ain.OSNet ================================================ FILE: docs/pkg/optim.rst ================================================ .. _torchreid_optim: torchreid.optim ================= Optimizer ---------- .. automodule:: torchreid.optim.optimizer :members: build_optimizer LR Scheduler ------------- .. automodule:: torchreid.optim.lr_scheduler :members: build_lr_scheduler ================================================ FILE: docs/pkg/utils.rst ================================================ .. _torchreid_utils: torchreid.utils ================= Average Meter -------------- .. automodule:: torchreid.utils.avgmeter :members: Loggers ------- .. automodule:: torchreid.utils.loggers :members: Generic Tools --------------- .. automodule:: torchreid.utils.tools :members: ReID Tools ---------- .. automodule:: torchreid.utils.reidtools :members: Torch Tools ------------ .. automodule:: torchreid.utils.torchtools :members: .. 
automodule:: torchreid.utils.model_complexity :members: ================================================ FILE: docs/requirements.txt ================================================ sphinx sphinx-markdown-tables sphinx-rtd-theme sphinxcontrib-napoleon sphinxcontrib-websupport recommonmark ================================================ FILE: docs/user_guide.rst ================================================ How-to ============ .. contents:: :local: Prepare datasets ----------------- See :ref:`datasets`. Find model keys ----------------- Keys are listed under the *Public keys* section within each model class in :ref:`torchreid_models`. Show available models ---------------------- .. code-block:: python import torchreid torchreid.models.show_avai_models() Change the training sampler ----------------------------- The default ``train_sampler`` is "RandomSampler". You can give the specific sampler name as input to ``train_sampler``, e.g. ``train_sampler='RandomIdentitySampler'`` for triplet loss. Choose an optimizer/lr_scheduler ---------------------------------- Please refer to the source code of ``build_optimizer``/``build_lr_scheduler`` in :ref:`torchreid_optim` for details. Resume training ---------------- Suppose the checkpoint is saved in "log/resnet50/model.pth.tar-30", you can do .. code-block:: python start_epoch = torchreid.utils.resume_from_checkpoint( 'log/resnet50/model.pth.tar-30', model, optimizer ) engine.run( save_dir='log/resnet50', max_epoch=60, start_epoch=start_epoch ) Compute model complexity -------------------------- We provide a tool in ``torchreid.utils.model_complexity.py`` to automatically compute the model complexity, i.e. number of parameters and FLOPs. .. 
code-block:: python from torchreid import models, utils model = models.build_model(name='resnet50', num_classes=1000) num_params, flops = utils.compute_model_complexity(model, (1, 3, 256, 128)) # show detailed complexity for each module utils.compute_model_complexity(model, (1, 3, 256, 128), verbose=True) # count flops for all layers including ReLU and BatchNorm utils.compute_model_complexity(model, (1, 3, 256, 128), verbose=True, only_conv_linear=False) Note that (1) this function only provides an estimate of the theoretical time complexity rather than the actual running time which depends on implementations and hardware; (2) the FLOPs is only counted for layers that are used at test time. This means that redundant layers such as person ID classification layer will be ignored. The inference graph depends on how you define the computations in ``forward()``. Combine multiple datasets --------------------------- Easy. Just give whatever datasets (keys) you want to the ``sources`` argument when instantiating a data manager. For example, .. code-block:: python datamanager = torchreid.data.ImageDataManager( root='reid-data', sources=['market1501', 'dukemtmcreid', 'cuhk03', 'msmt17'], height=256, width=128, batch_size=32 ) In this example, the target datasets are Market1501, DukeMTMC-reID, CUHK03 and MSMT17 as the ``targets`` argument is not specified. Please refer to ``Engine.test()`` in :ref:`torchreid_engine` for details regarding how evaluation is performed. Do cross-dataset evaluation ----------------------------- Easy. Just give whatever datasets (keys) you want to the argument ``targets``, like .. 
code-block:: python datamanager = torchreid.data.ImageDataManager( root='reid-data', sources='market1501', targets='dukemtmcreid', # or targets='cuhk03' or targets=['dukemtmcreid', 'cuhk03'] height=256, width=128, batch_size=32 ) Combine train, query and gallery --------------------------------- This can be easily done by setting ``combineall=True`` when instantiating a data manager. Below is an example of using Market1501, .. code-block:: python datamanager = torchreid.data.ImageDataManager( root='reid-data', sources='market1501', height=256, width=128, batch_size=32, market1501_500k=False, combineall=True # it's me, here ) More specifically, with ``combineall=False``, you will get .. code-block:: none => Loaded Market1501 ---------------------------------------- subset | # ids | # images | # cameras ---------------------------------------- train | 751 | 12936 | 6 query | 750 | 3368 | 6 gallery | 751 | 15913 | 6 --------------------------------------- with ``combineall=True``, you will get .. code-block:: none => Loaded Market1501 ---------------------------------------- subset | # ids | # images | # cameras ---------------------------------------- train | 1501 | 29419 | 6 query | 750 | 3368 | 6 gallery | 751 | 15913 | 6 --------------------------------------- Optimize layers with different learning rates ----------------------------------------------- A common practice for fine-tuning pretrained models is to use a smaller learning rate for base layers and a large learning rate for randomly initialized layers (referred to as ``new_layers``). ``torchreid.optim.optimizer`` has implemented such feature. What you need to do is to set ``staged_lr=True`` and give the names of ``new_layers`` such as "classifier". Below is an example of setting different learning rates for base layers and new layers in ResNet50, .. 
code-block:: python # New layer "classifier" has a learning rate of 0.01 # The base layers have a learning rate of 0.001 optimizer = torchreid.optim.build_optimizer( model, optim='sgd', lr=0.01, staged_lr=True, new_layers='classifier', base_lr_mult=0.1 ) Please refer to :ref:`torchreid_optim` for more details. Do two-stepped transfer learning ------------------------------------- To prevent the pretrained layers from being damaged by harmful gradients back-propagated from randomly initialized layers, one can adopt the *two-stepped transfer learning strategy* presented in `Deep Transfer Learning for Person Re-identification `_. The basic idea is to pretrain the randomly initialized layers for few epochs while keeping the base layers frozen before training all layers end-to-end. This has been implemented in ``Engine.train()`` (see :ref:`torchreid_engine`). The arguments related to this feature are ``fixbase_epoch`` and ``open_layers``. Intuitively, ``fixbase_epoch`` denotes the number of epochs to keep the base layers frozen; ``open_layers`` means which layer is open for training. For example, say you want to pretrain the classification layer named "classifier" in ResNet50 for 5 epochs before training all layers, you can do .. code-block:: python engine.run( save_dir='log/resnet50', max_epoch=60, eval_freq=10, test_only=False, fixbase_epoch=5, open_layers='classifier' ) # or open_layers=['fc', 'classifier'] if there is another fc layer that # is randomly initialized, like resnet50_fc512 Note that ``fixbase_epoch`` is counted into ``max_epoch``. In the above example, the base network will be fixed for 5 epochs and then open for training for 55 epochs. Thus, if you want to freeze some layers throughout the training, what you can do is to set ``fixbase_epoch`` equal to ``max_epoch`` and put the layer names in ``open_layers`` which you want to train. 
Test a trained model ---------------------- You can load a trained model using :code:`torchreid.utils.load_pretrained_weights(model, weight_path)` and set ``test_only=True`` in ``engine.run()``. Fine-tune a model pre-trained on reid datasets ----------------------------------------------- Use :code:`torchreid.utils.load_pretrained_weights(model, weight_path)` to load the pre-trained weights and then fine-tune on the dataset you want. Visualize learning curves with tensorboard -------------------------------------------- The ``SummaryWriter()`` for tensorboard will be automatically initialized in ``engine.run()`` when you are training your model. Therefore, you do not need to do extra jobs. After the training is done, the ``*tf.events*`` file will be saved in ``save_dir``. Then, you just call ``tensorboard --logdir=your_save_dir`` in your terminal and visit ``http://localhost:6006/`` in a web browser. See `pytorch tensorboard `_ for further information. Visualize ranking results --------------------------- This can be achieved by setting ``visrank`` to true in ``engine.run()``. ``visrank_topk`` determines the top-k images to be visualized (Default is ``visrank_topk=10``). Note that ``visrank`` can only be used in test mode, i.e. ``test_only=True`` in ``engine.run()``. The output will be saved under ``save_dir/visrank_DATASETNAME`` where each plot contains the top-k similar gallery images given a query. An example is shown below where red and green denote incorrect and correct matches respectively. .. image:: figures/ranking_results.jpg :width: 800px :align: center Visualize activation maps -------------------------- To understand where the CNN focuses on to extract features for ReID, you can visualize the activation maps as in `OSNet `_. This is implemented in ``tools/visualize_actmap.py`` (check the code for more details). An example running command is .. 
code-block:: shell python tools/visualize_actmap.py \ --root $DATA/reid \ -d market1501 \ -m osnet_x1_0 \ --weights PATH_TO_PRETRAINED_WEIGHTS \ --save-dir log/visactmap_osnet_x1_0_market1501 The output will look like (from left to right: image, activation map, overlapped image) .. image:: figures/actmap.jpg :width: 300px :align: center .. note:: In order to visualize activation maps, the CNN needs to output the last convolutional feature maps at eval mode. See ``torchreid/models/osnet.py`` for example. Use your own dataset ---------------------- 1. Write your own dataset class. Below is a template for image dataset. However, it can also be applied to a video dataset class, for which you simply change ``ImageDataset`` to ``VideoDataset``. .. code-block:: python from __future__ import absolute_import from __future__ import print_function from __future__ import division import sys import os import os.path as osp from torchreid.data import ImageDataset class NewDataset(ImageDataset): dataset_dir = 'new_dataset' def __init__(self, root='', **kwargs): self.root = osp.abspath(osp.expanduser(root)) self.dataset_dir = osp.join(self.root, self.dataset_dir) # All you need to do here is to generate three lists, # which are train, query and gallery. # Each list contains tuples of (img_path, pid, camid), # where # - img_path (str): absolute path to an image. # - pid (int): person ID, e.g. 0, 1. # - camid (int): camera ID, e.g. 0, 1. # Note that # - pid and camid should be 0-based. # - query and gallery should share the same pid scope (e.g. # pid=0 in query refers to the same person as pid=0 in gallery). # - train, query and gallery share the same camid scope (e.g. # camid=0 in train refers to the same camera as camid=0 # in query/gallery). train = ... query = ... gallery = ... super(NewDataset, self).__init__(train, query, gallery, **kwargs) 2. Register your dataset. .. code-block:: python import torchreid torchreid.data.register_image_dataset('new_dataset', NewDataset) 3. 
Initialize a data manager with your dataset. .. code-block:: python # use your own dataset only datamanager = torchreid.data.ImageDataManager( root='reid-data', sources='new_dataset' ) # combine with other datasets datamanager = torchreid.data.ImageDataManager( root='reid-data', sources=['new_dataset', 'dukemtmcreid'] ) # cross-dataset evaluation datamanager = torchreid.data.ImageDataManager( root='reid-data', sources=['new_dataset', 'dukemtmcreid'], targets='market1501' # or targets=['market1501', 'cuhk03'] ) Design your own Engine ------------------------ A new Engine should be designed if you have your own loss function. The base Engine class ``torchreid.engine.Engine`` has implemented some generic methods which you can inherit to avoid re-writing. Please refer to the source code for more details. You are suggested to see how ``ImageSoftmaxEngine`` and ``ImageTripletEngine`` are constructed (also ``VideoSoftmaxEngine`` and ``VideoTripletEngine``). All you need to implement might be just a ``forward_backward()`` function. Use Torchreid as a feature extractor in your projects ------------------------------------------------------- We have provided a simple API for feature extraction, which accepts input of various types such as a list of image paths or numpy arrays. More details can be found in the code at ``torchreid/utils/feature_extractor.py``. Here we show a simple example of how to extract features given a list of image paths. .. code-block:: python from torchreid.utils import FeatureExtractor extractor = FeatureExtractor( model_name='osnet_x1_0', model_path='a/b/c/model.pth.tar', device='cuda' ) image_list = [ 'a/b/c/image001.jpg', 'a/b/c/image002.jpg', 'a/b/c/image003.jpg', 'a/b/c/image004.jpg', 'a/b/c/image005.jpg' ] features = extractor(image_list) print(features.shape) # output (5, 512) ================================================ FILE: linter.sh ================================================ echo "Running isort" isort -y -sp . 
echo "Done" echo "Running yapf" yapf -i -r -vv -e build . echo "Done" echo "Running flake8" flake8 . echo "Done" ================================================ FILE: pyproject.toml ================================================ [project] name = "torchreid" authors = [{name="Vladimir Somers"}, {name="Kaiyang Zhou"}] urls = {homepage = "https://github.com/VlSomers/bpbreid"} dynamic = ["version", "description", "license", "dependencies", "keywords", "readme", "optional-dependencies"] [tool.setuptools.dynamic] version = {attr = "torchreid.__version__"} [build-system] requires = ["setuptools", "numpy", "cython"] build-backend = "setuptools.build_meta" ================================================ FILE: requirements.txt ================================================ # pip install -r requirements.txt # base torchreid ---------------------------------------- numpy Cython h5py Pillow six scipy opencv-python matplotlib tb-nightly future yacs gdown flake8 yapf isort # added for bpbreid albumentations pandas tabulate deepdiff wandb monai torchmetrics==0.10.3 ================================================ FILE: requirements_labels.txt ================================================ openpifpaf detectron2 @ git+https://github.com/facebookresearch/detectron2.git ================================================ FILE: setup.py ================================================ import numpy as np import os.path as osp from setuptools import setup, find_packages from distutils.extension import Extension from Cython.Build import cythonize def readme(): with open('README.md') as f: content = f.read() return content def numpy_include(): try: numpy_include = np.get_include() except AttributeError: numpy_include = np.get_numpy_include() return numpy_include ext_modules = [ Extension( 'torchreid.metrics.rank_cylib.rank_cy', ['torchreid/metrics/rank_cylib/rank_cy.pyx'], include_dirs=[numpy_include()], ) ] def get_requirements(filename='requirements.txt'): here = 
osp.dirname(osp.realpath(__file__)) with open(osp.join(here, filename), 'r') as f: requires = [line.replace('\n', '') for line in f.readlines()] return requires setup( description='A library for deep learning person re-ID in PyTorch', license='MIT', long_description=readme(), packages=find_packages(), install_requires=get_requirements(), extras_require={"labels": get_requirements("requirements_labels.txt")}, keywords=['Person Re-Identification', 'Deep Learning', 'Computer Vision'], ext_modules=cythonize(ext_modules) ) ================================================ FILE: torchreid/__init__.py ================================================ from __future__ import print_function, absolute_import from torchreid import data, optim, utils, engine, losses, models, metrics __version__ = '1.2.4' __author__ = 'Kaiyang Zhou' __homepage__ = 'https://kaiyangzhou.github.io/' __description__ = 'Deep learning person re-identification in PyTorch' __url__ = 'https://github.com/KaiyangZhou/deep-person-reid' ================================================ FILE: torchreid/data/__init__.py ================================================ from __future__ import print_function, absolute_import from .datasets import ( Dataset, ImageDataset, VideoDataset, register_image_dataset, register_video_dataset, get_dataset_nickname ) from .datamanager import ImageDataManager, VideoDataManager ================================================ FILE: torchreid/data/data_augmentation/__init__.py ================================================ from __future__ import print_function, absolute_import from .random_occlusion import * ================================================ FILE: torchreid/data/data_augmentation/random_occlusion.py ================================================ # Source: https://github.com/isarandi/synthetic-occlusion/blob/master/augmentation.py import math import os.path import random import sys import xml.etree.ElementTree import numpy as np import matplotlib.pyplot as plt 
import skimage.data
import cv2
import PIL.Image
from albumentations import (
    DualTransform, functional
)


def main():
    """Demo of how to use the code.

    Expects the path to a Pascal VOC release root as the first CLI argument,
    e.g. something/VOCtrainval_11-May-2012/VOCdevkit/VOC2012
    """
    path = sys.argv[1]

    print('Loading occluders from Pascal VOC dataset...')
    occluders = load_occluders(pascal_voc_root_path=path)
    print('Found {} suitable objects'.format(len(occluders)))

    original_im = cv2.resize(skimage.data.astronaut(), (256, 256))
    fig, axarr = plt.subplots(3, 3, figsize=(7, 7))
    for ax in axarr.ravel():
        occluded_im = occlude_with_objects(original_im, occluders)
        ax.imshow(occluded_im, interpolation="none")
        ax.axis('off')

    fig.tight_layout(h_pad=0)
    # plt.savefig('examples.jpg', dpi=150, bbox_inches='tight')
    plt.show()


def load_occluders(
    pascal_voc_root_path,
    classes_filter=None,
):
    """Extract RGBA occluder cut-outs from the Pascal VOC segmentation subset.

    Args:
        pascal_voc_root_path (str): root of a VOC release; must contain the
            ``Annotations``, ``JPEGImages`` and ``SegmentationObject`` folders.
        classes_filter (list of str, optional): VOC class names to keep.
            Defaults to a set of common street-scene classes.

    Returns:
        list of np.ndarray: HxWx4 uint8 arrays (RGB + 0-255 alpha mask),
        each downscaled by a factor of 2 for efficiency.
    """
    occluders = []
    structuring_element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8))
    if classes_filter is None:
        classes_filter = ["person", "bicycle", "boat", "bus", "car", "motorbike", "train"]

    annotation_paths = list_filepaths(os.path.join(pascal_voc_root_path, 'Annotations'))
    for annotation_path in annotation_paths:
        xml_root = xml.etree.ElementTree.parse(annotation_path).getroot()
        # Only images with instance segmentation labels are usable.
        is_segmented = (xml_root.find('segmented').text != '0')
        if not is_segmented:
            continue

        boxes = []
        for i_obj, obj in enumerate(xml_root.findall('object')):
            is_authorized_class = (obj.find('name').text in classes_filter)
            is_difficult = (obj.find('difficult').text != '0')
            is_truncated = (obj.find('truncated').text != '0')
            if is_authorized_class and not is_difficult and not is_truncated:
                bndbox = obj.find('bndbox')
                box = [int(bndbox.find(s).text) for s in ['xmin', 'ymin', 'xmax', 'ymax']]
                boxes.append((i_obj, box))

        if not boxes:
            continue

        im_filename = xml_root.find('filename').text
        # VOC image names are numeric (e.g. 2008_000123.jpg), so a plain
        # substring replace of the extension is safe here.
        seg_filename = im_filename.replace('jpg', 'png')
        im_path = os.path.join(pascal_voc_root_path, 'JPEGImages', im_filename)
        seg_path = os.path.join(pascal_voc_root_path, 'SegmentationObject', seg_filename)

        im = np.asarray(PIL.Image.open(im_path))
        labels = np.asarray(PIL.Image.open(seg_path))

        for i_obj, (xmin, ymin, xmax, ymax) in boxes:
            # Object instance i_obj is encoded as label value i_obj + 1
            # (0 is background) in the SegmentationObject map.
            object_mask = (labels[ymin:ymax, xmin:xmax] == i_obj + 1).astype(np.uint8) * 255
            object_image = im[ymin:ymax, xmin:xmax]
            if cv2.countNonZero(object_mask) < 500:
                # Ignore small objects
                continue

            # Reduce the opacity of the mask along the border for smoother blending
            eroded = cv2.erode(object_mask, structuring_element)
            object_mask[eroded < object_mask] = 192
            object_with_mask = np.concatenate([object_image, object_mask[..., np.newaxis]], axis=-1)

            # Downscale for efficiency
            object_with_mask = resize_by_factor(object_with_mask, 0.5)
            occluders.append(object_with_mask)

    return occluders


def occlude_with_objects(im, occluders, n=1, min_overlap=0.1, max_overlap=0.6):
    """Return an augmented copy of `im` with random VOC occluders pasted on.

    Args:
        im (np.ndarray): HxWx3 image.
        occluders (list of np.ndarray): RGBA cut-outs from `load_occluders`.
        n (int): maximum number of occluders to paste (count drawn in [1, n]).
        min_overlap (float): minimum target occluder-area / image-area ratio.
        max_overlap (float): maximum target occluder-area / image-area ratio.
    """
    result = im.copy()
    width_height = np.asarray([im.shape[1], im.shape[0]])
    im_area = im.shape[1] * im.shape[0]
    count = np.random.randint(1, n + 1)
    for _ in range(count):
        occluder = random.choice(occluders)
        occluder_area = occluder.shape[1] * occluder.shape[0]
        overlap = random.uniform(min_overlap, max_overlap)
        scale_factor = math.sqrt(overlap * im_area / occluder_area)
        occluder = resize_by_factor(occluder, scale_factor)
        # Bugfix: the resized dimensions are rounded to whole pixels, so the
        # achieved overlap only approximates `overlap`; the previous exact
        # float-equality assert fired on nearly every call. Use a tolerance.
        assert abs((occluder.shape[1] * occluder.shape[0]) / im_area - overlap) < 0.05
        center = np.random.uniform([0, 0], width_height)
        paste_over(im_src=occluder, im_dst=result, center=center)
    return result


def paste_over(im_src, im_dst, center, is_mask=False):
    """Pastes `im_src` onto `im_dst` at a specified position, with alpha blending, in place.

    Locations outside the bounds of `im_dst` are handled as expected
    (only a part or none of `im_src` becomes visible).

    Args:
        im_src: The RGBA image to be pasted onto `im_dst`. Its size can be
            arbitrary. Its last channel is a 0-255 alpha map controlling the
            blending at each pixel; large values mean more visibility for
            `im_src`.
        im_dst: The target image, modified in place.
        center: coordinates in `im_dst` where the center of `im_src` should be placed.
        is_mask: if True, `im_dst` is a segmentation mask and the pasted
            region is erased (scaled by 1 - alpha) instead of alpha-blended.

    Returns:
        (bbox_overlap, pxls_overlap): fractions of the occluder bounding box,
        resp. of its non-transparent pixels, that landed inside `im_dst`,
        both relative to the occluder area.
    """
    width_height_src = np.asarray([im_src.shape[1], im_src.shape[0]])
    width_height_dst = np.asarray([im_dst.shape[1], im_dst.shape[0]])

    center = np.round(center).astype(np.int32)
    raw_start_dst = center - width_height_src // 2
    raw_end_dst = raw_start_dst + width_height_src

    # Clip the paste window to the destination bounds; the matching source
    # window is derived from how much was clipped on each side.
    start_dst = np.clip(raw_start_dst, 0, width_height_dst)
    end_dst = np.clip(raw_end_dst, 0, width_height_dst)
    region_dst = im_dst[start_dst[1]:end_dst[1], start_dst[0]:end_dst[0]]

    start_src = start_dst - raw_start_dst
    end_src = width_height_src + (end_dst - raw_end_dst)
    region_src = im_src[start_src[1]:end_src[1], start_src[0]:end_src[0]]
    color_src = region_src[..., 0:3]
    alpha = region_src[..., 3:].astype(np.float32) / 255

    if is_mask:
        # if this is a segmentation mask, just apply alpha erasing
        im_dst[start_dst[1]:end_dst[1], start_dst[0]:end_dst[0]] = (1 - alpha) * region_dst
    else:
        im_dst[start_dst[1]:end_dst[1], start_dst[0]:end_dst[0]] = (
            alpha * color_src + (1 - alpha) * region_dst)

    im_area = im_src.shape[1] * im_src.shape[0]
    bbox_overlap = (color_src.shape[0] * color_src.shape[1]) / im_area
    pxls_overlap = np.count_nonzero(alpha) / im_area
    return bbox_overlap, pxls_overlap


def resize_by_factor(im, factor):
    """Returns a copy of `im` resized by `factor`, using bilinear interp for up and area interp
    for downscaling.
    """
    # Guard against a degenerate zero-sized output for very small factors.
    new_size = tuple(np.maximum(1, np.round(np.array([im.shape[1], im.shape[0]]) * factor)).astype(int))
    interp = cv2.INTER_LINEAR if factor > 1.0 else cv2.INTER_AREA
    # Bugfix: when an explicit dsize is given, cv2.resize ignores fx/fy, so
    # the previously-passed fx/fy were dead arguments and are omitted.
    return cv2.resize(im, new_size, interpolation=interp)


def list_filepaths(dirpath):
    """Return the sorted list of regular-file paths directly inside `dirpath`."""
    names = os.listdir(dirpath)
    paths = [os.path.join(dirpath, name) for name in names]
    return sorted(filter(os.path.isfile, paths))


class RandomOcclusion(DualTransform):
    """Albumentations transform that pastes random Pascal VOC occluders onto
    the image and erases the corresponding region in the accompanying mask.
    """

    def __init__(self,
                 path,
                 im_shape,
                 always_apply=False,
                 p=.5,
                 n=1,
                 min_overlap=0.5,
                 max_overlap=0.8,
                 ):
        super(RandomOcclusion, self).__init__(always_apply, p)
        print('Loading occluders from Pascal VOC dataset...')
        # Kept so get_transform_init_args_names() can serialize the transform.
        self.path = path
        self.all_occluders = load_occluders(pascal_voc_root_path=path)
        # Running statistics about applied occlusions, logged every 10000 calls.
        self.bbox_overlaps = []
        self.pxls_overlaps = []
        self.count = 0
        self.n = n
        self.min_overlap = min_overlap
        self.max_overlap = max_overlap
        self.im_shape = im_shape

    def check_range(self, dimension):
        """Validate that a float ratio lies in [0.0, 1.0)."""
        if isinstance(dimension, float) and not 0 <= dimension < 1.0:
            raise ValueError(
                "Invalid value {}. If using floats, the value should be in the range [0.0, 1.0)".format(dimension)
            )

    def apply(self, image, occluders=(), centers=(), **params):
        """Paste the pre-sampled occluders onto `image` (in place) and track stats."""
        for occluder, center in zip(occluders, centers):
            bbox_overlap, pxls_overlap = paste_over(im_src=occluder, im_dst=image, center=center)
            self.bbox_overlaps.append(bbox_overlap)
            self.pxls_overlaps.append(pxls_overlap)
            self.count += 1
            if self.count % 10000 == 0:
                bbox_overlaps = np.array(self.bbox_overlaps)
                pxls_overlaps = np.array(self.pxls_overlaps)
                print("RandomOcclusion #{}: bbox_overlap=[{:.2f},{:.2f},{:.2f}], pxls_overlap=[{:.2f},{:.2f},{:.2f}]"
                      .format(self.count,
                              bbox_overlaps.min(),
                              bbox_overlaps.max(),
                              bbox_overlaps.mean(),
                              pxls_overlaps.min(),
                              pxls_overlaps.max(),
                              pxls_overlaps.mean()
                              )
                      )
        return image

    def apply_to_mask(self, image, occluders=(), centers=(), **params):
        """Erase the occluded regions from the segmentation mask (in place)."""
        for occluder, center in zip(occluders, centers):
            paste_over(im_src=occluder, im_dst=image, center=center, is_mask=True)
        return image

    def get_params_dependent_on_targets(self, params):
        """Sample occluders and paste centers for this call, sized relative to `im_shape`."""
        img = params["image"]
        count = np.random.randint(1, self.n + 1)
        width_height = np.asarray([img.shape[1], img.shape[0]])
        im_area = self.im_shape[1] * self.im_shape[0]
        occluders = []
        centers = []
        for _ in range(count):
            occluder = random.choice(self.all_occluders)
            occluder_area = occluder.shape[1] * occluder.shape[0]
            overlap = random.uniform(self.min_overlap, self.max_overlap)
            scale_factor = math.sqrt(overlap * im_area / occluder_area)
            occluder = resize_by_factor(occluder, scale_factor)
            center = np.random.uniform([0, 0], width_height)
            occluders.append(occluder)
            centers.append(center)
        return {"occluders": occluders, "centers": centers}

    @property
    def targets_as_params(self):
        return ["image"]

    def get_transform_init_args_names(self):
        # Bugfix: previously returned CoarseDropout's argument names
        # ("max_holes", "max_height", ...), none of which exist on this
        # transform, breaking albumentations serialization. Report the
        # actual constructor arguments instead.
        return ("path", "im_shape", "n", "min_overlap", "max_overlap")


if __name__ == '__main__':
    main()
params): img = params["image"] count = np.random.randint(1, self.n + 1) width_height = np.asarray([img.shape[1], img.shape[0]]) im_area = self.im_shape[1] * self.im_shape[0] occluders = [] centers = [] for _ in range(count): occluder = random.choice(self.all_occluders) occluder_area = occluder.shape[1] * occluder.shape[0] overlap = random.uniform(self.min_overlap, self.max_overlap) scale_factor = math.sqrt(overlap * im_area / occluder_area) occluder = resize_by_factor(occluder, scale_factor) # assert abs((occluder.shape[1] * occluder.shape[0]) / im_area - overlap) < 0.005 center = np.random.uniform([0, 0], width_height) occluders.append(occluder) centers.append(center) return {"occluders": occluders, "centers": centers} @property def targets_as_params(self): return ["image"] def get_transform_init_args_names(self): return ( "max_holes", "max_height", "max_width", "min_holes", "min_height", "min_width", "fill_value", "mask_fill_value", ) if __name__ == '__main__': main() ================================================ FILE: torchreid/data/datamanager.py ================================================ from __future__ import division, print_function, absolute_import import torch from torchreid.data.masks_transforms import masks_preprocess_transforms from torchreid.data.sampler import build_train_sampler from torchreid.data.datasets import init_image_dataset, init_video_dataset, get_image_dataset from torchreid.data.transforms import build_transforms class DataManager(object): r"""Base data manager. Args: sources (str or list): source dataset(s). targets (str or list, optional): target dataset(s). If not given, it equals to ``sources``. height (int, optional): target image height. Default is 256. width (int, optional): target image width. Default is 128. transforms (str or list of str, optional): transformations applied to model training. Default is 'random_flip'. norm_mean (list or None, optional): data mean. Default is None (use imagenet mean). 
norm_std (list or None, optional): data std. Default is None (use imagenet std). use_gpu (bool, optional): use gpu. Default is True. """ def __init__( self, config, sources=None, targets=None, height=256, width=128, transforms='random_flip', norm_mean=None, norm_std=None, use_gpu=False, use_masks=False, masks_dir='', ): self.sources = sources self.targets = targets self.height = height self.width = width self.masks_dir = masks_dir self.config = config if self.sources is None: raise ValueError('sources must not be None') if isinstance(self.sources, str): self.sources = [self.sources] if self.targets is None: self.targets = self.sources if isinstance(self.targets, str): self.targets = [self.targets] masks_config = get_image_dataset(self.sources[0]).get_masks_config(self.masks_dir) self.transform_tr, self.transform_te = build_transforms( self.height, self.width, config, transforms=transforms, norm_mean=norm_mean, norm_std=norm_std, remove_background_mask=masks_config[1] if masks_config is not None else False, masks_preprocess=config.model.bpbreid.masks.preprocess, softmax_weight=config.model.bpbreid.masks.softmax_weight, background_computation_strategy=config.model.bpbreid.masks.background_computation_strategy, mask_filtering_threshold=config.model.bpbreid.masks.mask_filtering_threshold, ) self.use_gpu = (torch.cuda.is_available() and use_gpu) @property def num_train_pids(self): """Returns the number of training person identities.""" return self._num_train_pids @property def num_train_cams(self): """Returns the number of training cameras.""" return self._num_train_cams def fetch_test_loaders(self, name): """Returns query and gallery of a test dataset, each containing tuples of (img_path(s), pid, camid). Args: name (str): dataset name. 
""" query_loader = self.test_dataset[name]['query'] gallery_loader = self.test_dataset[name]['gallery'] return query_loader, gallery_loader def preprocess_pil_img(self, img): """Transforms a PIL image to torch tensor for testing.""" return self.transform_te(img) class ImageDataManager(DataManager): r"""Image data manager. Args: root (str): root path to datasets. sources (str or list): source dataset(s). targets (str or list, optional): target dataset(s). If not given, it equals to ``sources``. height (int, optional): target image height. Default is 256. width (int, optional): target image width. Default is 128. transforms (str or list of str, optional): transformations applied to model training. Default is 'random_flip'. norm_mean (list or None, optional): data mean. Default is None (use imagenet mean). norm_std (list or None, optional): data std. Default is None (use imagenet std). use_gpu (bool, optional): use gpu. Default is True. split_id (int, optional): split id (*0-based*). Default is 0. combineall (bool, optional): combine train, query and gallery in a dataset for training. Default is False. load_train_targets (bool, optional): construct train-loader for target datasets. Default is False. This is useful for domain adaptation research. batch_size_train (int, optional): number of images in a training batch. Default is 32. batch_size_test (int, optional): number of images in a test batch. Default is 32. workers (int, optional): number of workers. Default is 4. num_instances (int, optional): number of instances per identity in a batch. Default is 4. train_sampler (str, optional): sampler. Default is RandomSampler. train_sampler_t (str, optional): sampler for target train loader. Default is RandomSampler. cuhk03_labeled (bool, optional): use cuhk03 labeled images. Default is False (defaul is to use detected images). cuhk03_classic_split (bool, optional): use the classic split in cuhk03. Default is False. 
market1501_500k (bool, optional): add 500K distractors to the gallery set in market1501. Default is False. Examples:: datamanager = torchreid.data.ImageDataManager( root='path/to/reid-data', sources='market1501', height=256, width=128, batch_size_train=32, batch_size_test=100 ) # return train loader of source data train_loader = datamanager.train_loader # return test loader of target data test_loader = datamanager.test_loader # return train loader of target data train_loader_t = datamanager.train_loader_t """ data_type = 'image' def __init__( self, config, root='', sources=None, targets=None, height=256, width=128, mask_scale=8, transforms='random_flip', norm_mean=None, norm_std=None, use_gpu=True, split_id=0, combineall=False, load_train_targets=False, batch_size_train=32, batch_size_test=32, workers=4, num_instances=4, train_sampler='RandomSampler', train_sampler_t='RandomSampler', cuhk03_labeled=False, cuhk03_classic_split=False, market1501_500k=False, use_masks=False, masks_dir=None, ): super(ImageDataManager, self).__init__( sources=sources, targets=targets, height=height, width=width, transforms=transforms, norm_mean=norm_mean, norm_std=norm_std, use_gpu=use_gpu, use_masks=use_masks, masks_dir=masks_dir, config=config ) print('=> Loading train (source) dataset') trainset = [] for name in self.sources: trainset_ = init_image_dataset( name, config=config, transform_tr=self.transform_tr, transform_te=self.transform_te, mode='train', combineall=combineall, root=root, split_id=split_id, cuhk03_labeled=cuhk03_labeled, cuhk03_classic_split=cuhk03_classic_split, market1501_500k=market1501_500k, use_masks=use_masks, masks_dir=masks_dir, load_masks=self.config.model.bpbreid.masks.preprocess in masks_preprocess_transforms, ) trainset.append(trainset_) trainset = sum(trainset) self._num_train_pids = trainset.num_train_pids self._num_train_cams = trainset.num_train_cams self.train_loader = torch.utils.data.DataLoader( trainset, sampler=build_train_sampler( trainset.train, 
train_sampler, batch_size=batch_size_train, num_instances=num_instances ), batch_size=batch_size_train, shuffle=False, num_workers=workers, pin_memory=self.use_gpu, drop_last=True ) self.train_loader_t = None if load_train_targets: # check if sources and targets are identical assert len(set(self.sources) & set(self.targets)) == 0, \ 'sources={} and targets={} must not have overlap'.format(self.sources, self.targets) print('=> Loading train (target) dataset') trainset_t = [] for name in self.targets: trainset_t_ = init_image_dataset( name, config=config, transform_tr=self.transform_tr, transform_te=self.transform_te, mode='train', combineall=False, # only use the training data root=root, split_id=split_id, cuhk03_labeled=cuhk03_labeled, cuhk03_classic_split=cuhk03_classic_split, market1501_500k=market1501_500k, use_masks=use_masks, masks_dir=masks_dir, load_masks=self.config.model.bpbreid.masks.preprocess in masks_preprocess_transforms, ) trainset_t.append(trainset_t_) trainset_t = sum(trainset_t) self.train_loader_t = torch.utils.data.DataLoader( trainset_t, sampler=build_train_sampler( trainset_t.train, train_sampler_t, batch_size=batch_size_train, num_instances=num_instances ), batch_size=batch_size_train, shuffle=False, num_workers=workers, pin_memory=self.use_gpu, drop_last=True ) print('=> Loading test (target) dataset') self.test_loader = { name: { 'query': None, 'gallery': None } for name in self.targets } self.test_dataset = { name: { 'query': None, 'gallery': None } for name in self.targets } for name in self.targets: # build query loader queryset = init_image_dataset( name, config=config, transform_tr=self.transform_tr, transform_te=self.transform_te, mode='query', combineall=combineall, root=root, split_id=split_id, cuhk03_labeled=cuhk03_labeled, cuhk03_classic_split=cuhk03_classic_split, market1501_500k=market1501_500k, use_masks=use_masks, masks_dir=masks_dir, load_masks=self.config.model.bpbreid.masks.preprocess in masks_preprocess_transforms, ) 
            self.test_loader[name]['query'] = torch.utils.data.DataLoader(
                queryset,
                batch_size=batch_size_test,
                shuffle=False,
                num_workers=workers,
                pin_memory=self.use_gpu,
                drop_last=False
            )

            # build gallery loader
            # verbose=False so the dataset summary is not printed a second time
            galleryset = init_image_dataset(
                name,
                config=config,
                transform_tr=self.transform_tr,
                transform_te=self.transform_te,
                mode='gallery',
                combineall=combineall,
                verbose=False,
                root=root,
                split_id=split_id,
                cuhk03_labeled=cuhk03_labeled,
                cuhk03_classic_split=cuhk03_classic_split,
                market1501_500k=market1501_500k,
                use_masks=use_masks,
                masks_dir=masks_dir,
                load_masks=self.config.model.bpbreid.masks.preprocess in masks_preprocess_transforms,
            )
            self.test_loader[name]['gallery'] = torch.utils.data.DataLoader(
                galleryset,
                batch_size=batch_size_test,
                shuffle=False,
                num_workers=workers,
                pin_memory=self.use_gpu,
                drop_last=False
            )

            # keep the raw query/gallery sample lists for evaluation-time metadata
            self.test_dataset[name]['query'] = queryset.query
            self.test_dataset[name]['gallery'] = galleryset.gallery

        print('\n')
        print(' **************** Summary ****************')
        print(' source : {}'.format(self.sources))
        print(' # source datasets : {}'.format(len(self.sources)))
        print(' # source ids : {}'.format(self.num_train_pids))
        print(' # source images : {}'.format(len(trainset)))
        print(' # source cameras : {}'.format(self.num_train_cams))
        if load_train_targets:
            print(
                ' # target images : {} (unlabeled)'.format(len(trainset_t))
            )
        print(' target : {}'.format(self.targets))
        print(' *****************************************')
        print('\n')


class VideoDataManager(DataManager):
    r"""Video data manager.

    Args:
        root (str): root path to datasets.
        sources (str or list): source dataset(s).
        targets (str or list, optional): target dataset(s). If not given,
            it equals to ``sources``.
        height (int, optional): target image height. Default is 256.
        width (int, optional): target image width. Default is 128.
        transforms (str or list of str, optional): transformations applied to
            model training. Default is 'random_flip'.
        norm_mean (list or None, optional): data mean. Default is None
            (use imagenet mean).
        norm_std (list or None, optional): data std. Default is None
            (use imagenet std).
        use_gpu (bool, optional): use gpu. Default is True.
        split_id (int, optional): split id (*0-based*). Default is 0.
        combineall (bool, optional): combine train, query and gallery in a
            dataset for training. Default is False.
        batch_size_train (int, optional): number of tracklets in a training
            batch. Default is 3.
        batch_size_test (int, optional): number of tracklets in a test batch.
            Default is 3.
        workers (int, optional): number of workers. Default is 4.
        num_instances (int, optional): number of instances per identity in a
            batch. Default is 4.
        train_sampler (str, optional): sampler. Default is RandomSampler.
        seq_len (int, optional): how many images to sample in a tracklet.
            Default is 15.
        sample_method (str, optional): how to sample images in a tracklet.
            Default is "evenly". Choices are ["evenly", "random", "all"].
            "evenly" and "random" will sample ``seq_len`` images in a tracklet
            while "all" samples all images in a tracklet, where the batch size
            needs to be set to 1.

    Examples::

        datamanager = torchreid.data.VideoDataManager(
            root='path/to/reid-data',
            sources='mars',
            height=256,
            width=128,
            batch_size_train=3,
            batch_size_test=3,
            seq_len=15,
            sample_method='evenly'
        )

        # return train loader of source data
        train_loader = datamanager.train_loader

        # return test loader of target data
        test_loader = datamanager.test_loader

    .. note::
        The current implementation only supports image-like training.
        Therefore, each image in a sampled tracklet will undergo independent
        transformation functions. To achieve tracklet-aware training, you need
        to modify the transformation functions for video reid such that each
        function applies the same operation to all images in a tracklet to
        keep consistency.
    """
    data_type = 'video'

    def __init__(
        self,
        config,
        root='',
        sources=None,
        targets=None,
        height=256,
        width=128,
        transforms='random_flip',
        norm_mean=None,
        norm_std=None,
        use_gpu=True,
        split_id=0,
        combineall=False,
        batch_size_train=3,
        batch_size_test=3,
        workers=4,
        num_instances=4,
        train_sampler='RandomSampler',
        seq_len=15,
        sample_method='evenly'
    ):

        super(VideoDataManager, self).__init__(
            config=config,
            sources=sources,
            targets=targets,
            height=height,
            width=width,
            transforms=transforms,
            norm_mean=norm_mean,
            norm_std=norm_std,
            use_gpu=use_gpu
        )

        print('=> Loading train (source) dataset')
        trainset = []
        for name in self.sources:
            trainset_ = init_video_dataset(
                name,
                transform=self.transform_tr,
                mode='train',
                combineall=combineall,
                root=root,
                split_id=split_id,
                seq_len=seq_len,
                sample_method=sample_method
            )
            trainset.append(trainset_)
        # merge the per-source datasets into one (uses Dataset.__add__/__radd__)
        trainset = sum(trainset)

        self._num_train_pids = trainset.num_train_pids
        self._num_train_cams = trainset.num_train_cams

        train_sampler = build_train_sampler(
            trainset.train,
            train_sampler,
            batch_size=batch_size_train,
            num_instances=num_instances
        )

        # shuffle=False because ordering is fully delegated to the sampler
        self.train_loader = torch.utils.data.DataLoader(
            trainset,
            sampler=train_sampler,
            batch_size=batch_size_train,
            shuffle=False,
            num_workers=workers,
            pin_memory=self.use_gpu,
            drop_last=True
        )

        print('=> Loading test (target) dataset')
        self.test_loader = {
            name: {
                'query': None,
                'gallery': None
            }
            for name in self.targets
        }
        self.test_dataset = {
            name: {
                'query': None,
                'gallery': None
            }
            for name in self.targets
        }

        for name in self.targets:
            # build query loader
            queryset = init_video_dataset(
                name,
                transform=self.transform_te,
                mode='query',
                combineall=combineall,
                root=root,
                split_id=split_id,
                seq_len=seq_len,
                sample_method=sample_method
            )
            self.test_loader[name]['query'] = torch.utils.data.DataLoader(
                queryset,
                batch_size=batch_size_test,
                shuffle=False,
                num_workers=workers,
                pin_memory=self.use_gpu,
                drop_last=False
            )

            # build gallery loader (verbose=False: summary already printed once)
            galleryset = init_video_dataset(
                name,
                transform=self.transform_te,
                mode='gallery',
                combineall=combineall,
                verbose=False,
                root=root,
                split_id=split_id,
                seq_len=seq_len,
                sample_method=sample_method
            )
            self.test_loader[name]['gallery'] = torch.utils.data.DataLoader(
                galleryset,
                batch_size=batch_size_test,
                shuffle=False,
                num_workers=workers,
                pin_memory=self.use_gpu,
                drop_last=False
            )

            self.test_dataset[name]['query'] = queryset.query
            self.test_dataset[name]['gallery'] = galleryset.gallery

        print('\n')
        print(' **************** Summary ****************')
        print(' source : {}'.format(self.sources))
        print(' # source datasets : {}'.format(len(self.sources)))
        print(' # source ids : {}'.format(self.num_train_pids))
        print(' # source tracklets : {}'.format(len(trainset)))
        print(' # source cameras : {}'.format(self.num_train_cams))
        print(' target : {}'.format(self.targets))
        print(' *****************************************')
        print('\n')


================================================ FILE: torchreid/data/datasets/__init__.py ================================================
from __future__ import print_function, absolute_import
import copy

from .image import (
    GRID, PRID, CUHK01, CUHK02, CUHK03, MSMT17, VIPeR, SenseReID, Market1501,
    DukeMTMCreID, iLIDS, OccludedDuke, OccludedReID, Partial_iLIDS,
    Partial_REID, PDukemtmcReid, P_ETHZ
)
from .video import PRID2011, Mars, DukeMTMCVidReID, iLIDSVID
from .dataset import Dataset, ImageDataset, VideoDataset

# maps the dataset key used in configs/CLI to its dataset class
__image_datasets = {
    'market1501': Market1501,
    'cuhk03': CUHK03,
    'dukemtmcreid': DukeMTMCreID,
    'msmt17': MSMT17,
    'viper': VIPeR,
    'grid': GRID,
    'cuhk01': CUHK01,
    'ilids': iLIDS,
    'sensereid': SenseReID,
    'prid': PRID,
    'cuhk02': CUHK02,
    'occluded_duke': OccludedDuke,
    'occluded_reid': OccludedReID,
    'partial_reid': Partial_REID,
    'partial_ilids': Partial_iLIDS,
    'p_ETHZ': P_ETHZ,
    'p_dukemtmc_reid': PDukemtmcReid,
}

# short nicknames for each dataset key (see get_dataset_nickname())
__datasets_nicknames = {
    'market1501': 'mk',
    'cuhk03': 'c03',
    'dukemtmcreid': 'du',
    'msmt17': 'ms',
    'viper': 'vi',
    'grid': 'gr',
    'cuhk01': 'c01',
    'ilids': 'il',
    'sensereid': 'se',
    'prid': 'pr',
    'cuhk02': 'c02',
    'occluded_duke': 'od',
    'occluded_reid': 'or',
    'partial_reid': 'pr',
    'partial_ilids': 'pi',
    'p_ETHZ': 'pz',
    'p_dukemtmc_reid': 'pd',
}

__video_datasets = {
    'mars': Mars,
    'ilidsvid': iLIDSVID,
    'prid2011': PRID2011,
    'dukemtmcvidreid': DukeMTMCVidReID
}

# cache of instantiated image datasets keyed by dataset name; see
# init_image_dataset() below for why each dataset is instantiated only once
__datasets_cache = {}


def configure_dataset_class(clazz, **ext_kwargs):
    """ Wrapper function to provide the class with args external to torchreid """

    class ClazzWrapper(clazz):
        def __init__(self, **kwargs):
            self.__name__ = clazz.__name__
            # merge torchreid kwargs with the externally supplied ones;
            # ext_kwargs win on key collision
            super(ClazzWrapper, self).__init__(**{**kwargs, **ext_kwargs})

    ClazzWrapper.__name__ = clazz.__name__
    return ClazzWrapper


def get_dataset_nickname(name):
    # fall back to the full dataset name when no nickname was registered
    return __datasets_nicknames.get(name, name)


def get_image_dataset(name):
    """Returns the image dataset class registered under ``name``."""
    avai_datasets = list(__image_datasets.keys())
    if name not in avai_datasets:
        raise ValueError(
            'Invalid dataset name. Received "{}", '
            'but expected to be one of {}'.format(name, avai_datasets)
        )
    return __image_datasets[name]


def init_image_dataset(name, mode='train', **kwargs):
    """ Initializes an image dataset.

    The copy.copy() was introduced to fix Torchreid implementing multiple
    times the same dataset. In Datamanager, each dataset was instantiated
    multiple times via 'init_image_dataset': one for train, one for query and
    one for gallery. Each instance had its own 'data' field containing either
    train, query or gallery set, based on the 'mode' field passed as argument,
    and its own transforms, to perform training time or test time data
    transformation. However, instantiating the same dataset multiple times is
    not efficient, as it requires to load the dataset metadata from disk
    multiple times. Moreover, other printing (such as dataset summary) are
    displayed multiple times. To fix this, we copy the dataset class but not
    its contained objects (such as train/query/gallery) and set a new 'mode'
    on each copy. Thanks to that hack, the data list is created only once, and
    only the Dataset class is instantiated multiple times (for each 'mode').
    Therefore, each Dataset uses the same data lists in the background,
    switching between train, query and gallery based on the 'mode' field.
    """
    if name in __datasets_cache:
        # NOTE: on a cache hit the remaining kwargs are ignored — the cached
        # instance keeps whatever configuration it was first created with
        print("Using cached dataset {}.".format(name))
        dataset = __datasets_cache[name]
    else:
        print("Creating new dataset {} and add it to the datasets cache.".format(name))
        dataset = get_image_dataset(name)(mode=mode, **kwargs)
        __datasets_cache[name] = dataset
    # shallow copy: train/query/gallery lists are shared, only 'mode' differs
    mode_dataset = copy.copy(dataset)
    mode_dataset.mode = mode
    return mode_dataset


def init_video_dataset(name, **kwargs):
    """Initializes a video dataset."""
    avai_datasets = list(__video_datasets.keys())
    if name not in avai_datasets:
        raise ValueError(
            'Invalid dataset name. Received "{}", '
            'but expected to be one of {}'.format(name, avai_datasets)
        )
    return __video_datasets[name](**kwargs)


def register_image_dataset(name, dataset, nickname=None):
    """Registers a new image dataset.

    Args:
        name (str): key corresponding to the new dataset.
        dataset (Dataset): the new dataset class.

    Examples::

        import torchreid
        import NewDataset
        torchreid.data.register_image_dataset('new_dataset', NewDataset)
        # single dataset case
        datamanager = torchreid.data.ImageDataManager(
            root='reid-data',
            sources='new_dataset'
        )
        # multiple dataset case
        datamanager = torchreid.data.ImageDataManager(
            root='reid-data',
            sources=['new_dataset', 'dukemtmcreid']
        )
    """
    global __image_datasets
    curr_datasets = list(__image_datasets.keys())
    if name in curr_datasets:
        raise ValueError(
            'The given name already exists, please choose '
            'another name excluding {}'.format(curr_datasets)
        )
    __image_datasets[name] = dataset
    # default the nickname to the full name when none is given
    __datasets_nicknames[name] = nickname if nickname is not None else name


def register_video_dataset(name, dataset):
    """Registers a new video dataset.

    Args:
        name (str): key corresponding to the new dataset.
        dataset (Dataset): the new dataset class.
Examples:: import torchreid import NewDataset torchreid.data.register_video_dataset('new_dataset', NewDataset) # single dataset case datamanager = torchreid.data.VideoDataManager( root='reid-data', sources='new_dataset' ) # multiple dataset case datamanager = torchreid.data.VideoDataManager( root='reid-data', sources=['new_dataset', 'ilidsvid'] ) """ global __video_datasets curr_datasets = list(__video_datasets.keys()) if name in curr_datasets: raise ValueError( 'The given name already exists, please choose ' 'another name excluding {}'.format(curr_datasets) ) __video_datasets[name] = dataset ================================================ FILE: torchreid/data/datasets/dataset.py ================================================ from __future__ import division, print_function, absolute_import import copy import os import numpy as np import os.path as osp import tarfile import zipfile import torch from torchreid.utils import read_masks, read_image, download_url, mkdir_if_missing class Dataset(object): """An abstract class representing a Dataset. This is the base class for ``ImageDataset`` and ``VideoDataset``. Args: train (list): contains tuples of (img_path(s), pid, camid). query (list): contains tuples of (img_path(s), pid, camid). gallery (list): contains tuples of (img_path(s), pid, camid). transform: transform function. mode (str): 'train', 'query' or 'gallery'. combineall (bool): combines train, query and gallery in a dataset for training. verbose (bool): show information. """ _junk_pids = [ ] # contains useless person IDs, e.g. background, false detections masks_base_dir = None eval_metric = 'default' # default to market101 def gallery_filter(self, q_pid, q_camid, q_ann, g_pids, g_camids, g_anns): """ Remove gallery samples that have the same pid and camid as the query sample, since ReID is a cross-camera person retrieval task for most datasets. 
However, we still keep samples from the same camera but of different identity as distractors.""" remove = (g_camids == q_camid) & (g_pids == q_pid) return remove def infer_masks_path(self, img_path): masks_path = os.path.join(self.dataset_dir, self.masks_base_dir, self.masks_dir, os.path.basename(os.path.dirname(img_path)), os.path.splitext(os.path.basename(img_path))[0] + self.masks_suffix) return masks_path def __init__( self, train, query, gallery, config=None, transform_tr=None, transform_te=None, mode='train', combineall=False, verbose=True, use_masks=False, masks_dir=None, masks_base_dir=None, load_masks=False, **kwargs ): self.train = train self.query = query self.gallery = gallery self.transform_tr = transform_tr self.transform_te = transform_te self.cfg = config self.mode = mode self.combineall = combineall self.verbose = verbose self.use_masks = use_masks self.masks_dir = masks_dir self.load_masks = load_masks if masks_base_dir is not None: self.masks_base_dir = masks_base_dir self.num_train_pids = self.get_num_pids(self.train) self.num_train_cams = self.get_num_cams(self.train) if self.combineall: self.combine_all() if self.verbose: self.show_summary() def transforms(self, mode): """Returns the transforms of a specific mode.""" if mode == 'train': return self.transform_tr elif mode == 'query': return self.transform_te elif mode == 'gallery': return self.transform_te else: raise ValueError("Invalid mode. Got {}, but expected to be " "'train', 'query' or 'gallery'".format(mode)) def data(self, mode): """Returns the data of a specific mode. Args: mode (str): 'train', 'query' or 'gallery'. Returns: list: contains tuples of (img_path(s), pid, camid). """ if mode == 'train': return self.train elif mode == 'query': return self.query elif mode == 'gallery': return self.gallery else: raise ValueError("Invalid mode. 
Got {}, but expected to be " "'train', 'query' or 'gallery'".format(mode)) def __getitem__(self, index): raise NotImplementedError def __len__(self): # kept for backward compatibility return self.len(self.mode) def len(self, mode): return len(self.data(mode)) def __add__(self, other): """Adds two datasets together (only the train set).""" train = copy.deepcopy(self.train) for sample in other.train: sample['pid'] += self.num_train_pids train.append(sample) ################################### # Things to do beforehand: # 1. set verbose=False to avoid unnecessary print # 2. set combineall=False because combineall would have been applied # if it was True for a specific dataset, setting it to True will # create new IDs that should have been included ################################### # FIXME find better implementation for combining datasets and masks assert self.use_masks == other.use_masks if isinstance(self, ImageDataset): return ImageDataset( train, self.query, self.gallery, transform=self.transform, mode=self.mode, combineall=False, verbose=False, use_masks=self.use_masks, masks_base_dir=self.masks_base_dir, ) else: return VideoDataset( train, self.query, self.gallery, transform=self.transform, mode=self.mode, combineall=False, verbose=False, seq_len=self.seq_len, sample_method=self.sample_method ) def __radd__(self, other): """Supports sum([dataset1, dataset2, dataset3]).""" if other == 0: return self else: return self.__add__(other) def parse_data(self, data): """Parses data list and returns the number of person IDs and the number of camera views. 
Args: data (list): contains tuples of (img_path(s), pid, camid) """ pids = set() cams = set() for i, sample in enumerate(data): pids.add(sample['pid']) cams.add(sample['camid']) return len(pids), len(cams) def get_num_pids(self, data): """Returns the number of training person identities.""" return self.parse_data(data)[0] def get_num_cams(self, data): """Returns the number of training cameras.""" return self.parse_data(data)[1] def show_summary(self): """Shows dataset statistics.""" pass def combine_all(self): """Combines train, query and gallery in a dataset for training.""" combined = copy.deepcopy(self.train) # relabel pids in gallery (query shares the same scope) g_pids = set() for sample in self.gallery: pid = sample['pid'] if pid in self._junk_pids: continue g_pids.add(pid) pid2label = {pid: i for i, pid in enumerate(g_pids)} def _combine_data(data): for sample in data: pid = sample['pid'] if pid in self._junk_pids: continue sample['pid'] = pid2label[pid] + self.num_train_pids combined.append(sample) _combine_data(self.query) _combine_data(self.gallery) self.train = combined self.num_train_pids = self.get_num_pids(self.train) def download_dataset(self, dataset_dir, dataset_url): """Downloads and extracts dataset. Args: dataset_dir (str): dataset directory. dataset_url (str): url to download dataset. 
""" if osp.exists(dataset_dir): return if dataset_url is None: raise RuntimeError( '{} dataset needs to be manually ' 'prepared, please follow the ' 'document to prepare this dataset'.format( self.__class__.__name__ ) ) print('Creating directory "{}"'.format(dataset_dir)) mkdir_if_missing(dataset_dir) fpath = osp.join(dataset_dir, osp.basename(dataset_url)) print( 'Downloading {} dataset to "{}"'.format( self.__class__.__name__, dataset_dir ) ) download_url(dataset_url, fpath) print('Extracting "{}"'.format(fpath)) try: tar = tarfile.open(fpath) tar.extractall(path=dataset_dir) tar.close() except: zip_ref = zipfile.ZipFile(fpath, 'r') zip_ref.extractall(dataset_dir) zip_ref.close() print('{} dataset is ready'.format(self.__class__.__name__)) def check_before_run(self, required_files): """Checks if required files exist before going deeper. Args: required_files (str or list): string file name(s). """ if isinstance(required_files, str): required_files = [required_files] for fpath in required_files: if not osp.exists(fpath): raise RuntimeError('"{}" is not found'.format(fpath)) def __repr__(self): num_train_pids, num_train_cams = self.parse_data(self.train) num_query_pids, num_query_cams = self.parse_data(self.query) num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) msg = ' ----------------------------------------\n' \ ' subset | # ids | # items | # cameras\n' \ ' ----------------------------------------\n' \ ' train | {:5d} | {:7d} | {:9d}\n' \ ' query | {:5d} | {:7d} | {:9d}\n' \ ' gallery | {:5d} | {:7d} | {:9d}\n' \ ' ----------------------------------------\n' \ ' items: images/tracklets for image/video dataset\n'.format( num_train_pids, len(self.train), num_train_cams, num_query_pids, len(self.query), num_query_cams, num_gallery_pids, len(self.gallery), num_gallery_cams ) return msg class ImageDataset(Dataset): """A base class representing ImageDataset. All other image datasets should subclass it. ``__getitem__`` returns an image given index. 
It will return ``img``, ``pid``, ``camid`` and ``img_path`` where ``img`` has shape (channel, height, width). As a result, data in each batch has shape (batch_size, channel, height, width). """ def __init__(self, train, query, gallery, **kwargs): super(ImageDataset, self).__init__(train, query, gallery, **kwargs) def __getitem__(self, index): # kept for backward compatibility return self.getitem(index, self.mode) def getitem(self, index, mode): # BPBreID can work with None masks # list all combination: source vs target, merged/joined vs not, cross domain or not, load from disk vs fixed for BoT/PBP transform vs None, # need masks when available for pixel accuracy prediction sample = self.data(mode)[index] transf_args = {"image": read_image(sample['img_path'])} if self.use_masks: if self.load_masks and 'masks_path' in sample: transf_args["mask"] = read_masks(sample['masks_path']) elif not self.load_masks: # hack for BoT and PCB masks that are generated in transform(). # FIXME BoT and PCB masks should not be generated here, but later in BPBreID model with a config transf_args["mask"] = np.ones((1, 2, 2)) else: pass result = self.transforms(mode)(**transf_args) sample.update(result) return sample def show_summary(self): num_train_pids, num_train_cams = self.parse_data(self.train) num_query_pids, num_query_cams = self.parse_data(self.query) num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery) print('=> Loaded {}'.format(self.__class__.__name__)) print(' ----------------------------------------') print(' subset | # ids | # images | # cameras') print(' ----------------------------------------') print( ' train | {:5d} | {:8d} | {:9d}'.format( num_train_pids, len(self.train), num_train_cams ) ) print( ' query | {:5d} | {:8d} | {:9d}'.format( num_query_pids, len(self.query), num_query_cams ) ) print( ' gallery | {:5d} | {:8d} | {:9d}'.format( num_gallery_pids, len(self.gallery), num_gallery_cams ) ) print(' ----------------------------------------') class 
VideoDataset(Dataset):
    """A base class representing VideoDataset.

    All other video datasets should subclass it.

    ``__getitem__`` returns an image given index.
    It will return ``imgs``, ``pid`` and ``camid``
    where ``imgs`` has shape (seq_len, channel, height, width). As a result,
    data in each batch has shape (batch_size, seq_len, channel, height, width).
    """

    def __init__(
        self,
        train,
        query,
        gallery,
        seq_len=15,
        sample_method='evenly',
        **kwargs
    ):
        super(VideoDataset, self).__init__(train, query, gallery, **kwargs)
        self.seq_len = seq_len
        self.sample_method = sample_method

        # NOTE(review): Dataset.__init__ only assigns transform_tr/transform_te;
        # 'self.transform' is not set anywhere visible, so this check (and the
        # uses in getitem below) likely raises AttributeError — confirm the
        # video code path was updated after the transform_tr/te refactoring
        if self.transform is None:
            raise RuntimeError('transform must not be None')

    def getitem(self, index, mode):
        # FIXME new format — image datasets now store dict samples; this tuple
        # unpacking still assumes the legacy (img_paths, pid, camid) tuples
        img_paths, pid, camid = self.data(mode)[index]
        num_imgs = len(img_paths)

        if self.sample_method == 'random':
            # Randomly samples seq_len images from a tracklet of length num_imgs,
            # if num_imgs is smaller than seq_len, then replicates images
            indices = np.arange(num_imgs)
            replace = False if num_imgs >= self.seq_len else True
            indices = np.random.choice(
                indices, size=self.seq_len, replace=replace
            )
            # sort indices to keep temporal order (comment it to be order-agnostic)
            indices = np.sort(indices)

        elif self.sample_method == 'evenly':
            # Evenly samples seq_len images from a tracklet
            if num_imgs >= self.seq_len:
                # truncate so the stride divides the tracklet length evenly
                num_imgs -= num_imgs % self.seq_len
                indices = np.arange(0, num_imgs, num_imgs / self.seq_len)
            else:
                # if num_imgs is smaller than seq_len, simply replicate the last image
                # until the seq_len requirement is satisfied
                indices = np.arange(0, num_imgs)
                num_pads = self.seq_len - num_imgs
                indices = np.concatenate(
                    [
                        indices,
                        np.ones(num_pads).astype(np.int32) * (num_imgs-1)
                    ]
                )
            assert len(indices) == self.seq_len

        elif self.sample_method == 'all':
            # Samples all images in a tracklet. batch_size must be set to 1
            indices = np.arange(num_imgs)

        else:
            raise ValueError(
                'Unknown sample method: {}'.format(self.sample_method)
            )

        imgs = []
        for index in indices:
            img_path = img_paths[int(index)]
            img = read_image(img_path)
            if self.transform is not None:
                img = self.transform(img)
            img = img.unsqueeze(0)  # img must be torch.Tensor
            imgs.append(img)
        # stack along the new sequence dimension -> (seq_len, C, H, W)
        imgs = torch.cat(imgs, dim=0)

        return imgs, pid, camid

    def show_summary(self):
        # print a table with ids/tracklets/cameras counts per subset
        num_train_pids, num_train_cams = self.parse_data(self.train)
        num_query_pids, num_query_cams = self.parse_data(self.query)
        num_gallery_pids, num_gallery_cams = self.parse_data(self.gallery)

        print('=> Loaded {}'.format(self.__class__.__name__))
        print(' -------------------------------------------')
        print(' subset | # ids | # tracklets | # cameras')
        print(' -------------------------------------------')
        print(
            ' train | {:5d} | {:11d} | {:9d}'.format(
                num_train_pids, len(self.train), num_train_cams
            )
        )
        print(
            ' query | {:5d} | {:11d} | {:9d}'.format(
                num_query_pids, len(self.query), num_query_cams
            )
        )
        print(
            ' gallery | {:5d} | {:11d} | {:9d}'.format(
                num_gallery_pids, len(self.gallery), num_gallery_cams
            )
        )
        print(' -------------------------------------------')


================================================ FILE: torchreid/data/datasets/image/__init__.py ================================================
from __future__ import print_function, absolute_import

from .grid import GRID
from .prid import PRID
from .ilids import iLIDS
from .viper import VIPeR
from .cuhk01 import CUHK01
from .cuhk02 import CUHK02
from .cuhk03 import CUHK03
from .msmt17 import MSMT17
from .sensereid import SenseReID
from .market1501 import Market1501
from .dukemtmcreid import DukeMTMCreID
from .occluded_dukemtmc import OccludedDuke
from .occluded_reid import OccludedReID
from .partial_reid import Partial_REID
from .partial_ilids import Partial_iLIDS
from .p_ETHZ import P_ETHZ
from .p_dukemtmc_reid import PDukemtmcReid


================================================
FILE: torchreid/data/datasets/image/cuhk01.py ================================================
from __future__ import division, print_function, absolute_import
import glob
import numpy as np
import os.path as osp
import zipfile

from torchreid.utils import read_json, write_json

from ..dataset import ImageDataset


class CUHK01(ImageDataset):
    """CUHK01.

    Reference:
        Li et al. Human Reidentification with Transferred Metric Learning. ACCV 2012.

    URL: ``_

    Dataset statistics:
        - identities: 971.
        - images: 3884.
        - cameras: 4.
    """
    dataset_dir = 'cuhk01'
    dataset_url = None
    eval_metric = 'default'

    def __init__(self, root='', split_id=0, **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.zip_path = osp.join(self.dataset_dir, 'CUHK01.zip')
        self.campus_dir = osp.join(self.dataset_dir, 'campus')
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.extract_file()

        required_files = [self.dataset_dir, self.campus_dir]
        self.check_before_run(required_files)

        # splits are generated once and persisted as json; select one by id
        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, but expected between 0 and {}'
                .format(split_id, len(splits) - 1)
            )
        split = splits[split_id]

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        # json stores lists; restore the (img_path, pid, camid) tuple format
        train = [tuple(item) for item in train]
        query = [tuple(item) for item in query]
        gallery = [tuple(item) for item in gallery]

        super(CUHK01, self).__init__(train, query, gallery, **kwargs)

    def extract_file(self):
        # unzip once; later runs reuse the extracted 'campus' directory
        if not osp.exists(self.campus_dir):
            print('Extracting files')
            zip_ref = zipfile.ZipFile(self.zip_path, 'r')
            zip_ref.extractall(self.dataset_dir)
            zip_ref.close()

    def prepare_split(self):
        """
        Image name format: 0001001.png, where first four digits represent
        identity and the last three digits represent the camera.
        Camera 1&2 are considered the same view and camera 3&4 are considered
        the same view.
        """
        if not osp.exists(self.split_path):
            print('Creating 10 random splits of train ids and test ids')

            img_paths = sorted(glob.glob(osp.join(self.campus_dir, '*.png')))
            img_list = []
            pid_container = set()
            for img_path in img_paths:
                img_name = osp.basename(img_path)
                pid = int(img_name[:4]) - 1
                # cameras {1,2} collapse to view 0 and {3,4} to view 1
                camid = (int(img_name[4:7]) - 1) // 2  # result is either 0 or 1
                img_list.append((img_path, pid, camid))
                pid_container.add(pid)

            num_pids = len(pid_container)
            num_train_pids = num_pids // 2

            splits = []
            # 5 random id partitions, each used twice (A->B and B->A) = 10 splits
            for _ in range(10):
                order = np.arange(num_pids)
                np.random.shuffle(order)
                train_idxs = order[:num_train_pids]
                train_idxs = np.sort(train_idxs)
                # relabel training pids to a contiguous 0-based range
                idx2label = {
                    idx: label
                    for label, idx in enumerate(train_idxs)
                }

                train, test_a, test_b = [], [], []
                for img_path, pid, camid in img_list:
                    if pid in train_idxs:
                        train.append((img_path, idx2label[pid], camid))
                    else:
                        if camid == 0:
                            test_a.append((img_path, pid, camid))
                        else:
                            test_b.append((img_path, pid, camid))

                # use cameraA as query and cameraB as gallery
                split = {
                    'train': train,
                    'query': test_a,
                    'gallery': test_b,
                    'num_train_pids': num_train_pids,
                    'num_query_pids': num_pids - num_train_pids,
                    'num_gallery_pids': num_pids - num_train_pids
                }
                splits.append(split)

                # use cameraB as query and cameraA as gallery
                split = {
                    'train': train,
                    'query': test_b,
                    'gallery': test_a,
                    'num_train_pids': num_train_pids,
                    'num_query_pids': num_pids - num_train_pids,
                    'num_gallery_pids': num_pids - num_train_pids
                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
            print('Split file saved to {}'.format(self.split_path))


================================================ FILE: torchreid/data/datasets/image/cuhk02.py ================================================
from __future__ import division, print_function, absolute_import
import glob
import os.path as osp

from ..dataset import ImageDataset


class CUHK02(ImageDataset):
    """CUHK02.

    Reference:
        Li and Wang. Locally Aligned Feature Transforms across Views.
CVPR 2013. URL: ``_ Dataset statistics: - 5 camera view pairs each with two cameras - 971, 306, 107, 193 and 239 identities from P1 - P5 - totally 1,816 identities - image format is png Protocol: Use P1 - P4 for training and P5 for evaluation. """ dataset_dir = 'cuhk02' cam_pairs = ['P1', 'P2', 'P3', 'P4', 'P5'] test_cam_pair = 'P5' eval_metric = 'default' def __init__(self, root='', **kwargs): self.root = osp.abspath(osp.expanduser(root)) self.dataset_dir = osp.join(self.root, self.dataset_dir, 'Dataset') required_files = [self.dataset_dir] self.check_before_run(required_files) train, query, gallery = self.get_data_list() super(CUHK02, self).__init__(train, query, gallery, **kwargs) def get_data_list(self): num_train_pids, camid = 0, 0 train, query, gallery = [], [], [] for cam_pair in self.cam_pairs: cam_pair_dir = osp.join(self.dataset_dir, cam_pair) cam1_dir = osp.join(cam_pair_dir, 'cam1') cam2_dir = osp.join(cam_pair_dir, 'cam2') impaths1 = glob.glob(osp.join(cam1_dir, '*.png')) impaths2 = glob.glob(osp.join(cam2_dir, '*.png')) if cam_pair == self.test_cam_pair: # add images to query for impath in impaths1: pid = osp.basename(impath).split('_')[0] pid = int(pid) query.append((impath, pid, camid)) camid += 1 # add images to gallery for impath in impaths2: pid = osp.basename(impath).split('_')[0] pid = int(pid) gallery.append((impath, pid, camid)) camid += 1 else: pids1 = [ osp.basename(impath).split('_')[0] for impath in impaths1 ] pids2 = [ osp.basename(impath).split('_')[0] for impath in impaths2 ] pids = set(pids1 + pids2) pid2label = { pid: label + num_train_pids for label, pid in enumerate(pids) } # add images to train from cam1 for impath in impaths1: pid = osp.basename(impath).split('_')[0] pid = pid2label[pid] train.append((impath, pid, camid)) camid += 1 # add images to train from cam2 for impath in impaths2: pid = osp.basename(impath).split('_')[0] pid = pid2label[pid] train.append((impath, pid, camid)) camid += 1 num_train_pids += len(pids) return 
class CUHK03(ImageDataset):
    """CUHK03.

    Reference:
        Li et al. DeepReID: Deep Filter Pairing Neural Network for Person Re-identification. CVPR 2014.

    URL: ``_

    Dataset statistics:
        - identities: 1360.
        - images: 13164.
        - cameras: 6.
        - splits: 20 (classic).
    """
    dataset_dir = 'cuhk03'
    dataset_url = None
    eval_metric = 'default'

    def __init__(
        self,
        root='',
        split_id=0,
        cuhk03_labeled=False,
        cuhk03_classic_split=False,
        **kwargs
    ):
        # split_id selects one of the pre-generated splits; cuhk03_labeled
        # switches between hand-labeled and auto-detected boxes;
        # cuhk03_classic_split switches between the 20 classic CVPR'14 splits
        # and the single new 767/700 protocol (Zhong et al. CVPR'17).
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
        self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

        # png dumps extracted from the raw .mat file (created by preprocess_split)
        self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
        self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

        # cached JSON split files, one pair per protocol/box type
        self.split_classic_det_json_path = osp.join(
            self.dataset_dir, 'splits_classic_detected.json'
        )
        self.split_classic_lab_json_path = osp.join(
            self.dataset_dir, 'splits_classic_labeled.json'
        )

        self.split_new_det_json_path = osp.join(
            self.dataset_dir, 'splits_new_detected.json'
        )
        self.split_new_lab_json_path = osp.join(
            self.dataset_dir, 'splits_new_labeled.json'
        )

        # official .mat config files for the new 767/700 protocol
        self.split_new_det_mat_path = osp.join(
            self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat'
        )
        self.split_new_lab_mat_path = osp.join(
            self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat'
        )

        required_files = [
            self.dataset_dir, self.data_dir, self.raw_mat_path,
            self.split_new_det_mat_path, self.split_new_lab_mat_path
        ]
        self.check_before_run(required_files)

        # extracts images / builds all split JSONs on first run, no-op after
        self.preprocess_split()

        if cuhk03_labeled:
            split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
        else:
            split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

        splits = read_json(split_path)
        assert split_id < len(
            splits
        ), 'Condition split_id ({}) < len(splits) ({}) is false'.format(
            split_id, len(splits)
        )
        split = splits[split_id]

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        super(CUHK03, self).__init__(train, query, gallery, **kwargs)

    def preprocess_split(self):
        """One-off preprocessing: extract images from cuhk-03.mat and build all split JSONs."""
        # This function is a bit complex and ugly, what it does is
        # 1. extract data from cuhk-03.mat and save as png images
        # 2. create 20 classic splits (Li et al. CVPR'14)
        # 3. create new split (Zhong et al. CVPR'17)
        # All outputs are cached on disk; if everything already exists we
        # return immediately so this only runs on the first use.
        if osp.exists(self.imgs_labeled_dir) \
                and osp.exists(self.imgs_detected_dir) \
                and osp.exists(self.split_classic_det_json_path) \
                and osp.exists(self.split_classic_lab_json_path) \
                and osp.exists(self.split_new_det_json_path) \
                and osp.exists(self.split_new_lab_json_path):
            return

        # Imported lazily: only needed for this one-off preprocessing step.
        # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2; this
        # import fails on modern SciPy (imageio.imwrite is the usual
        # replacement) — confirm the pinned SciPy version before relying on it.
        import h5py
        from scipy.misc import imsave
        from scipy.io import loadmat

        mkdir_if_missing(self.imgs_detected_dir)
        mkdir_if_missing(self.imgs_labeled_dir)

        print(
            'Extract image data from "{}" and save as png'.format(
                self.raw_mat_path
            )
        )
        # the .mat file is a MATLAB v7.3 container, hence h5py not loadmat
        mat = h5py.File(self.raw_mat_path, 'r')

        def _deref(ref):
            # follow an h5py object reference and transpose (MATLAB is column-major)
            return mat[ref][:].T

        def _process_images(img_refs, campid, pid, save_dir):
            # dump one person's images to png; returns the saved paths
            img_paths = [] # Note: some persons only have images for one view
            for imgid, img_ref in enumerate(img_refs):
                img = _deref(img_ref)
                if img.size == 0 or img.ndim < 3:
                    continue # skip empty cell
                # images are saved with the following format, index-1 (ensure uniqueness)
                # campid: index of camera pair (1-5)
                # pid: index of person in 'campid'-th camera pair
                # viewid: index of view, {1, 2}
                # imgid: index of image, (1-10)
                viewid = 1 if imgid < 5 else 2
                img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(
                    campid + 1, pid + 1, viewid, imgid + 1
                )
                img_path = osp.join(save_dir, img_name)
                if not osp.isfile(img_path):
                    imsave(img_path, img)
                img_paths.append(img_path)
            return img_paths

        def _extract_img(image_type):
            # walk every camera pair / person of one box type ('detected' or
            # 'labeled') and return [(campid, pid, img_paths), ...] metadata
            print('Processing {} images ...'.format(image_type))
            meta_data = []
            imgs_dir = self.imgs_detected_dir if image_type == 'detected' else self.imgs_labeled_dir
            for campid, camp_ref in enumerate(mat[image_type][0]):
                camp = _deref(camp_ref)
                num_pids = camp.shape[0]
                for pid in range(num_pids):
                    img_paths = _process_images(
                        camp[pid, :], campid, pid, imgs_dir
                    )
                    assert len(img_paths) > 0, \
                        'campid{}-pid{} has no images'.format(campid, pid)
                    meta_data.append((campid + 1, pid + 1, img_paths))
                print(
                    '- done camera pair {} with {} identities'.format(
                        campid + 1, num_pids
                    )
                )
            return meta_data

        meta_detected = _extract_img('detected')
        meta_labeled = _extract_img('labeled')

        def _extract_classic_split(meta_data, test_split):
            # partition identities into train/test per one classic split;
            # pids are relabeled to contiguous 0-based ranges as we go
            train, test = [], []
            num_train_pids, num_test_pids = 0, 0
            num_train_imgs, num_test_imgs = 0, 0
            for i, (campid, pid, img_paths) in enumerate(meta_data):
                if [campid, pid] in test_split:
                    for img_path in img_paths:
                        # camid is the viewid field of the png name (see
                        # _process_images naming scheme)
                        camid = int(
                            osp.basename(img_path).split('_')[2]
                        ) - 1 # make it 0-based
                        test.append((img_path, num_test_pids, camid))
                    num_test_pids += 1
                    num_test_imgs += len(img_paths)
                else:
                    for img_path in img_paths:
                        camid = int(
                            osp.basename(img_path).split('_')[2]
                        ) - 1 # make it 0-based
                        train.append((img_path, num_train_pids, camid))
                    num_train_pids += 1
                    num_train_imgs += len(img_paths)
            return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs

        print('Creating classic splits (# = 20) ...')
        splits_classic_det, splits_classic_lab = [], []
        for split_ref in mat['testsets'][0]:
            test_split = _deref(split_ref).tolist()

            # create split for detected images
            train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
                _extract_classic_split(meta_detected, test_split)
            splits_classic_det.append(
                {
                    'train': train,
                    'query': test,
                    'gallery': test,
                    'num_train_pids': num_train_pids,
                    'num_train_imgs': num_train_imgs,
                    'num_query_pids': num_test_pids,
                    'num_query_imgs': num_test_imgs,
                    'num_gallery_pids': num_test_pids,
                    'num_gallery_imgs': num_test_imgs
                }
            )

            # create split for labeled images
            train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \
                _extract_classic_split(meta_labeled, test_split)
            splits_classic_lab.append(
                {
                    'train': train,
                    'query': test,
                    'gallery': test,
                    'num_train_pids': num_train_pids,
                    'num_train_imgs': num_train_imgs,
                    'num_query_pids': num_test_pids,
                    'num_query_imgs': num_test_imgs,
                    'num_gallery_pids': num_test_pids,
                    'num_gallery_imgs': num_test_imgs
                }
            )

        write_json(splits_classic_det, self.split_classic_det_json_path)
        write_json(splits_classic_lab, self.split_classic_lab_json_path)

        def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel):
            # build one (set, num_unique_pids, num_imgs) triple for the new
            # protocol from the given index array
            tmp_set = []
            unique_pids = set()
            for idx in idxs:
                img_name = filelist[idx][0]
                camid = int(img_name.split('_')[2]) - 1 # make it 0-based
                pid = pids[idx]
                if relabel:
                    pid = pid2label[pid]
                img_path = osp.join(img_dir, img_name)
                tmp_set.append((img_path, int(pid), camid))
                unique_pids.add(pid)
            return tmp_set, len(unique_pids), len(idxs)

        def _extract_new_split(split_dict, img_dir):
            # parse one 767/700 protocol .mat config into train/query/gallery
            # info triples; only train pids are relabeled
            train_idxs = split_dict['train_idx'].flatten() - 1 # index-0
            pids = split_dict['labels'].flatten()
            train_pids = set(pids[train_idxs])
            pid2label = {pid: label for label, pid in enumerate(train_pids)}
            query_idxs = split_dict['query_idx'].flatten() - 1
            gallery_idxs = split_dict['gallery_idx'].flatten() - 1
            filelist = split_dict['filelist'].flatten()
            train_info = _extract_set(
                filelist, pids, pid2label, train_idxs, img_dir, relabel=True
            )
            query_info = _extract_set(
                filelist, pids, pid2label, query_idxs, img_dir, relabel=False
            )
            gallery_info = _extract_set(
                filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False
            )
            return train_info, query_info, gallery_info

        print('Creating new split for detected images (767/700) ...')
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_det_mat_path), self.imgs_detected_dir
        )
        split = [
            {
                'train': train_info[0],
                'query': query_info[0],
                'gallery': gallery_info[0],
                'num_train_pids': train_info[1],
                'num_train_imgs': train_info[2],
                'num_query_pids': query_info[1],
                'num_query_imgs': query_info[2],
                'num_gallery_pids': gallery_info[1],
                'num_gallery_imgs': gallery_info[2]
            }
        ]
        write_json(split, self.split_new_det_json_path)

        print('Creating new split for labeled images (767/700) ...')
        train_info, query_info, gallery_info = _extract_new_split(
            loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir
        )
        split = [
            {
                'train': train_info[0],
                'query': query_info[0],
                'gallery': gallery_info[0],
                'num_train_pids': train_info[1],
                'num_train_imgs': train_info[2],
                'num_query_pids': query_info[1],
                'num_query_imgs': query_info[2],
                'num_gallery_pids': gallery_info[1],
                'num_gallery_imgs': gallery_info[2]
            }
        ]
        write_json(split, self.split_new_lab_json_path)
""" dataset_dir = 'DukeMTMC-reID' dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip' masks_base_dir = 'masks' masks_dirs = { # dir_name: (parts_num, masks_stack_size, contains_background_mask) 'pifpaf': (36, False, '.jpg.confidence_fields.npy'), 'pifpaf_maskrcnn_filtering': (36, False, '.npy'), } @staticmethod def get_masks_config(masks_dir): if masks_dir not in DukeMTMCreID.masks_dirs: return None else: return DukeMTMCreID.masks_dirs[masks_dir] def __init__(self, root='', masks_dir=None, **kwargs): self.masks_dir = masks_dir if self.masks_dir in self.masks_dirs: self.masks_parts_numbers, self.has_background, self.masks_suffix = self.masks_dirs[self.masks_dir] else: self.masks_parts_numbers, self.has_background, self.masks_suffix = None, None, None self.root = osp.abspath(osp.expanduser(root)) self.dataset_dir = osp.join(self.root, self.dataset_dir) self.download_dataset(self.dataset_dir, self.dataset_url) self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train') self.query_dir = osp.join(self.dataset_dir, 'query') self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test') required_files = [ self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir ] self.check_before_run(required_files) train = self.process_dir(self.train_dir, relabel=True) query = self.process_dir(self.query_dir, relabel=False) gallery = self.process_dir(self.gallery_dir, relabel=False) super(DukeMTMCreID, self).__init__(train, query, gallery, **kwargs) def process_dir(self, dir_path, relabel=False): img_paths = glob.glob(osp.join(dir_path, '*.jpg')) pattern = re.compile(r'([-\d]+)_c(\d)') pid_container = set() for img_path in img_paths: pid, _ = map(int, pattern.search(img_path).groups()) pid_container.add(pid) pid2label = {pid: label for label, pid in enumerate(pid_container)} data = [] for img_path in img_paths: pid, camid = map(int, pattern.search(img_path).groups()) assert 1 <= camid <= 8 camid -= 1 # index starts from 0 if relabel: pid = 
class GRID(ImageDataset):
    """GRID.

    Reference:
        Loy et al. Multi-camera activity correlation analysis. CVPR 2009.

    URL: ``_

    Dataset statistics:
        - identities: 250.
        - images: 1275.
        - cameras: 8.
    """
    dataset_dir = 'grid'
    dataset_url = 'http://personal.ie.cuhk.edu.hk/~ccloy/files/datasets/underground_reid.zip'

    def __init__(self, root='', split_id=0, **kwargs):
        # split_id selects one of the 10 pre-generated random splits.
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)
        self.probe_path = osp.join(
            self.dataset_dir, 'underground_reid', 'probe'
        )
        self.gallery_path = osp.join(
            self.dataset_dir, 'underground_reid', 'gallery'
        )
        # official .mat file shipping the 10 train/test partitions
        self.split_mat_path = osp.join(
            self.dataset_dir, 'underground_reid', 'features_and_partitions.mat'
        )
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        required_files = [
            self.dataset_dir, self.probe_path, self.gallery_path,
            self.split_mat_path
        ]
        self.check_before_run(required_files)

        # builds splits.json from the .mat file on first run, no-op after
        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, '
                'but expected between 0 and {}'.format(
                    split_id,
                    len(splits) - 1
                )
            )
        split = splits[split_id]

        train = split['train']
        query = split['query']
        gallery = split['gallery']

        # JSON stores samples as lists; convert back to (img_path, pid, camid) tuples
        train = [tuple(item) for item in train]
        query = [tuple(item) for item in query]
        gallery = [tuple(item) for item in gallery]

        super(GRID, self).__init__(train, query, gallery, **kwargs)

    def prepare_split(self):
        """Generate the 10 official random splits once and cache them as JSON."""
        if not osp.exists(self.split_path):
            print('Creating 10 random splits')
            split_mat = loadmat(self.split_mat_path)
            trainIdxAll = split_mat['trainIdxAll'][0] # length = 10
            probe_img_paths = sorted(
                glob.glob(osp.join(self.probe_path, '*.jpeg'))
            )
            gallery_img_paths = sorted(
                glob.glob(osp.join(self.gallery_path, '*.jpeg'))
            )

            splits = []
            for split_idx in range(10):
                # NOTE(review): the nested indexing mirrors the MATLAB struct
                # layout of features_and_partitions.mat and is assumed to yield
                # the per-split training image indices — confirm against the
                # dataset release if the file format ever changes.
                train_idxs = trainIdxAll[split_idx][0][0][2][0].tolist()
                assert len(train_idxs) == 125
                # map raw image index -> contiguous 0-based train label
                idx2label = {
                    idx: label
                    for label, idx in enumerate(train_idxs)
                }

                train, query, gallery = [], [], []

                # processing probe folder
                # image name format: <img_idx>_<camid>_...
                for img_path in probe_img_paths:
                    img_name = osp.basename(img_path)
                    img_idx = int(img_name.split('_')[0])
                    camid = int(
                        img_name.split('_')[1]
                    ) - 1 # index starts from 0
                    if img_idx in train_idxs:
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
                        query.append((img_path, img_idx, camid))

                # process gallery folder
                for img_path in gallery_img_paths:
                    img_name = osp.basename(img_path)
                    img_idx = int(img_name.split('_')[0])
                    camid = int(
                        img_name.split('_')[1]
                    ) - 1 # index starts from 0
                    if img_idx in train_idxs:
                        train.append((img_path, idx2label[img_idx], camid))
                    else:
                        gallery.append((img_path, img_idx, camid))

                split = {
                    'train': train,
                    'query': query,
                    'gallery': gallery,
                    'num_train_pids': 125,
                    'num_query_pids': 125,
                    'num_gallery_pids': 900
                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
            print('Split file saved to {}'.format(self.split_path))
class iLIDS(ImageDataset):
    """QMUL-iLIDS.

    Reference:
        Zheng et al. Associating Groups of People. BMVC 2009.

    Dataset statistics:
        - identities: 119.
        - images: 476.
        - cameras: 8 (not explicitly provided).
    """
    dataset_dir = 'ilids'
    dataset_url = 'http://www.eecs.qmul.ac.uk/~jason/data/i-LIDS_Pedestrian.tgz'

    def __init__(self, root='', split_id=0, **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.data_dir = osp.join(self.dataset_dir, 'i-LIDS_Pedestrian/Persons')
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        self.check_before_run([self.dataset_dir, self.data_dir])

        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, but '
                'expected between 0 and {}'.format(split_id, len(splits) - 1)
            )
        chosen = splits[split_id]

        train, query, gallery = self.process_split(chosen)

        super(iLIDS, self).__init__(train, query, gallery, **kwargs)

    def prepare_split(self):
        """Create 10 random 50/50 identity splits once and cache them as JSON."""
        if osp.exists(self.split_path):
            return
        print('Creating splits ...')

        paths = glob.glob(osp.join(self.data_dir, '*.jpg'))
        img_names = [osp.basename(path) for path in paths]
        num_imgs = len(img_names)
        assert num_imgs == 476, 'There should be 476 images, but ' \
            'got {}, please check the data'.format(num_imgs)

        # store image names
        # image naming format:
        #   the first four digits denote the person ID
        #   the last four digits denote the sequence index
        pid_dict = defaultdict(list)
        for img_name in img_names:
            pid_dict[int(img_name[:4])].append(img_name)

        pids = list(pid_dict.keys())
        num_pids = len(pids)
        assert num_pids == 119, 'There should be 119 identities, ' \
            'but got {}, please check the data'.format(num_pids)

        num_train_pids = int(num_pids * 0.5)

        splits = []
        for _ in range(10):
            # randomly choose num_train_pids train IDs and the rest for test IDs
            shuffled = copy.deepcopy(pids)
            random.shuffle(shuffled)
            train_pids = shuffled[:num_train_pids]
            test_pids = shuffled[num_train_pids:]

            # every image of a training identity goes to the train set
            train = []
            for pid in train_pids:
                train.extend(pid_dict[pid])

            # for each test identity, pick two images: one query, one gallery
            query, gallery = [], []
            for pid in test_pids:
                chosen = random.sample(pid_dict[pid], 2)
                query.append(chosen[0])
                gallery.append(chosen[1])

            splits.append({'train': train, 'query': query, 'gallery': gallery})

        print('Totally {} splits are created'.format(len(splits)))
        write_json(splits, self.split_path)
        print('Split file is saved to {}'.format(self.split_path))

    def get_pid2label(self, img_names):
        """Map the pids appearing in ``img_names`` to contiguous 0-based labels."""
        unique_pids = {int(name[:4]) for name in img_names}
        return {pid: label for label, pid in enumerate(unique_pids)}

    def parse_img_names(self, img_names, pid2label=None):
        """Turn image file names into sample dicts, optionally relabeling pids."""
        samples = []
        for name in img_names:
            raw_pid = int(name[:4])
            pid = pid2label[raw_pid] if pid2label is not None else raw_pid
            camid = int(name[4:7]) - 1  # 0-based
            samples.append({
                'img_path': osp.join(self.data_dir, name),
                'pid': pid,
                'camid': camid,
            })
        return samples

    def process_split(self, split):
        """Materialize one cached split into (train, query, gallery) sample lists."""
        train_pid2label = self.get_pid2label(split['train'])
        train = self.parse_img_names(split['train'], train_pid2label)
        query = self.parse_img_names(split['query'])
        gallery = self.parse_img_names(split['gallery'])
        return train, query, gallery
class Market1501(ImageDataset):
    """Market1501.

    Reference:
        Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015.

    URL: ``_

    Dataset statistics:
        - identities: 1501 (+1 for background).
        - images: 12936 (train) + 3368 (query) + 15913 (gallery).
    """
    _junk_pids = [0, -1]
    dataset_dir = 'Market-1501-v15.09.15'
    masks_base_dir = 'masks'
    dataset_url = 'http://188.138.127.15:81/Datasets/Market-1501-v15.09.15.zip'
    masks_dirs = {
        # dir_name: (parts_num, contains_background_mask, masks_suffix)
        'pifpaf': (36, False, '.jpg.confidence_fields.npy'),
        'pifpaf_maskrcnn_filtering': (36, False, '.npy'),
    }

    @staticmethod
    def get_masks_config(masks_dir):
        """Return the masks configuration tuple for ``masks_dir``, or None if unknown."""
        if masks_dir not in Market1501.masks_dirs:
            return None
        else:
            return Market1501.masks_dirs[masks_dir]

    def __init__(self, root='', market1501_500k=False, masks_dir=None, **kwargs):
        # market1501_500k adds the 500k distractor images to the gallery.
        self.masks_dir = masks_dir
        if self.masks_dir in self.masks_dirs:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = self.masks_dirs[self.masks_dir]
        else:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = None, None, None
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)
        # FIX: dropped a duplicated `self.masks_dir = masks_dir` that
        # re-assigned the attribute already set above (dead code).

        # allow alternative directory structure
        if not osp.isdir(self.dataset_dir):
            warnings.warn(
                'The current data structure is deprecated. Please '
                'put data folders such as "bounding_box_train" under '
                '"Market-1501-v15.09.15".'
            )

        self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
        self.query_dir = osp.join(self.dataset_dir, 'query')
        self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test')
        self.extra_gallery_dir = osp.join(self.dataset_dir, 'images')
        self.market1501_500k = market1501_500k

        required_files = [
            self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir
        ]
        if self.market1501_500k:
            required_files.append(self.extra_gallery_dir)
        self.check_before_run(required_files)

        train = self.process_dir(self.train_dir, relabel=True)
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False)
        if self.market1501_500k:
            gallery += self.process_dir(self.extra_gallery_dir, relabel=False)

        super(Market1501, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, relabel=False):
        """Parse ``<pid>_c<camid>`` jpg names under ``dir_path`` into sample dicts.

        pid == -1 marks junk images and is skipped; pid == 0 is the background
        class and is kept (filtered later via ``_junk_pids`` at evaluation).
        """
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')

        pid_container = set()
        for img_path in img_paths:
            pid, _ = map(int, pattern.search(img_path).groups())
            if pid == -1:
                continue # junk images are just ignored
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        data = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
            if pid == -1:
                continue # junk images are just ignored
            assert 0 <= pid <= 1501 # pid == 0 means background
            assert 1 <= camid <= 6
            camid -= 1 # index starts from 0
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return data
# Log
# 22.01.2019
# - add v2
# - v1 and v2 differ in dir names
# - note that faces in v2 are blurred
TRAIN_DIR_KEY = 'train_dir'
TEST_DIR_KEY = 'test_dir'
VERSION_DICT = {
    'MSMT17_V1': {
        TRAIN_DIR_KEY: 'train',
        TEST_DIR_KEY: 'test',
    },
    'MSMT17_V2': {
        TRAIN_DIR_KEY: 'mask_train_v2',
        TEST_DIR_KEY: 'mask_test_v2',
    }
}


class MSMT17(ImageDataset):
    """MSMT17.

    Reference:
        Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018.

    URL: ``_

    Dataset statistics:
        - identities: 4101.
        - images: 32621 (train) + 11659 (query) + 82161 (gallery).
        - cameras: 15.
    """
    dataset_dir = 'msmt17'
    dataset_url = None
    masks_dirs = {
        # dir_name: (parts_num, contains_background_mask, masks_suffix)
    }

    @staticmethod
    def get_masks_config(masks_dir):
        """Return the masks configuration tuple for ``masks_dir`` (None if unknown)."""
        return MSMT17.masks_dirs.get(masks_dir)

    def __init__(self, root='', masks_dir=None, **kwargs):
        # Resolve masks configuration; the table is currently empty, so all
        # three fields default to None.
        self.masks_dir = masks_dir
        mask_cfg = self.masks_dirs.get(self.masks_dir, (None, None, None))
        self.masks_parts_numbers, self.has_background, self.masks_suffix = mask_cfg

        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        # Detect which release (V1 or V2) is present on disk.
        main_dir = next(
            (
                version for version in VERSION_DICT
                if osp.exists(osp.join(self.dataset_dir, version))
            ),
            None,
        )
        assert main_dir is not None, 'Dataset folder not found'
        train_dir = VERSION_DICT[main_dir][TRAIN_DIR_KEY]
        test_dir = VERSION_DICT[main_dir][TEST_DIR_KEY]

        self.train_dir = osp.join(self.dataset_dir, main_dir, train_dir)
        self.test_dir = osp.join(self.dataset_dir, main_dir, test_dir)
        self.list_train_path = osp.join(
            self.dataset_dir, main_dir, 'list_train.txt'
        )
        self.list_val_path = osp.join(
            self.dataset_dir, main_dir, 'list_val.txt'
        )
        self.list_query_path = osp.join(
            self.dataset_dir, main_dir, 'list_query.txt'
        )
        self.list_gallery_path = osp.join(
            self.dataset_dir, main_dir, 'list_gallery.txt'
        )

        self.check_before_run([self.dataset_dir, self.train_dir, self.test_dir])

        train = self.process_dir(self.train_dir, self.list_train_path)
        val = self.process_dir(self.train_dir, self.list_val_path)
        query = self.process_dir(self.test_dir, self.list_query_path)
        gallery = self.process_dir(self.test_dir, self.list_gallery_path)

        # Note: to fairly compare with published methods on the conventional ReID setting,
        # do not add val images to the training set.
        if kwargs.get('combineall'):
            train += val

        super(MSMT17, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, list_path):
        """Parse one ``<rel_path> <pid>`` list file into sample dicts."""
        samples = []
        with open(list_path, 'r') as txt:
            for line in txt:
                rel_path, pid_str = line.split(' ')
                pid = int(pid_str)  # no need to relabel
                camid = int(rel_path.split('_')[2]) - 1  # index starts from 0
                img_path = osp.join(dir_path, rel_path)
                masks_path = self.infer_masks_path(img_path)
                samples.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return samples
class OccludedDuke(ImageDataset):
    """Occluded-DukeMTMC.

    Sources:
        - https://github.com/hh23333/PVPM
        - https://github.com/lightas/Occluded-DukeMTMC-Dataset
    Reference:
        Miao et al. Pose-guided feature alignment for occluded person
        re-identification. ICCV 2019.
    """
    dataset_dir = 'Occluded_Duke'
    masks_base_dir = 'masks'
    masks_dirs = {
        # dir_name: (parts_num, contains_background_mask, masks_suffix[, part_names])
        'pifpaf': (36, False, '.jpg.confidence_fields.npy'),
        'pifpaf_maskrcnn_filtering': (36, False, '.jpg.confidence_fields.npy'),
        'isp_6_parts': (5, True, '.jpg.confidence_fields.npy', ["p{}".format(p) for p in range(1, 5+1)])
    }

    @staticmethod
    def get_masks_config(masks_dir):
        """Return the masks configuration tuple for ``masks_dir``, or None if unknown."""
        if masks_dir not in OccludedDuke.masks_dirs:
            return None
        else:
            return OccludedDuke.masks_dirs[masks_dir]

    def __init__(self, root='', masks_dir=None, **kwargs):
        self.masks_dir = masks_dir
        if self.masks_dir in self.masks_dirs:
            # FIX: the 'isp_6_parts' entry carries a 4th element (part names),
            # so unpacking the whole tuple into three names raised ValueError
            # for that masks_dir. Only the first three fields are consumed
            # here, so slice to them explicitly.
            self.masks_parts_numbers, self.has_background, self.masks_suffix = self.masks_dirs[self.masks_dir][:3]
        else:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = None, None, None
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.train_dir = osp.join(self.dataset_dir, 'bounding_box_train')
        self.query_dir = osp.join(self.dataset_dir, 'query')
        self.gallery_dir = osp.join(self.dataset_dir, 'bounding_box_test')

        required_files = [
            self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir
        ]
        self.check_before_run(required_files)

        train = self.process_dir(self.train_dir, relabel=True)
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False)

        super(OccludedDuke, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, relabel=False):
        """Parse ``<pid>_c<camid>`` jpg names under ``dir_path`` into sample dicts."""
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        pattern = re.compile(r'([-\d]+)_c(\d)')

        pid_container = set()
        for img_path in img_paths:
            pid, _ = map(int, pattern.search(img_path).groups())
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        data = []
        for img_path in img_paths:
            pid, camid = map(int, pattern.search(img_path).groups())
            assert 1 <= camid <= 8
            camid -= 1 # index starts from 0
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return data
class OccludedReID(ImageDataset):
    """Occluded-REID: evaluation-only occluded person re-id dataset.

    Query images show occluded bodies, gallery images whole bodies; the
    training set is empty. Images are named ``<pid>_<idx>.tif``.

    Sources:
        https://github.com/hh23333/PVPM

    Reference:
        Zhuo, J., Chen, Z., Lai, J., & Wang, G. (2018).
        Occluded Person Re-identification.
    """
    dataset_dir = 'Occluded_REID'
    masks_base_dir = 'masks'
    # dir_name: (parts_num, contains_background_mask, masks_suffix)
    masks_dirs = {
        'pifpaf': (36, False, '.tif.confidence_fields.npy'),
        'pifpaf_maskrcnn_filtering': (36, False, '.npy'),
    }

    @staticmethod
    def get_masks_config(masks_dir):
        """Return the masks configuration tuple for ``masks_dir`` or None."""
        if masks_dir not in OccludedReID.masks_dirs:
            return None
        else:
            return OccludedReID.masks_dirs[masks_dir]

    def infer_masks_path(self, img_path):
        """Build the masks file path mirroring the image folder layout."""
        masks_path = os.path.join(
            self.dataset_dir, self.masks_base_dir, self.masks_dir,
            # keep the body-type folder (occluded_body_images / whole_body_images)
            os.path.basename(os.path.dirname(os.path.dirname(img_path))),
            os.path.splitext(os.path.basename(img_path))[0] + self.masks_suffix)
        return masks_path

    def __init__(self, root='', masks_dir=None, **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.masks_dir = masks_dir
        if self.masks_dir in self.masks_dirs:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = self.masks_dirs[self.masks_dir]
        else:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = None, None, None

        # allow alternative directory structure
        if not osp.isdir(self.dataset_dir):
            # FIX: the warning used to mention Market-1501 folders (copy-paste
            # from another dataset); point users at the Occluded-REID layout.
            warnings.warn(
                'Dataset folder "{}" not found. Please put the '
                '"occluded_body_images" and "whole_body_images" folders under '
                '"Occluded_REID".'.format(self.dataset_dir)
            )

        self.query_dir = osp.join(self.dataset_dir, 'occluded_body_images')
        self.gallery_dir = osp.join(self.dataset_dir, 'whole_body_images')

        train = []  # test-only dataset: no training identities
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False, is_query=False)

        super(OccludedReID, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, relabel=False, is_query=True):
        """Collect samples; query images get camid 0 and gallery camid 1 so
        cross-camera evaluation keeps the two sets distinct."""
        img_paths = glob.glob(osp.join(dir_path, '*', '*.tif'))
        if is_query:
            camid = 0
        else:
            camid = 1
        pid_container = set()
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return data
class P_ETHZ(ImageDataset):
    """P-ETHZ: occluded person re-id test dataset built from ETHZ sequences.

    Query images show occluded bodies, gallery images whole bodies; the
    training set is empty. Images are named ``<pid>_<idx>.png``.

    Sources:
        https://github.com/hh23333/PVPM

    Reference:
        A. Ess, B. Leibe, K. Schindler, and L. Van Gool, "A mobile vision
        system for robust multi-person tracking", CVPR 2008.
    """
    dataset_dir = 'P_ETHZ'

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        data_dir = osp.join(self.root, self.dataset_dir)
        # FIX: self.data_dir used to be assigned only when the folder existed,
        # so the missing-folder branch crashed right below with an
        # AttributeError instead of surfacing the warning to the user.
        self.data_dir = data_dir
        if not osp.isdir(data_dir):
            warnings.warn('The current data structure is deprecated.')

        self.query_dir = osp.join(self.data_dir, 'occluded_body_images')
        self.gallery_dir = osp.join(self.data_dir, 'whole_body_images')

        train = []  # test-only dataset: no training identities
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False, is_query=False)

        super(P_ETHZ, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, relabel=False, is_query=True):
        """Collect samples from ``dir_path``; camid 0 for query, 1 for gallery."""
        img_paths = glob.glob(osp.join(dir_path, '*', '*.png'))
        camid = 0 if is_query else 1
        pid_container = set()
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            data.append({'img_path': img_path, 'pid': pid, 'camid': camid})
        return data
class PDukemtmcReid(ImageDataset):
    """P-DukeMTMC-reID: occluded person re-id dataset derived from DukeMTMC-reID.

    Train and test splits each contain ``whole_body_images`` and
    ``occluded_body_images`` folders, one sub-folder per identity. Query
    images are occluded bodies (camid 0), gallery images whole bodies
    (camid 1). Images are named ``<pid>_<idx>.jpg``.

    Sources:
        https://github.com/hh23333/PVPM

    Reference:
        Z.D. Zheng, L. Zheng, and Y. Yang, "Unlabeled samples generated by gan
        improve the person re-identification baseline in vitro", ICCV 2017.
    """
    dataset_dir = 'P-DukeMTMC-reID'
    masks_base_dir = 'masks'
    masks_dirs = {
        # dir_name: (parts_num, masks_stack_size, contains_background_mask)
        'pifpaf': (36, False, '.jpg.confidence_fields.npy'),
        'pifpaf_maskrcnn_filtering': (36, False, '.npy'),
    }

    @staticmethod
    def get_masks_config(masks_dir):
        # Return the config tuple for a known masks dir, None otherwise.
        if masks_dir not in PDukemtmcReid.masks_dirs:
            return None
        else:
            return PDukemtmcReid.masks_dirs[masks_dir]

    def infer_masks_path(self, img_path):
        # Mirror the image sub-tree (<split>/<body-type>/<file>) under the
        # masks directory and append the configured masks suffix.
        # NOTE(review): the per-identity folder level of img_path is dropped
        # here — confirm the masks are actually stored flat per body type.
        masks_path = os.path.join(
            self.dataset_dir, self.masks_base_dir, self.masks_dir,
            os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(img_path)))),
            os.path.basename(os.path.dirname(os.path.dirname(img_path))),  # FIXME ugly
            os.path.splitext(os.path.basename(img_path))[0] + self.masks_suffix)
        return masks_path

    def __init__(self, root='', masks_dir=None, **kwargs):
        self.masks_dir = masks_dir
        if self.masks_dir in self.masks_dirs:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = self.masks_dirs[self.masks_dir]
        else:
            self.masks_parts_numbers, self.has_background, self.masks_suffix = None, None, None
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        if not osp.isdir(self.dataset_dir):
            warnings.warn(
                'The current data structure is deprecated. Please '
                'put data folders such as "bounding_box_train" under '
                '"Market-1501-v15.09.15".'
            )
        self.train_dir = osp.join(self.dataset_dir, 'train')
        self.query_dir = osp.join(self.dataset_dir, 'test', 'occluded_body_images')
        self.gallery_dir = osp.join(self.dataset_dir, 'test', 'whole_body_images')

        train = self.process_train_dir(self.train_dir, relabel=True)
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False, is_query=False)

        super(PDukemtmcReid, self).__init__(train, query, gallery, **kwargs)

    def process_train_dir(self, dir_path, relabel=True):
        # Training set merges both body types: whole-body images get camid 1,
        # occluded-body images camid 0.
        img_paths = glob.glob(osp.join(dir_path, 'whole_body_images', '*', '*.jpg'))
        camid = 1
        pid_container = set()
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            pid_container.add(pid)
        # NOTE(review): pid2label is built from the whole-body folder only and
        # reused for the occluded images below — assumes every occluded pid
        # also has whole-body images; verify against the dataset.
        pid2label = {pid: label for label, pid in enumerate(pid_container)}
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        img_paths = glob.glob(osp.join(dir_path, 'occluded_body_images', '*', '*.jpg'))
        camid = 0
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return data

    def process_dir(self, dir_path, relabel=False, is_query=True):
        # Test-time folders: camid 0 for query, 1 for gallery.
        img_paths = glob.glob(osp.join(dir_path, '*', '*.jpg'))
        if is_query:
            camid = 0
        else:
            camid = 1
        pid_container = set()
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            masks_path = self.infer_masks_path(img_path)
            data.append({'img_path': img_path, 'pid': pid, 'masks_path': masks_path, 'camid': camid})
        return data
class Partial_iLIDS(ImageDataset):
    """Partial iLIDS: test-only partial person re-id dataset.

    Query images contain partial bodies, gallery images whole bodies; the
    training set is empty. Images are named ``<pid>.jpg``.

    Sources:
        https://github.com/hh23333/PVPM

    Reference:
        Lingxiao He, Jian Liang, Haiqing Li, and Zhenan Sun, "Deep spatial
        feature reconstruction for partial person re-identification:
        Alignment-free approach", 2018.
    """
    dataset_dir = 'Partial_iLIDS'

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        data_dir = osp.join(self.root, self.dataset_dir)
        # FIX: self.data_dir used to be assigned only when the folder existed,
        # so the missing-folder branch crashed below with AttributeError.
        self.data_dir = data_dir
        if not osp.isdir(data_dir):
            warnings.warn('The current data structure is deprecated.')

        self.query_dir = osp.join(self.data_dir, 'partial_body_images')
        self.gallery_dir = osp.join(self.data_dir, 'whole_body_images')

        train = []  # test-only dataset: no training identities
        query = self.process_dir(self.query_dir)
        gallery = self.process_dir(self.gallery_dir, is_query=False)

        super(Partial_iLIDS, self).__init__(train, query, gallery, **kwargs)

        # pose files are only loaded when the transform is given as a tuple
        self.load_pose = isinstance(self.transform, tuple)
        if self.load_pose:
            if self.mode == 'query':
                self.pose_dir = osp.join(self.data_dir, 'occluded_body_pose')
            elif self.mode == 'gallery':
                self.pose_dir = osp.join(self.data_dir, 'whole_body_pose')
            else:
                self.pose_dir = ''

    def process_dir(self, dir_path, is_query=True):
        """Collect samples; camid 0 for query, 1 for gallery."""
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        camid = 0 if is_query else 1
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('.')[0])
            data.append({'img_path': img_path, 'pid': pid, 'camid': camid})
        return data
class Partial_REID(ImageDataset):
    """Partial-REID: test-only partial person re-id dataset.

    Query images contain partial bodies, gallery images whole bodies; the
    training set is empty. Images are named ``<pid>_<idx>.jpg``.

    Source:
        https://github.com/hh23333/PVPM

    Reference:
        Zheng, W. S., Li, X., Xiang, T., Liao, S., Lai, J., & Gong, S.
        Partial person re-identification. ICCV 2015.
    """
    dataset_dir = 'Partial_REID'

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        data_dir = osp.join(self.root, self.dataset_dir)
        # FIX: self.data_dir used to be assigned only when the folder existed,
        # so the missing-folder branch crashed below with AttributeError.
        self.data_dir = data_dir
        if not osp.isdir(data_dir):
            warnings.warn('The current data structure is deprecated.')

        self.query_dir = osp.join(self.data_dir, 'partial_body_images')
        self.gallery_dir = osp.join(self.data_dir, 'whole_body_images')

        train = []  # test-only dataset: no training identities
        query = self.process_dir(self.query_dir, relabel=False)
        gallery = self.process_dir(self.gallery_dir, relabel=False, is_query=False)

        super(Partial_REID, self).__init__(train, query, gallery, **kwargs)

        # pose files are only loaded when the transform is given as a tuple
        self.load_pose = isinstance(self.transform, tuple)
        if self.load_pose:
            if self.mode == 'query':
                self.pose_dir = osp.join(self.data_dir, 'occluded_body_pose')
            elif self.mode == 'gallery':
                self.pose_dir = osp.join(self.data_dir, 'whole_body_pose')
            else:
                self.pose_dir = ''

    def process_dir(self, dir_path, relabel=False, is_query=True):
        """Collect samples; camid 0 for query, 1 for gallery."""
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        camid = 0 if is_query else 1
        pid_container = set()
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}
        data = []
        for img_path in img_paths:
            img_name = img_path.split('/')[-1]
            pid = int(img_name.split('_')[0])
            if relabel:
                pid = pid2label[pid]
            data.append({'img_path': img_path, 'pid': pid, 'camid': camid})
        return data
class PRID(ImageDataset):
    """PRID (single-shot version of prid-2011)

    Reference:
        Hirzer et al. Person Re-Identification by Descriptive and
        Discriminative Classification. SCIA 2011.

    URL: ``_

    Dataset statistics:
        - Two views.
        - View A captures 385 identities.
        - View B captures 749 identities.
        - 200 identities appear in both views.
    """
    dataset_dir = 'prid2011'
    dataset_url = None

    def __init__(self, root='', split_id=0, **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.cam_a_dir = osp.join(
            self.dataset_dir, 'prid_2011', 'single_shot', 'cam_a'
        )
        self.cam_b_dir = osp.join(
            self.dataset_dir, 'prid_2011', 'single_shot', 'cam_b'
        )
        self.split_path = osp.join(self.dataset_dir, 'splits_single_shot.json')

        self.check_before_run(
            [self.dataset_dir, self.cam_a_dir, self.cam_b_dir]
        )

        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, but expected between 0 and {}'
                .format(split_id, len(splits) - 1)
            )
        train, query, gallery = self.process_split(splits[split_id])

        super(PRID, self).__init__(train, query, gallery, **kwargs)

    def prepare_split(self):
        """Create ten random 100/100 train-test id splits on first use."""
        if osp.exists(self.split_path):
            return
        print('Creating splits ...')
        splits = []
        for _ in range(10):
            # randomly sample 100 IDs for train and use the rest 100 IDs for test
            # (note: there are only 200 IDs appearing in both views)
            pids = [i for i in range(1, 201)]
            train_pids = random.sample(pids, 100)
            train_pids.sort()
            test_pids = [i for i in pids if i not in train_pids]
            splits.append({'train': train_pids, 'test': test_pids})
        print('Totally {} splits are created'.format(len(splits)))
        write_json(splits, self.split_path)
        print('Split file is saved to {}'.format(self.split_path))

    def process_split(self, split):
        """Turn one split dict into (train, query, gallery) tuple lists."""
        train_pids = split['train']
        test_pids = split['test']
        relabel = {pid: label for label, pid in enumerate(train_pids)}

        def _img(pid):
            return 'person_' + str(pid).zfill(4) + '.png'

        # training ids appear in both camera views
        train = []
        for pid in train_pids:
            name = _img(pid)
            label = relabel[pid]
            train.append((osp.join(self.cam_a_dir, name), label, 0))
            train.append((osp.join(self.cam_b_dir, name), label, 1))

        # test ids: camera A is the query view, camera B the gallery view
        query, gallery = [], []
        for pid in test_pids:
            name = _img(pid)
            query.append((osp.join(self.cam_a_dir, name), pid, 0))
            gallery.append((osp.join(self.cam_b_dir, name), pid, 1))
        # ids 201..749 only exist in camera B and act as gallery distractors
        for pid in range(201, 750):
            gallery.append((osp.join(self.cam_b_dir, _img(pid)), pid, 1))

        return train, query, gallery
class SenseReID(ImageDataset):
    """SenseReID.

    This dataset is used for test purpose only.

    Reference:
        Zhao et al. Spindle Net: Person Re-identification with Human Body
        Region Guided Feature Decomposition and Fusion. CVPR 2017.

    URL: ``_

    Dataset statistics:
        - query: 522 ids, 1040 images.
        - gallery: 1717 ids, 3388 images.
    """
    dataset_dir = 'sensereid'
    dataset_url = None

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.query_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_probe')
        self.gallery_dir = osp.join(
            self.dataset_dir, 'SenseReID', 'test_gallery'
        )

        required_files = [self.dataset_dir, self.query_dir, self.gallery_dir]
        self.check_before_run(required_files)

        query = self.process_dir(self.query_dir)
        gallery = self.process_dir(self.gallery_dir)

        # relabel gallery pids to contiguous labels 0..N-1.
        # FIX: process_dir returns dict samples, but this relabeling code used
        # to unpack them as (img_path, pid, camid) tuples; iterating a dict
        # yields its keys, so the old code silently produced garbage samples.
        g_pids = set()
        for sample in gallery:
            g_pids.add(sample['pid'])
        pid2label = {pid: i for i, pid in enumerate(g_pids)}

        query = [
            dict(sample, pid=pid2label[sample['pid']]) for sample in query
        ]
        gallery = [
            dict(sample, pid=pid2label[sample['pid']]) for sample in gallery
        ]
        train = copy.deepcopy(query) + copy.deepcopy(gallery)  # dummy variable

        super(SenseReID, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path):
        """Collect dict samples; file names follow ``<pid>_<camid>.jpg``."""
        img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
        data = []
        for img_path in img_paths:
            img_name = osp.splitext(osp.basename(img_path))[0]
            pid, camid = img_name.split('_')
            pid, camid = int(pid), int(camid)
            data.append({'img_path': img_path, 'pid': pid, 'camid': camid})
        return data
class VIPeR(ImageDataset):
    """VIPeR.

    Reference:
        Gray et al. Evaluating appearance models for recognition,
        reacquisition, and tracking. PETS 2007.

    URL: ``_

    Dataset statistics:
        - identities: 632.
        - images: 632 x 2 = 1264.
        - cameras: 2.
    """
    dataset_dir = 'viper'
    dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip'

    def __init__(self, root='', split_id=0, **kwargs):
        # split_id selects one of the 20 pre-built splits (see prepare_split).
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.cam_a_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_a')
        self.cam_b_dir = osp.join(self.dataset_dir, 'VIPeR', 'cam_b')
        self.split_path = osp.join(self.dataset_dir, 'splits.json')

        required_files = [self.dataset_dir, self.cam_a_dir, self.cam_b_dir]
        self.check_before_run(required_files)

        # build the split file on first run, then load the requested split
        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, '
                'but expected between 0 and {}'.format(
                    split_id, len(splits) - 1
                )
            )
        split = splits[split_id]

        train = split['train']
        query = split['query']  # query and gallery share the same images
        gallery = split['gallery']

        # json stores tuples as lists; restore (img_path, pid, camid) tuples
        train = [tuple(item) for item in train]
        query = [tuple(item) for item in query]
        gallery = [tuple(item) for item in gallery]

        super(VIPeR, self).__init__(train, query, gallery, **kwargs)

    def prepare_split(self):
        # Generate the split file once: 10 random half/half id partitions,
        # each emitted twice with query/gallery cameras swapped (20 splits).
        if not osp.exists(self.split_path):
            print('Creating 10 random splits of train ids and test ids')

            cam_a_imgs = sorted(glob.glob(osp.join(self.cam_a_dir, '*.bmp')))
            cam_b_imgs = sorted(glob.glob(osp.join(self.cam_b_dir, '*.bmp')))
            assert len(cam_a_imgs) == len(cam_b_imgs)
            num_pids = len(cam_a_imgs)
            print('Number of identities: {}'.format(num_pids))
            num_train_pids = num_pids // 2
            """
            In total, there will be 20 splits because each random split creates
            two sub-splits, one using cameraA as query and cameraB as gallery
            while the other using cameraB as query and cameraA as gallery.
            Therefore, results should be averaged over 20 splits (split_id=0~19).
            In practice, a model trained on split_id=0 can be applied to
            split_id=0&1 as split_id=0&1 share the same training data (so on
            and so forth).
            """
            splits = []
            for _ in range(10):
                order = np.arange(num_pids)
                np.random.shuffle(order)
                train_idxs = order[:num_train_pids]
                test_idxs = order[num_train_pids:]
                assert not bool(set(train_idxs) & set(test_idxs)), \
                    'Error: train and test overlap'

                train = []
                for pid, idx in enumerate(train_idxs):
                    cam_a_img = cam_a_imgs[idx]
                    cam_b_img = cam_b_imgs[idx]
                    train.append((cam_a_img, pid, 0))
                    train.append((cam_b_img, pid, 1))

                test_a = []
                test_b = []
                for pid, idx in enumerate(test_idxs):
                    cam_a_img = cam_a_imgs[idx]
                    cam_b_img = cam_b_imgs[idx]
                    test_a.append((cam_a_img, pid, 0))
                    test_b.append((cam_b_img, pid, 1))

                # use cameraA as query and cameraB as gallery
                split = {
                    'train': train,
                    'query': test_a,
                    'gallery': test_b,
                    'num_train_pids': num_train_pids,
                    'num_query_pids': num_pids - num_train_pids,
                    'num_gallery_pids': num_pids - num_train_pids
                }
                splits.append(split)

                # use cameraB as query and cameraA as gallery
                split = {
                    'train': train,
                    'query': test_b,
                    'gallery': test_a,
                    'num_train_pids': num_train_pids,
                    'num_query_pids': num_pids - num_train_pids,
                    'num_gallery_pids': num_pids - num_train_pids
                }
                splits.append(split)

            print('Totally {} splits are created'.format(len(splits)))
            write_json(splits, self.split_path)
            print('Split file saved to {}'.format(self.split_path))
class DukeMTMCVidReID(VideoDataset):
    """DukeMTMCVidReID.

    Reference:
        - Ristani et al. Performance Measures and a Data Set for Multi-Target,
          Multi-Camera Tracking. ECCVW 2016.
        - Wu et al. Exploit the Unknown Gradually: One-Shot Video-Based Person
          Re-Identification by Stepwise Learning. CVPR 2018.

    URL: ``_

    Dataset statistics:
        - identities: 702 (train) + 702 (test).
        - tracklets: 2196 (train) + 2636 (test).
    """
    dataset_dir = 'dukemtmc-vidreid'
    dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-VideoReID.zip'

    def __init__(self, root='', min_seq_len=0, **kwargs):
        # min_seq_len: tracklets with fewer frames than this are dropped
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.train_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/train')
        self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-VideoReID/query')
        self.gallery_dir = osp.join(
            self.dataset_dir, 'DukeMTMC-VideoReID/gallery'
        )
        # per-set json caches so the expensive directory scan runs only once
        self.split_train_json_path = osp.join(
            self.dataset_dir, 'split_train.json'
        )
        self.split_query_json_path = osp.join(
            self.dataset_dir, 'split_query.json'
        )
        self.split_gallery_json_path = osp.join(
            self.dataset_dir, 'split_gallery.json'
        )
        self.min_seq_len = min_seq_len

        required_files = [
            self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir
        ]
        self.check_before_run(required_files)

        train = self.process_dir(
            self.train_dir, self.split_train_json_path, relabel=True
        )
        query = self.process_dir(
            self.query_dir, self.split_query_json_path, relabel=False
        )
        gallery = self.process_dir(
            self.gallery_dir, self.split_gallery_json_path, relabel=False
        )

        super(DukeMTMCVidReID, self).__init__(train, query, gallery, **kwargs)

    def process_dir(self, dir_path, json_path, relabel):
        # Scan a person/tracklet/frame directory tree into a list of
        # (img_paths, pid, camid) tuples, caching the result in json_path.
        if osp.exists(json_path):
            split = read_json(json_path)
            return split['tracklets']

        print('=> Generating split json file (** this might take a while **)')
        pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
        print(
            'Processing "{}" with {} person identities'.format(
                dir_path, len(pdirs)
            )
        )

        pid_container = set()
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            pid_container.add(pid)
        pid2label = {pid: label for label, pid in enumerate(pid_container)}

        tracklets = []
        for pdir in pdirs:
            pid = int(osp.basename(pdir))
            if relabel:
                pid = pid2label[pid]
            tdirs = glob.glob(osp.join(pdir, '*'))
            for tdir in tdirs:
                raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
                num_imgs = len(raw_img_paths)

                if num_imgs < self.min_seq_len:
                    continue

                # rebuild frame paths in index order by matching F0001, F0002, ...
                img_paths = []
                for img_idx in range(num_imgs):
                    # some tracklet starts from 0002 instead of 0001
                    img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                    res = glob.glob(
                        osp.join(tdir, '*' + img_idx_name + '*.jpg')
                    )
                    if len(res) == 0:
                        warnings.warn(
                            'Index name {} in {} is missing, skip'.format(
                                img_idx_name, tdir
                            )
                        )
                        continue
                    img_paths.append(res[0])

                # camera id is encoded in the frame file name, whose layout
                # differs between the old and new naming formats
                img_name = osp.basename(img_paths[0])
                if img_name.find('_') == -1:
                    # old naming format: 0001C6F0099X30823.jpg
                    camid = int(img_name[5]) - 1
                else:
                    # new naming format: 0001_C6_F0099_X30823.jpg
                    camid = int(img_name[6]) - 1
                img_paths = tuple(img_paths)
                tracklets.append((img_paths, pid, camid))

        print('Saving split to {}'.format(json_path))
        split_dict = {'tracklets': tracklets}
        write_json(split_dict, json_path)

        return tracklets
class iLIDSVID(VideoDataset):
    """iLIDS-VID.

    Reference:
        Wang et al. Person Re-Identification by Video Ranking. ECCV 2014.

    URL: ``_

    Dataset statistics:
        - identities: 300.
        - tracklets: 600.
        - cameras: 2.
    """
    dataset_dir = 'ilids-vid'
    dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'

    def __init__(self, root='', split_id=0, **kwargs):
        # split_id selects one of the 10 train/test splits derived from the
        # official .mat split file.
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.data_dir = osp.join(self.dataset_dir, 'i-LIDS-VID')
        self.split_dir = osp.join(self.dataset_dir, 'train-test people splits')
        self.split_mat_path = osp.join(
            self.split_dir, 'train_test_splits_ilidsvid.mat'
        )
        self.split_path = osp.join(self.dataset_dir, 'splits.json')
        self.cam_1_path = osp.join(
            self.dataset_dir, 'i-LIDS-VID/sequences/cam1'
        )
        self.cam_2_path = osp.join(
            self.dataset_dir, 'i-LIDS-VID/sequences/cam2'
        )

        required_files = [self.dataset_dir, self.data_dir, self.split_dir]
        self.check_before_run(required_files)

        self.prepare_split()
        splits = read_json(self.split_path)
        if split_id >= len(splits):
            raise ValueError(
                'split_id exceeds range, received {}, but expected between 0 and {}'
                .format(split_id, len(splits) - 1)
            )
        split = splits[split_id]
        train_dirs, test_dirs = split['train'], split['test']

        train = self.process_data(train_dirs, cam1=True, cam2=True)
        query = self.process_data(test_dirs, cam1=True, cam2=False)
        gallery = self.process_data(test_dirs, cam1=False, cam2=True)

        super(iLIDSVID, self).__init__(train, query, gallery, **kwargs)

    def prepare_split(self):
        # Convert the official .mat split matrix into a json split file once.
        if not osp.exists(self.split_path):
            print('Creating splits ...')
            mat_split_data = loadmat(self.split_mat_path)['ls_set']

            num_splits = mat_split_data.shape[0]
            num_total_ids = mat_split_data.shape[1]
            assert num_splits == 10
            assert num_total_ids == 300
            num_ids_each = num_total_ids // 2

            # pids in mat_split_data are indices, so we need to transform them
            # to real pids
            person_cam1_dirs = sorted(
                glob.glob(osp.join(self.cam_1_path, '*'))
            )
            person_cam2_dirs = sorted(
                glob.glob(osp.join(self.cam_2_path, '*'))
            )
            person_cam1_dirs = [
                osp.basename(item) for item in person_cam1_dirs
            ]
            person_cam2_dirs = [
                osp.basename(item) for item in person_cam2_dirs
            ]

            # make sure persons in one camera view can be found in the other camera view
            assert set(person_cam1_dirs) == set(person_cam2_dirs)

            splits = []
            for i_split in range(num_splits):
                # first 50% for testing and the remaining for training, following Wang et al. ECCV'14.
                train_idxs = sorted(
                    list(mat_split_data[i_split, num_ids_each:])
                )
                test_idxs = sorted(
                    list(mat_split_data[i_split, :num_ids_each])
                )

                # .mat indices are 1-based; shift to 0-based list indices
                train_idxs = [int(i) - 1 for i in train_idxs]
                test_idxs = [int(i) - 1 for i in test_idxs]

                # transform pids to person dir names
                train_dirs = [person_cam1_dirs[i] for i in train_idxs]
                test_dirs = [person_cam1_dirs[i] for i in test_idxs]

                split = {'train': train_dirs, 'test': test_dirs}
                splits.append(split)

            print(
                'Totally {} splits are created, following Wang et al. ECCV\'14'
                .format(len(splits))
            )
            print('Split file is saved to {}'.format(self.split_path))
            write_json(splits, self.split_path)

    def process_data(self, dirnames, cam1=True, cam2=True):
        # Build (img_names, pid, camid) tracklets for the selected cameras.
        tracklets = []
        dirname2pid = {dirname: i for i, dirname in enumerate(dirnames)}

        for dirname in dirnames:
            if cam1:
                person_dir = osp.join(self.cam_1_path, dirname)
                img_names = glob.glob(osp.join(person_dir, '*.png'))
                assert len(img_names) > 0
                img_names = tuple(img_names)
                pid = dirname2pid[dirname]
                tracklets.append((img_names, pid, 0))

            if cam2:
                person_dir = osp.join(self.cam_2_path, dirname)
                img_names = glob.glob(osp.join(person_dir, '*.png'))
                assert len(img_names) > 0
                img_names = tuple(img_names)
                pid = dirname2pid[dirname]
                tracklets.append((img_names, pid, 1))

        return tracklets
class Mars(VideoDataset):
    """MARS.

    Reference:
        Zheng et al. MARS: A Video Benchmark for Large-Scale Person
        Re-identification. ECCV 2016.

    URL: ``_

    Dataset statistics:
        - identities: 1261.
        - tracklets: 8298 (train) + 1980 (query) + 9330 (gallery).
        - cameras: 6.
    """
    dataset_dir = 'mars'
    dataset_url = None

    def __init__(self, root='', **kwargs):
        self.root = osp.abspath(osp.expanduser(root))
        self.dataset_dir = osp.join(self.root, self.dataset_dir)
        self.download_dataset(self.dataset_dir, self.dataset_url)

        self.train_name_path = osp.join(
            self.dataset_dir, 'info/train_name.txt'
        )
        self.test_name_path = osp.join(self.dataset_dir, 'info/test_name.txt')
        self.track_train_info_path = osp.join(
            self.dataset_dir, 'info/tracks_train_info.mat'
        )
        self.track_test_info_path = osp.join(
            self.dataset_dir, 'info/tracks_test_info.mat'
        )
        self.query_IDX_path = osp.join(self.dataset_dir, 'info/query_IDX.mat')

        required_files = [
            self.dataset_dir, self.train_name_path, self.test_name_path,
            self.track_train_info_path, self.track_test_info_path,
            self.query_IDX_path
        ]
        self.check_before_run(required_files)

        train_names = self.get_names(self.train_name_path)
        test_names = self.get_names(self.test_name_path)
        track_train = loadmat(self.track_train_info_path
                              )['track_train_info']  # numpy.ndarray (8298, 4)
        track_test = loadmat(self.track_test_info_path
                             )['track_test_info']  # numpy.ndarray (12180, 4)
        query_IDX = loadmat(self.query_IDX_path
                            )['query_IDX'].squeeze()  # numpy.ndarray (1980,)
        query_IDX -= 1  # index from 0
        track_query = track_test[query_IDX, :]
        # gallery = all test tracklets that are not query tracklets
        gallery_IDX = [
            i for i in range(track_test.shape[0]) if i not in query_IDX
        ]
        track_gallery = track_test[gallery_IDX, :]

        train = self.process_data(
            train_names, track_train, home_dir='bbox_train', relabel=True
        )
        query = self.process_data(
            test_names, track_query, home_dir='bbox_test', relabel=False
        )
        gallery = self.process_data(
            test_names, track_gallery, home_dir='bbox_test', relabel=False
        )

        super(Mars, self).__init__(train, query, gallery, **kwargs)

    def get_names(self, fpath):
        # Read one image name per line, stripping trailing whitespace.
        names = []
        with open(fpath, 'r') as f:
            for line in f:
                new_line = line.rstrip()
                names.append(new_line)
        return names

    def process_data(
        self, names, meta_data, home_dir=None, relabel=False, min_seq_len=0
    ):
        # Convert .mat tracklet metadata rows (start, end, pid, camid) into
        # (img_paths, pid, camid) tuples; start/end indices are 1-based.
        assert home_dir in ['bbox_train', 'bbox_test']
        num_tracklets = meta_data.shape[0]
        pid_list = list(set(meta_data[:, 2].tolist()))

        if relabel:
            pid2label = {pid: label for label, pid in enumerate(pid_list)}
        tracklets = []

        for tracklet_idx in range(num_tracklets):
            data = meta_data[tracklet_idx, ...]
            start_index, end_index, pid, camid = data
            if pid == -1:
                continue  # junk images are just ignored
            assert 1 <= camid <= 6
            if relabel:
                pid = pid2label[pid]
            camid -= 1  # index starts from 0
            img_names = names[start_index - 1:end_index]

            # make sure image names correspond to the same person
            pnames = [img_name[:4] for img_name in img_names]
            assert len(
                set(pnames)
            ) == 1, 'Error: a single tracklet contains different person images'

            # make sure all images are captured under the same camera
            camnames = [img_name[5] for img_name in img_names]
            assert len(
                set(camnames)
            ) == 1, 'Error: images are captured under different cameras!'

            # append image names with directory information
            img_paths = [
                osp.join(self.dataset_dir, home_dir, img_name[:4], img_name)
                for img_name in img_names
            ]
            if len(img_paths) >= min_seq_len:
                img_paths = tuple(img_paths)
                tracklets.append((img_paths, pid, camid))

        return tracklets

    def combine_all(self):
        # Intentionally a no-op override: combining train/query/gallery is not
        # meaningful for MARS, so only a warning is emitted.
        warnings.warn(
            'Some query IDs do not appear in gallery. Therefore, combineall '
            'does not make any difference to Mars'
        )
URL: ``_ Dataset statistics: - identities: 200. - tracklets: 400. - cameras: 2. """ dataset_dir = 'prid2011' dataset_url = None def __init__(self, root='', split_id=0, **kwargs): self.root = osp.abspath(osp.expanduser(root)) self.dataset_dir = osp.join(self.root, self.dataset_dir) self.download_dataset(self.dataset_dir, self.dataset_url) self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json') self.cam_a_dir = osp.join( self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_a' ) self.cam_b_dir = osp.join( self.dataset_dir, 'prid_2011', 'multi_shot', 'cam_b' ) required_files = [self.dataset_dir, self.cam_a_dir, self.cam_b_dir] self.check_before_run(required_files) splits = read_json(self.split_path) if split_id >= len(splits): raise ValueError( 'split_id exceeds range, received {}, but expected between 0 and {}' .format(split_id, len(splits) - 1) ) split = splits[split_id] train_dirs, test_dirs = split['train'], split['test'] train = self.process_dir(train_dirs, cam1=True, cam2=True) query = self.process_dir(test_dirs, cam1=True, cam2=False) gallery = self.process_dir(test_dirs, cam1=False, cam2=True) super(PRID2011, self).__init__(train, query, gallery, **kwargs) def process_dir(self, dirnames, cam1=True, cam2=True): tracklets = [] dirname2pid = {dirname: i for i, dirname in enumerate(dirnames)} for dirname in dirnames: if cam1: person_dir = osp.join(self.cam_a_dir, dirname) img_names = glob.glob(osp.join(person_dir, '*.png')) assert len(img_names) > 0 img_names = tuple(img_names) pid = dirname2pid[dirname] tracklets.append((img_names, pid, 0)) if cam2: person_dir = osp.join(self.cam_b_dir, dirname) img_names = glob.glob(osp.join(person_dir, '*.png')) assert len(img_names) > 0 img_names = tuple(img_names) pid = dirname2pid[dirname] tracklets.append((img_names, pid, 1)) return tracklets ================================================ FILE: torchreid/data/masks_transforms/__init__.py ================================================ from __future__ import 
print_function, absolute_import from .mask_transform import * from .pcb_transforms import * from .pifpaf_mask_transform import * from .coco_keypoints_transforms import * from torchreid.data.datasets import get_image_dataset masks_preprocess_pifpaf = { 'full': CombinePifPafIntoFullBodyMask, 'bs_fu': AddFullBodyMaskToBaseMasks, 'bs_fu_bb': AddFullBodyMaskAndFullBoundingBoxToBaseMasks, 'mu_sc': CombinePifPafIntoMultiScaleBodyMasks, 'one': CombinePifPafIntoOneBodyMasks, 'two_v': CombinePifPafIntoTwoBodyMasks, 'three_v': CombinePifPafIntoThreeBodyMasks, 'four': CombinePifPafIntoFourBodyMasks, 'four_no': CombinePifPafIntoFourBodyMasksNoOverlap, 'four_v': CombinePifPafIntoFourVerticalParts, 'four_v_pif': CombinePifPafIntoFourVerticalPartsPif, 'five_v': CombinePifPafIntoFiveVerticalParts, 'five': CombinePifPafIntoFiveBodyMasks, 'six': CombinePifPafIntoSixBodyMasks, 'six_v': CombinePifPafIntoSixVerticalParts, 'six_no': CombinePifPafIntoSixBodyMasksSum, 'six_new': CombinePifPafIntoSixBodyMasksSimilarToEight, 'seven_v': CombinePifPafIntoSevenVerticalBodyMasks, 'seven_new': CombinePifPafIntoSevenBodyMasksSimilarToEight, 'eight': CombinePifPafIntoEightBodyMasks, 'eight_v': CombinePifPafIntoEightVerticalBodyMasks, 'ten_ms': CombinePifPafIntoTenMSBodyMasks, 'eleven': CombinePifPafIntoElevenBodyMasks, 'fourteen': CombinePifPafIntoFourteenBodyMasks, } masks_preprocess_coco = { 'cc6': CocoToSixBodyMasks } masks_preprocess_fixed = { 'id': IdentityMask, 'strp_2': PCBMasks2, 'strp_3': PCBMasks3, 'strp_4': PCBMasks4, 'strp_5': PCBMasks5, 'strp_6': PCBMasks6, 'strp_7': PCBMasks7, 'strp_8': PCBMasks8, } masks_preprocess_transforms = {**masks_preprocess_pifpaf, **masks_preprocess_coco} masks_preprocess_all = {**masks_preprocess_pifpaf, **masks_preprocess_fixed, **masks_preprocess_coco} def compute_parts_num_and_names(cfg): mask_config = get_image_dataset(cfg.data.sources[0]).get_masks_config(cfg.model.bpbreid.masks.dir) if cfg.loss.name == 'part_based': if (mask_config is not None and 
mask_config[1]) or cfg.model.bpbreid.masks.preprocess == 'none': # ISP masks or no transform cfg.model.bpbreid.masks.parts_num = mask_config[0] cfg.model.bpbreid.masks.parts_names = mask_config[3] if 3 in mask_config else ["p{}".format(p) for p in range(1, cfg.data.parts_num+1)] else: masks_transform = masks_preprocess_all[cfg.model.bpbreid.masks.preprocess]() cfg.model.bpbreid.masks.parts_num = masks_transform.parts_num cfg.model.bpbreid.masks.parts_names = masks_transform.parts_names ================================================ FILE: torchreid/data/masks_transforms/coco_keypoints_transforms.py ================================================ from torchreid.data.masks_transforms.mask_transform import MaskGroupingTransform COCO_KEYPOINTS = ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"] COCO_KEYPOINTS_MAP = {k: i for i, k in enumerate(COCO_KEYPOINTS)} class CocoToSixBodyMasks(MaskGroupingTransform): parts_grouping = { "head": ["nose", "left_eye", "right_eye", "left_ear", "right_ear"], "torso": ["left_shoulder", "right_shoulder", "left_hip", "right_hip"], "left_arm": ["left_shoulder", "left_elbow", "left_wrist"], "right_arm": ["right_shoulder", "right_elbow", "right_wrist"], "left_leg": ["left_hip", "left_knee", "left_ankle"], "right_leg": ["right_hip", "right_knee", "right_ankle"] } def __init__(self): super().__init__(self.parts_grouping, COCO_KEYPOINTS_MAP) ================================================ FILE: torchreid/data/masks_transforms/mask_transform.py ================================================ import torch from torch import nn from albumentations import DualTransform import torch.nn.functional as F class MaskTransform(DualTransform): def __init__(self): super(MaskTransform, self).__init__(always_apply=True, p=1) def apply(self, img, **params): return img def 
apply_to_bbox(self, bbox, **params): raise NotImplementedError("Method apply_to_bbox is not implemented in class " + self.__class__.__name__) def apply_to_keypoint(self, keypoint, **params): raise NotImplementedError("Method apply_to_keypoint is not implemented in class " + self.__class__.__name__) class MaskGroupingTransform(MaskTransform): def __init__(self, parts_grouping, parts_map, combine_mode='max'): super().__init__() self.parts_grouping = parts_grouping self.parts_map = parts_map self.parts_names = list(parts_grouping.keys()) self.parts_num = len(self.parts_names) self.combine_mode = combine_mode def apply_to_mask(self, masks, **params): parts_masks = [] for i, part in enumerate(self.parts_names): if self.combine_mode == 'sum': parts_masks.append(masks[[self.parts_map[k] for k in self.parts_grouping[part]]].sum(dim=0).clamp(0, 1)) else: parts_masks.append(masks[[self.parts_map[k] for k in self.parts_grouping[part]]].max(dim=0)[0].clamp(0, 1)) return torch.stack(parts_masks) class PermuteMasksDim(MaskTransform): def apply_to_mask(self, masks, **params): return masks.permute(2, 0, 1) class ResizeMasks(MaskTransform): def __init__(self, height, width, mask_scale): super(ResizeMasks, self).__init__() self._size = (int(height/mask_scale), int(width/mask_scale)) def apply_to_mask(self, masks, **params): return nn.functional.interpolate(masks.unsqueeze(0), self._size, mode='nearest').squeeze(0) # Best perf with nearest here and bilinear in parts engine class RemoveBackgroundMask(MaskTransform): def apply_to_mask(self, masks, **params): return masks[:, :, 1::] class AddBackgroundMask(MaskTransform): def __init__(self, background_computation_strategy='sum', softmax_weight=0, mask_filtering_threshold=0.3): super().__init__() self.background_computation_strategy = background_computation_strategy self.softmax_weight = softmax_weight self.mask_filtering_threshold = mask_filtering_threshold def apply_to_mask(self, masks, **params): if 
self.background_computation_strategy == 'sum': background_mask = 1 - masks.sum(dim=0) background_mask = background_mask.clamp(0, 1) masks = torch.cat([background_mask.unsqueeze(0), masks]) elif self.background_computation_strategy == 'threshold': background_mask = masks.max(dim=0)[0] < self.mask_filtering_threshold masks = torch.cat([background_mask.unsqueeze(0), masks]) elif self.background_computation_strategy == 'diff_from_max': background_mask = 1 - masks.max(dim=0)[0] background_mask = background_mask.clamp(0, 1) masks = torch.cat([background_mask.unsqueeze(0), masks]) else: raise ValueError('Background mask combine strategy {} not supported'.format(self.background_computation_strategy)) if self.softmax_weight > 0: masks = F.softmax(masks * self.softmax_weight, dim=0) else: masks = masks / masks.sum(dim=0) return masks class IdentityMask(MaskTransform): parts_names = ['id'] parts_num = 1 def apply_to_mask(self, masks, **params): return torch.ones((1, masks.shape[1], masks.shape[2])) ================================================ FILE: torchreid/data/masks_transforms/pcb_transforms.py ================================================ import numpy as np import torch from torchreid.data.masks_transforms.mask_transform import MaskTransform class PCBMasks(MaskTransform): def apply_to_mask(self, masks, **params): self._size = masks.shape[1:3] self.stripe_height = self._size[0] / self.parts_num self.pcb_masks = torch.zeros((self.parts_num, self._size[0], self._size[1])) stripes_range = np.round(np.arange(0, self.parts_num + 1) * self._size[0] / self.parts_num).astype(int) for i in range(0, stripes_range.size-1): self.pcb_masks[i, stripes_range[i]:stripes_range[i+1], :] = 1 return self.pcb_masks class PCBMasks2(PCBMasks): parts_num = 2 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks3(PCBMasks): parts_num = 3 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks4(PCBMasks): parts_num = 4 parts_names = 
["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks5(PCBMasks): parts_num = 5 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks6(PCBMasks): parts_num = 6 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks7(PCBMasks): parts_num = 7 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] class PCBMasks8(PCBMasks): parts_num = 8 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] ================================================ FILE: torchreid/data/masks_transforms/pifpaf_mask_transform.py ================================================ from __future__ import division, print_function, absolute_import import torch from torchreid.data.masks_transforms.mask_transform import MaskGroupingTransform PIFPAF_KEYPOINTS = ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle"] PIFPAF_JOINTS = ["left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"] PIFPAF_PARTS = PIFPAF_KEYPOINTS + PIFPAF_JOINTS PIFPAF_SINGLE_GROUPS = {k:k for k in PIFPAF_PARTS} PIFPAF_PARTS_MAP = {k: i for i, k in enumerate(PIFPAF_PARTS)} class CombinePifPafIntoFullBodyMask(MaskGroupingTransform): parts_grouping = { "full_body": PIFPAF_PARTS } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class AddFullBodyMaskToBaseMasks(MaskGroupingTransform): parts_grouping = 
{**PIFPAF_SINGLE_GROUPS, **{ "full_body": PIFPAF_PARTS } } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class AddFullBodyMaskAndFullBoundingBoxToBaseMasks(MaskGroupingTransform): parts_num = 38 parts_names = ["p{}".format(p) for p in range(1, parts_num+1)] def apply_to_mask(self, masks, **params): full_body_mask = torch.max(masks, 0, keepdim=True)[0] full_bounding_box = torch.ones(masks.shape[1:3]).unsqueeze(0) return torch.cat([masks, full_body_mask, full_bounding_box ]) class CombinePifPafIntoMultiScaleBodyMasks(MaskGroupingTransform): parts_grouping = {**PIFPAF_SINGLE_GROUPS, **{ "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"], "arms_mask": ["left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist"], "torso_mask": ["left_shoulder", "right_shoulder", "left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_hip_to_right_hip"], "feet_mask": ["left_ankle", "right_ankle"], "upper_body": ["torso_mask", "arms_mask", "head_mask"], "lower_body": ["legs_mask", "feet_mask"], "full_body_mask": PIFPAF_PARTS } } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class CombinePifPafIntoOneBodyMasks(MaskGroupingTransform): parts_grouping = { "full": PIFPAF_PARTS } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class 
class CombinePifPafIntoThreeBodyMasks(MaskGroupingTransform):
    """Three parts: head, torso+arms, legs."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "torso_arms_mask": ["left_shoulder", "right_shoulder", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_hip_to_right_hip"]
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFourBodyMasks(MaskGroupingTransform):
    """Four parts: head, arms, torso, legs (overlapping at the shoulders/hips)."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "arms_mask": ["left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist"],
        "torso_mask": ["left_shoulder", "right_shoulder", "left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle", "right_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_hip_to_right_hip"]
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFourBodyMasksNoOverlap(MaskGroupingTransform):
    """Four parts with shoulder/hip keypoints assigned to a single group."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "arms_mask": ["left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist"],
        "torso_mask": ["left_shoulder", "right_shoulder", "left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "legs_mask": ["left_knee", "right_knee", "left_ankle", "right_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_hip_to_right_hip"]
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFourVerticalParts(MaskGroupingTransform):
    """Four vertically-stacked parts: head, arms+torso, legs, feet."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "arms_torso_mask": ["left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist", "left_shoulder", "right_shoulder", "left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFourVerticalPartsPif(MaskGroupingTransform):
    """Four vertical parts built from keypoint (PIF) fields only."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear"],
        "arms_torso_mask": ["left_elbow", "right_elbow", "left_wrist", "right_wrist", "left_shoulder", "right_shoulder", "left_hip", "right_hip"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFiveVerticalParts(MaskGroupingTransform):
    """Five vertical parts: head, upper arms+torso, lower arms+torso, legs, feet.

    NOTE(review): "lower_arms_torso_mask" includes right_shoulder_to_right_hip
    but not its left counterpart — looks asymmetric; confirm this is intended.
    """
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "upper_arms_torso_mask": ["left_elbow", "right_elbow", "left_shoulder_to_left_elbow", "right_shoulder_to_right_elbow", "left_shoulder", "right_shoulder", "left_shoulder_to_right_shoulder"],
        "lower_arms_torso_mask": ["left_wrist", "right_wrist", "left_elbow_to_left_wrist", "right_elbow_to_right_wrist", "left_hip", "right_hip", "right_shoulder_to_right_hip"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFiveBodyMasks(MaskGroupingTransform):
    """Five parts: head, arms, torso, legs, feet."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "arms_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist", "right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"],
        "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "legs_mask": ["left_hip_to_right_hip", "left_hip", "right_hip", "left_knee", "right_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoSixVerticalParts(MaskGroupingTransform):
    """Six vertical parts: head, arms, upper torso, lower torso, legs, feet."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "arms_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist", "right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"],
        "upper_torso_mask": ["left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "lower_torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip"],
        "legs_mask": ["left_hip", "right_hip", "left_knee", "right_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoSixBodyMasks(MaskGroupingTransform):
    """Six parts: head, left/right arm, torso, left/right leg."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"],
        "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"],
        "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "left_leg_mask": ["left_knee", "left_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"],
        "right_leg_mask": ["right_knee", "right_ankle", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoSixBodyMasksSum(MaskGroupingTransform):
    """Same six parts as CombinePifPafIntoSixBodyMasks, merged with 'sum'."""
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"],
        "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"],
        "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"],
        "left_leg_mask": ["left_knee", "left_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"],
        "right_leg_mask": ["right_knee", "right_ankle", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
    }

    def __init__(self):
        # FIX: the original passed PIFPAF_PARTS (a list) as the parts_map
        # argument; MaskGroupingTransform indexes parts_map with part *names*,
        # so a list raises TypeError on the first lookup. Every sibling
        # transform passes the PIFPAF_PARTS_MAP dict.
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP, 'sum')
["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"], "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"], "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "left_leg_mask": ["left_knee", "left_ankle", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"], "right_leg_mask": ["right_knee", "right_ankle", "right_ankle_to_right_knee", "right_knee_to_right_hip"], } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS, 'sum') class CombinePifPafIntoSixBodyMasksSimilarToEight(MaskGroupingTransform): parts_grouping = { "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"], "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"], "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"], "leg_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip", "right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip"], "feet_mask": ["left_ankle", "right_ankle"], } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class CombinePifPafIntoEightBodyMasks(MaskGroupingTransform): parts_grouping = { "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", 
"left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"], "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"], "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"], "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "left_leg_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"], "right_leg_mask": ["right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip"], "left_feet_mask": ["left_ankle"], "right_feet_mask": ["right_ankle"], } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class CombinePifPafIntoEightVerticalBodyMasks(MaskGroupingTransform): parts_grouping = { "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"], "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"], "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"], "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "left_leg_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"], "right_leg_mask": ["right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip"], "left_feet_mask": ["left_ankle"], "right_feet_mask": ["right_ankle"], } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class 
CombinePifPafIntoTenMSBodyMasks(MaskGroupingTransform): parts_grouping = { "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"], "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist"], "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist"], "torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "left_leg_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"], "right_leg_mask": ["right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip"], "left_feet_mask": ["left_ankle"], "right_feet_mask": ["right_ankle"], "upper_body_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye", "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear", "left_ear_to_left_shoulder", "right_ear_to_right_shoulder", "left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow", "left_elbow_to_left_wrist", "right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow", "right_elbow_to_right_wrist", "left_hip", "right_hip", "left_hip_to_right_hip", "left_shoulder_to_left_hip", "right_shoulder_to_right_hip", "left_shoulder_to_right_shoulder"], "lower_body_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip", "right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip", "left_ankle", "right_ankle"], } def __init__(self): super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP) class CombinePifPafIntoSevenVerticalBodyMasks(MaskGroupingTransform): parts_grouping = { 
class CombinePifPafIntoSevenBodyMasksSimilarToEight(MaskGroupingTransform):
    """Group PifPaf keypoint/limb confidence maps into 7 coarse body masks.

    Same vertical layout as the eight-mask grouping, except that both legs
    are merged into a single ``leg_mask``.
    """
    parts_grouping = {
        # Face keypoints plus face/neck connecting limbs.
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye",
                      "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear",
                      "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "left_arm_mask": ["left_shoulder", "left_elbow", "left_wrist", "left_shoulder_to_left_elbow",
                          "left_elbow_to_left_wrist"],
        "right_arm_mask": ["right_shoulder", "right_elbow", "right_wrist", "right_shoulder_to_right_elbow",
                           "right_elbow_to_right_wrist"],
        "upper_torso_mask": ["left_shoulder_to_left_hip", "right_shoulder_to_right_hip",
                             "left_shoulder_to_right_shoulder"],
        "lower_torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip"],
        # NOTE(review): "left_hip_to_right_hip" appears in both lower_torso_mask
        # and leg_mask, so that limb contributes to two masks — confirm intended.
        "leg_mask": ["left_knee", "left_ankle_to_left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip",
                     "right_knee", "right_ankle_to_right_knee", "right_knee_to_right_hip"],
        "feet_mask": ["left_ankle", "right_ankle"],
    }

    def __init__(self):
        # MaskGroupingTransform merges the PIFPAF_PARTS_MAP channels
        # according to parts_grouping.
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoElevenBodyMasks(MaskGroupingTransform):
    """Group PifPaf keypoint/limb confidence maps into 11 body masks.

    Finer than the seven-mask grouping: arms are split into elbow/wrist
    regions and legs into per-side leg/feet regions.
    """
    parts_grouping = {
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye",
                      "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear",
                      "left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "left_elbow_mask": ["left_shoulder", "left_elbow", "left_shoulder_to_left_elbow"],
        "left_wrist_mask": ["left_wrist", "left_elbow_to_left_wrist"],
        "right_elbow_mask": ["right_shoulder", "right_elbow", "right_shoulder_to_right_elbow"],
        "right_wrist_mask": ["right_wrist", "right_elbow_to_right_wrist"],
        "upper_torso_mask": ["left_shoulder_to_left_hip", "right_shoulder_to_right_hip",
                             "left_shoulder_to_right_shoulder"],
        "lower_torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip"],
        # NOTE(review): left_leg_mask includes "left_hip_to_right_hip" but
        # right_leg_mask has no counterpart — looks asymmetric, confirm intended.
        "left_leg_mask": ["left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"],
        "right_leg_mask": ["right_knee", "right_knee_to_right_hip"],
        "left_feet_mask": ["left_ankle_to_left_knee", "left_ankle"],
        "right_feet_mask": ["right_ankle_to_right_knee", "right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)


class CombinePifPafIntoFourteenBodyMasks(MaskGroupingTransform):
    """Group PifPaf keypoint/limb confidence maps into 14 body masks.

    Like the eleven-mask grouping but with a dedicated neck mask and with
    each lower leg split into tibia and foot masks.
    """
    parts_grouping = {
        # Here the ear-to-shoulder limbs move out of the head into neck_mask.
        "head_mask": ["nose", "left_eye", "right_eye", "left_ear", "right_ear", "left_eye_to_right_eye",
                      "nose_to_left_eye", "nose_to_right_eye", "left_eye_to_left_ear", "right_eye_to_right_ear"],
        "neck_mask": ["left_ear_to_left_shoulder", "right_ear_to_right_shoulder"],
        "left_elbow_mask": ["left_shoulder", "left_elbow", "left_shoulder_to_left_elbow"],
        "left_wrist_mask": ["left_wrist", "left_elbow_to_left_wrist"],
        "right_elbow_mask": ["right_shoulder", "right_elbow", "right_shoulder_to_right_elbow"],
        "right_wrist_mask": ["right_wrist", "right_elbow_to_right_wrist"],
        "upper_torso_mask": ["left_shoulder_to_left_hip", "right_shoulder_to_right_hip",
                             "left_shoulder_to_right_shoulder"],
        "lower_torso_mask": ["left_hip", "right_hip", "left_hip_to_right_hip"],
        # NOTE(review): same left/right asymmetry as the eleven-mask grouping
        # ("left_hip_to_right_hip" only on the left side) — confirm intended.
        "left_leg_mask": ["left_knee", "left_knee_to_left_hip", "left_hip_to_right_hip"],
        "right_leg_mask": ["right_knee", "right_knee_to_right_hip"],
        "left_tibia_mask": ["left_ankle_to_left_knee"],
        "right_tibia_mask": ["right_ankle_to_right_knee"],
        "left_feet_mask": ["left_ankle"],
        "right_feet_mask": ["right_ankle"],
    }

    def __init__(self):
        super().__init__(self.parts_grouping, PIFPAF_PARTS_MAP)
AVAI_SAMPLERS = ['RandomIdentitySampler', 'SequentialSampler', 'RandomSampler']


class RandomIdentitySampler(Sampler):
    """Randomly samples N identities each with K instances.

    Args:
        data_source (list): contains dict samples with a ``'pid'`` entry.
        batch_size (int): batch size.
        num_instances (int): number of instances per identity in a batch.
    """

    def __init__(self, data_source, batch_size, num_instances):
        if batch_size < num_instances:
            raise ValueError(
                'batch_size={} must be no less '
                'than num_instances={}'.format(batch_size, num_instances)
            )

        self.data_source = data_source
        self.batch_size = batch_size
        self.num_instances = num_instances
        self.num_pids_per_batch = self.batch_size // self.num_instances

        # Map each identity to the dataset indices of its samples.
        self.index_dic = defaultdict(list)
        for sample_idx, sample in enumerate(self.data_source):
            self.index_dic[sample['pid']].append(sample_idx)
        self.pids = list(self.index_dic.keys())

        # Estimated epoch length: every pid contributes a multiple of
        # num_instances (pids with too few samples are padded up to one group).
        self.length = sum(
            max(len(self.index_dic[pid]), self.num_instances)
            // self.num_instances * self.num_instances
            for pid in self.pids
        )

    def __iter__(self):
        # Pre-chunk each identity's shuffled indices into groups of
        # num_instances; identities with too few samples are oversampled
        # with replacement. Trailing partial groups are dropped.
        chunks_per_pid = defaultdict(list)
        for pid in self.pids:
            candidates = list(self.index_dic[pid])
            if len(candidates) < self.num_instances:
                candidates = np.random.choice(
                    candidates, size=self.num_instances, replace=True
                )
            random.shuffle(candidates)
            for start in range(0, len(candidates), self.num_instances):
                group = list(candidates[start:start + self.num_instances])
                if len(group) == self.num_instances:
                    chunks_per_pid[pid].append(group)

        # Repeatedly draw num_pids_per_batch identities and emit one group
        # per drawn identity until too few identities remain for a batch.
        remaining_pids = list(self.pids)
        epoch_idxs = []
        while len(remaining_pids) >= self.num_pids_per_batch:
            for pid in random.sample(remaining_pids, self.num_pids_per_batch):
                epoch_idxs.extend(chunks_per_pid[pid].pop(0))
                if not chunks_per_pid[pid]:
                    remaining_pids.remove(pid)

        return iter(epoch_idxs)

    def __len__(self):
        return self.length


def build_train_sampler(
    data_source, train_sampler, batch_size=32, num_instances=4, **kwargs
):
    """Builds a training sampler.

    Args:
        data_source (list): contains tuples of (img_path(s), pid, camid).
        train_sampler (str): sampler name (default: ``RandomSampler``).
        batch_size (int, optional): batch size. Default is 32.
        num_instances (int, optional): number of instances per identity in a
            batch (when using ``RandomIdentitySampler``). Default is 4.
    """
    assert train_sampler in AVAI_SAMPLERS, \
        'train_sampler must be one of {}, but got {}'.format(AVAI_SAMPLERS, train_sampler)

    if train_sampler == 'RandomIdentitySampler':
        return RandomIdentitySampler(data_source, batch_size, num_instances)
    if train_sampler == 'SequentialSampler':
        return SequentialSampler(data_source)
    # The assert above guarantees the only remaining option is RandomSampler.
    return RandomSampler(data_source)
def build_transforms(
    height,
    width,
    config,
    mask_scale=4,
    transforms='random_flip',
    norm_mean=[0.485, 0.456, 0.406],
    norm_std=[0.229, 0.224, 0.225],
    remove_background_mask=False,
    masks_preprocess='none',
    softmax_weight=0,
    mask_filtering_threshold=0.3,
    background_computation_strategy='threshold',
    **kwargs
):
    """Builds train and test transform functions.

    Both pipelines are albumentations ``Compose`` objects that transform the
    image and its body-part masks jointly; the mask-specific steps
    (``PermuteMasksDim``, background-mask handling, ``ResizeMasks``) are
    appended after the image transforms.

    Args:
        height (int): target image height.
        width (int): target image width.
        config: experiment config node; provides parameters for random
            occlusion (``config.data.ro``) and color jitter (``config.data.cj``).
        mask_scale (int, optional): downscale factor applied to masks by
            ``ResizeMasks``. Default is 4.
        transforms (str or list of str, optional): transformations applied to model training.
            Default is 'random_flip'.
        norm_mean (list or None, optional): normalization mean values. Default is ImageNet means.
        norm_std (list or None, optional): normalization standard deviation values. Default is
            ImageNet standard deviation values.
        remove_background_mask (bool, optional): True for (ISP-style) masks that
            already contain a background channel to strip and re-derive.
        masks_preprocess (str, optional): key into ``masks_preprocess_all``
            selecting a grouping transform for PifPaf confidence masks.
        softmax_weight, mask_filtering_threshold, background_computation_strategy:
            forwarded to ``AddBackgroundMask`` for the PifPaf branch.

    Returns:
        tuple: (train transform, test transform).
    """
    # Normalize the 'transforms' argument to a lowercase list of names.
    if transforms is None:
        transforms = []

    if isinstance(transforms, str):
        transforms = [transforms]

    if not isinstance(transforms, list):
        raise ValueError(
            'transforms must be a list of strings, but found to be {}'.format(
                type(transforms)
            )
        )

    if len(transforms) > 0:
        transforms = [t.lower() for t in transforms]

    if norm_mean is None or norm_std is None:
        norm_mean = [0.485, 0.456, 0.406]  # imagenet mean
        norm_std = [0.229, 0.224, 0.225]  # imagenet std
    normalize = Normalize(mean=norm_mean, std=norm_std)

    print('Building train transforms ...')
    transform_tr = []

    print('+ resize to {}x{}'.format(height, width))
    transform_tr += [Resize(height, width)]
    # Each augmentation is enabled by its full name or its short alias.
    if 'random_occlusion' in transforms or 'ro' in transforms:
        print('+ random occlusion')
        transform_tr += [RandomOcclusion(path=config.data.ro.path,
                                         im_shape=[config.data.height, config.data.width],
                                         p=config.data.ro.p,
                                         n=config.data.ro.n,
                                         min_overlap=config.data.ro.min_overlap,
                                         max_overlap=config.data.ro.max_overlap,
                                         )]
    if 'random_flip' in transforms or 'rf' in transforms:
        print('+ random flip')
        transform_tr += [HorizontalFlip()]
    if 'random_crop' in transforms or 'rc' in transforms:
        print('+ random crop')
        # Pad by a fixed margin on every side, then crop back to the target
        # size so the crop can shift the content by up to pad_size pixels.
        pad_size = 10
        transform_tr += [PadIfNeeded(min_height=height + pad_size * 2,
                                     min_width=width + pad_size * 2,
                                     border_mode=cv2.BORDER_CONSTANT,
                                     value=0, mask_value=0, p=1),
                         RandomCrop(height, width, p=1)]
    if 'color_jitter' in transforms or 'cj' in transforms:
        print('+ color jitter')
        transform_tr += [
            ColorJitter(brightness=config.data.cj.brightness,
                        contrast=config.data.cj.contrast,
                        saturation=config.data.cj.saturation,
                        hue=config.data.cj.hue,
                        always_apply=config.data.cj.always_apply,
                        p=config.data.cj.p,
                        )
        ]
    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))
    transform_tr += [normalize]
    if 'random_erase' in transforms or 're' in transforms:
        print('+ random erase')
        # NOTE: fill_value is the raw ImageNet mean although erasing happens
        # after normalization — presumably intentional, TODO confirm.
        transform_tr += [CoarseDropout(min_holes=1, max_holes=1,
                                       min_height=int(height * 0.15), max_height=int(height * 0.65),
                                       min_width=int(width * 0.15), max_width=int(width * 0.65),
                                       fill_value=[0.485, 0.456, 0.406], mask_fill_value=0,
                                       always_apply=False, p=0.5)]
    print('+ to torch tensor of range [0, 1]')
    transform_tr += [ToTensorV2()]

    # Test pipeline: deterministic resize + normalize + tensor conversion.
    print('Building test transforms ...')
    print('+ resize to {}x{}'.format(height, width))
    print('+ to torch tensor of range [0, 1]')
    print('+ normalization (mean={}, std={})'.format(norm_mean, norm_std))
    transform_te = [
        Resize(height, width),
        normalize,
        ToTensorV2()
    ]

    # Mask-specific post-processing, shared by train and test pipelines.
    transform_tr += [PermuteMasksDim()]
    transform_te += [PermuteMasksDim()]

    if remove_background_mask:  # ISP masks
        print('+ use remove background mask')
        # remove background before performing other transforms
        transform_tr = [RemoveBackgroundMask()] + transform_tr
        transform_te = [RemoveBackgroundMask()] + transform_te

        # Derive background mask from all foreground masks once other tasks have been performed
        print('+ use add background mask')
        transform_tr += [AddBackgroundMask('sum')]
        transform_te += [AddBackgroundMask('sum')]
    else:  # Pifpaf confidence based masks
        if masks_preprocess != 'none':
            print('+ masks preprocess = {}'.format(masks_preprocess))
            masks_preprocess_transform = masks_preprocess_all[masks_preprocess]
            transform_tr += [masks_preprocess_transform()]
            transform_te += [masks_preprocess_transform()]

        print('+ use add background mask')
        transform_tr += [AddBackgroundMask(background_computation_strategy, softmax_weight, mask_filtering_threshold)]
        transform_te += [AddBackgroundMask(background_computation_strategy, softmax_weight, mask_filtering_threshold)]

    # Bring masks to the (downscaled) feature-map resolution.
    transform_tr += [ResizeMasks(height, width, mask_scale)]
    transform_te += [ResizeMasks(height, width, mask_scale)]

    transform_tr = Compose(transform_tr, is_check_shapes=False)
    transform_te = Compose(transform_te, is_check_shapes=False)

    return transform_tr, transform_te
scheduler (LRScheduler, optional): if None, no learning rate decay will be performed. use_gpu (bool, optional): use gpu. Default is True. """ def __init__(self, config, datamanager, writer, engine_state, use_gpu=True, save_model_flag=False, detailed_ranking=False): self.config = config self.datamanager = datamanager self.train_loader = self.datamanager.train_loader self.test_loader = self.datamanager.test_loader self.use_gpu = (torch.cuda.is_available() and use_gpu) self.save_model_flag = save_model_flag self.detailed_ranking = detailed_ranking self.engine_state = engine_state self.writer = writer self.logger = Logger.current_logger() self.model = None self.optimizer = None self.scheduler = None self._models = OrderedDict() self._optims = OrderedDict() self._scheds = OrderedDict() def register_model(self, name='model', model=None, optim=None, sched=None): if self.__dict__.get('_models') is None: raise AttributeError( 'Cannot assign model before super().__init__() call' ) if self.__dict__.get('_optims') is None: raise AttributeError( 'Cannot assign optim before super().__init__() call' ) if self.__dict__.get('_scheds') is None: raise AttributeError( 'Cannot assign sched before super().__init__() call' ) self._models[name] = model self._optims[name] = optim self._scheds[name] = sched def get_model_names(self, names=None): names_real = list(self._models.keys()) if names is not None: if not isinstance(names, list): names = [names] for name in names: assert name in names_real return names else: return names_real def save_model(self, epoch, cmc, mAP, ssmd, save_dir, is_best=False): if self.save_model_flag: names = self.get_model_names() for name in names: save_checkpoint( { 'state_dict': self._models[name].state_dict(), 'epoch': epoch + 1, 'rank1': cmc, 'mAP': mAP, 'ssmd': ssmd, 'config': self.config, 'optimizer': self._optims[name].state_dict(), 'scheduler': self._scheds[name].state_dict() }, osp.join(save_dir, self.writer.model_name + name), 
job_id=self.config.project.job_id, is_best=is_best ) def set_model_mode(self, mode='train', names=None): assert mode in ['train', 'eval', 'test'] names = self.get_model_names(names) for name in names: if mode == 'train': self._models[name].train() else: self._models[name].eval() def get_current_lr(self, names=None): names = self.get_model_names(names) name = names[0] return self._optims[name].param_groups[0]['lr'] def update_lr(self, names=None): names = self.get_model_names(names) for name in names: if self._scheds[name] is not None: self._scheds[name].step() self.engine_state.update_lr(self.get_current_lr()) def run( self, save_dir='log', fixbase_epoch=0, open_layers=None, test_only=False, dist_metric='euclidean', normalize_feature=False, visrank=False, visrank_topk=10, visrank_q_idx_list=[], visrank_count=10, use_metric_cuhk03=False, ranks=[1, 5, 10, 20], rerank=False, save_features=False ): """A unified pipeline for training and evaluating a model. Args: save_dir (str): directory to save model. max_epoch (int): maximum epoch. start_epoch (int, optional): starting epoch. Default is 0. fixbase_epoch (int, optional): number of epochs to train ``open_layers`` (new layers) while keeping base layers frozen. Default is 0. ``fixbase_epoch`` is counted in ``max_epoch``. open_layers (str or list, optional): layers (attribute names) open for training. start_eval (int, optional): from which epoch to start evaluation. Default is 0. eval_freq (int, optional): evaluation frequency. Default is -1 (meaning evaluation is only performed at the end of training). test_only (bool, optional): if True, only runs evaluation on test datasets. Default is False. dist_metric (str, optional): distance metric used to compute distance matrix between query and gallery. Default is "euclidean". normalize_feature (bool, optional): performs L2 normalization on feature vectors before computing feature distance. Default is False. visrank (bool, optional): visualizes ranked results. Default is False. 
    def run(
        self,
        save_dir='log',
        fixbase_epoch=0,
        open_layers=None,
        test_only=False,
        dist_metric='euclidean',
        normalize_feature=False,
        visrank=False,
        visrank_topk=10,
        visrank_q_idx_list=[],
        visrank_count=10,
        use_metric_cuhk03=False,
        ranks=[1, 5, 10, 20],
        rerank=False,
        save_features=False
    ):
        """A unified pipeline for training and evaluating a model.

        Args:
            save_dir (str): directory to save model.
            fixbase_epoch (int, optional): number of epochs to train ``open_layers`` (new layers)
                while keeping base layers frozen. Default is 0. ``fixbase_epoch`` is counted
                in ``max_epoch``.
            open_layers (str or list, optional): layers (attribute names) open for training.
            test_only (bool, optional): if True, only runs evaluation on test datasets.
                Default is False.
            dist_metric (str, optional): distance metric used to compute distance matrix
                between query and gallery. Default is "euclidean".
            normalize_feature (bool, optional): performs L2 normalization on feature vectors
                before computing feature distance. Default is False.
            visrank (bool, optional): visualizes ranked results. Default is False.
                It is recommended to enable ``visrank`` when ``test_only`` is True. The ranked
                images will be saved to "save_dir/visrank_dataset", e.g. "save_dir/visrank_market1501".
            visrank_topk (int, optional): top-k ranked images to be visualized. Default is 10.
            use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for cuhk03.
                Default is False. This should be enabled when using cuhk03 classic split.
            ranks (list, optional): cmc ranks to be computed. Default is [1, 5, 10, 20].
            rerank (bool, optional): uses person re-ranking (by Zhong et al. CVPR'17).
                Default is False. This is only enabled when test_only=True.
            save_features (bool, optional): save test query and test gallery extracted features to disk
        """
        # Test-only mode: one evaluation pass (epoch index 0), then exit.
        if test_only:
            self.test(
                0,
                dist_metric=dist_metric,
                normalize_feature=normalize_feature,
                visrank=visrank,
                visrank_topk=visrank_topk,
                visrank_q_idx_list=visrank_q_idx_list,
                visrank_count=visrank_count,
                save_dir=save_dir,
                use_metric_cuhk03=use_metric_cuhk03,
                ranks=ranks,
                rerank=rerank,
                save_features=save_features
            )
            return

        self.writer.total_run_timer.start()
        self.engine_state.estimated_num_batches = len(self.train_loader)
        self.engine_state.update_lr(self.get_current_lr())
        print('=> Start training')
        self.engine_state.training_started()
        mAP = 0
        # Main epoch loop: train, then optionally run an intermediate test
        # (sources only) and checkpoint, as decided by the writer.
        for epoch in range(self.engine_state.start_epoch, self.engine_state.max_epoch):
            self.writer.epoch_timer.start()
            self.engine_state.epoch_started()
            self.train(
                fixbase_epoch=fixbase_epoch,
                open_layers=open_layers
            )
            self.writer.epoch_timer.stop()
            self.engine_state.epoch_completed()

            if self.writer.intermediate_evaluate():
                print('=> Intermediate test')
                rank_1, mAP, ssmd = self.test(
                    epoch,
                    dist_metric=dist_metric,
                    normalize_feature=normalize_feature,
                    visrank=False,
                    visrank_topk=visrank_topk,
                    visrank_q_idx_list=visrank_q_idx_list,
                    visrank_count=visrank_count,
                    save_dir=save_dir,
                    use_metric_cuhk03=use_metric_cuhk03,
                    ranks=ranks,
                    evalate_on_sources_only=True
                )
                self.save_model(epoch, rank_1, mAP, ssmd, save_dir)

        self.engine_state.training_completed()

        # Final full test (sources and targets), only if training happened.
        if self.engine_state.max_epoch > 0:
            print('=> Final test')
            rank_1, mAP, ssmd = self.test(
                self.engine_state.epoch,
                dist_metric=dist_metric,
                normalize_feature=normalize_feature,
                visrank=visrank,
                visrank_topk=visrank_topk,
                visrank_q_idx_list=visrank_q_idx_list,
                visrank_count=visrank_count,
                save_dir=save_dir,
                use_metric_cuhk03=use_metric_cuhk03,
                ranks=ranks,
                save_features=save_features,
                evalate_on_sources_only=False
            )
            self.save_model(self.engine_state.epoch, rank_1, mAP, ssmd, save_dir)

        self.writer.total_run_timer.stop()
        self.engine_state.run_completed()

        self.logger.close()
        # mAP of the last evaluation performed (0 when no test ever ran).
        return mAP

    def train(self, fixbase_epoch=0, open_layers=None):
        """Train for one epoch over ``self.train_loader``.

        ``forward_backward`` (implemented by subclasses) does the actual
        optimization step for each batch.
        """
        self.set_model_mode('train')

        self.logger.add_scalar('Train/lr', self.get_current_lr(), self.engine_state.epoch)

        # Freeze/unfreeze layers depending on the current epoch.
        self.two_stepped_transfer_learning(
            self.engine_state.epoch, fixbase_epoch, open_layers
        )

        self.writer.data_loading_timer.start()
        for self.batch_idx, data in enumerate(self.train_loader):
            self.writer.data_loading_timer.stop()
            self.writer.batch_timer.start()
            loss, loss_summary = self.forward_backward(data)
            self.writer.batch_timer.stop()
            self.writer.losses.update(loss_summary)
            self.writer.loss.update(loss)
            self.writer.data_loading_timer.start()
            self.engine_state.batch_completed()
        # Scheduler step happens once per epoch, after all batches.
        self.update_lr()

    def forward_backward(self, data):
        # Subclasses must implement one optimization step for a batch and
        # return (loss, loss_summary).
        raise NotImplementedError
    def test(
        self,
        epoch,
        dist_metric='euclidean',
        normalize_feature=False,
        visrank=False,
        visrank_topk=10,
        visrank_q_idx_list=[],
        visrank_count=10,
        save_dir='',
        use_metric_cuhk03=False,
        ranks=[1, 5, 10, 20],
        rerank=False,
        save_features=False,
        evalate_on_sources_only=False
    ):
        """Tests model on target datasets.

        .. note::

            This function has been called in ``run()``.

        .. note::

            The test pipeline implemented in this function suits both image- and
            video-reid. In general, a subclass of Engine only needs to re-implement
            ``extract_features()`` and ``parse_data_for_eval()`` (most of the time),
            but not a must. Please refer to the source code for more details.

        Returns:
            tuple: (rank-1, mAP, ssmd) averaged over source datasets only
            (0 for rank-1 and the meters' defaults when no source dataset
            was evaluated).
        """
        self.writer.test_timer.start()
        self.set_model_mode('eval')
        targets = list(self.test_loader.keys())
        if len(targets) == 0:
            raise RuntimeError("Test set is either empty or target dataset was not specified.")

        # Running averages over *source* datasets only.
        cmc_avg = AverageMeter()
        mAP_avg = AverageMeter()
        ssmd_avg = AverageMeter()
        pxl_acc_avg = AverageMeter()
        # TODO: capture metrics with Pandas frame (more scalable for new metrics)
        # Per-dataset scores keyed by dataset nickname.
        cmc_per_dataset = {}
        mAP_per_dataset = {}
        ssmd_per_dataset = {}
        pxl_acc_per_dataset = {}
        for name in targets:
            is_source_dataset = name in self.datamanager.sources
            domain = 'source' if is_source_dataset else 'target'
            # Target datasets are skipped when evalate_on_sources_only is set
            # (intermediate evaluations during training).
            if is_source_dataset or not evalate_on_sources_only:
                print('##### Evaluating {} ({}) #####'.format(name, domain))
                query_loader = self.test_loader[name]['query']
                gallery_loader = self.test_loader[name]['gallery']
                cmc, mAP, ssmd, avg_pxl_pred_accuracy = self._evaluate(
                    epoch,
                    dataset_name=name,
                    query_loader=query_loader,
                    gallery_loader=gallery_loader,
                    dist_metric=dist_metric,
                    normalize_feature=normalize_feature,
                    visrank=visrank,
                    visrank_topk=visrank_topk,
                    visrank_q_idx_list=visrank_q_idx_list,
                    visrank_count=visrank_count,
                    save_dir=save_dir,
                    use_metric_cuhk03=use_metric_cuhk03,
                    ranks=ranks,
                    rerank=rerank,
                    save_features=save_features
                )
                dataset_nickname = get_dataset_nickname(name)
                self.writer.report_performance(cmc, mAP, ssmd, avg_pxl_pred_accuracy, dataset_nickname)
                cmc_per_dataset[dataset_nickname] = perc(cmc)
                mAP_per_dataset[dataset_nickname] = perc(mAP)
                ssmd_per_dataset[dataset_nickname] = np.around(ssmd, 2)
                pxl_acc_per_dataset[dataset_nickname] = avg_pxl_pred_accuracy
                # Only source datasets contribute to the returned averages.
                if is_source_dataset:
                    cmc_avg.update(cmc)
                    mAP_avg.update(mAP)
                    ssmd_avg.update(ssmd)
                    pxl_acc_avg.update(avg_pxl_pred_accuracy)
            else:
                print('##### Skipping {} ({}) #####'.format(name, domain))

        # Append the cross-dataset average, then reshape the CMC table from
        # dataset->curve into one {dataset: value} dict per requested rank.
        average_score_key = 'avg'
        cmc_per_dataset[average_score_key] = np.array(list(cmc_per_dataset.values())).mean(0)
        # transform dataset->cmc to cmc->dataset
        cmc_per_dataset = [{k: v[i-1] for k, v in cmc_per_dataset.items()} for i in ranks]
        mAP_per_dataset[average_score_key] = np.array(list(mAP_per_dataset.values())).mean()
        ssmd_per_dataset[average_score_key] = np.array(list(ssmd_per_dataset.values())).mean()
        pxl_acc_per_dataset[average_score_key] = np.array(list(pxl_acc_per_dataset.values())).mean()
        self.engine_state.test_completed()

        self.writer.test_timer.stop()
        # mAP_avg.count == 0 means no source dataset was evaluated.
        if mAP_avg.count != 0:
            self.writer.report_performance(cmc_avg.avg, mAP_avg.avg, ssmd_avg.avg, pxl_acc_avg.avg)
        self.writer.report_global_performance(cmc_per_dataset, mAP_per_dataset, ssmd_per_dataset, pxl_acc_per_dataset)

        r1 = cmc_avg.avg[0] if mAP_avg.count != 0 else 0
        return r1, mAP_avg.avg, ssmd_avg.avg
    @torch.no_grad()
    def _evaluate(
        self,
        epoch,
        dataset_name='',
        query_loader=None,
        gallery_loader=None,
        dist_metric='euclidean',
        normalize_feature=False,
        visrank=False,
        visrank_topk=10,
        visrank_q_idx_list=[],
        visrank_count=10,
        save_dir='',
        use_metric_cuhk03=False,
        ranks=[1, 5, 10, 20],
        rerank=False,
        save_features=False
    ):
        """Evaluate the model on one dataset's query/gallery split.

        Extracts features, computes the query-gallery distance matrix
        (optionally re-ranked), then CMC/mAP and the SSMD statistic of the
        positive/negative pair distance distributions.

        Returns:
            tuple: (cmc curve, mAP, ssmd, pixel accuracy) — pixel accuracy is
            always 0 here; part-based subclasses override this method.
        """
        print('Extracting features from query set:')
        qf, q_pids, q_camids, q_anns = self._feature_extraction(query_loader)
        print('Done, obtained {} tensor'.format(qf.shape))

        print('Extracting features from gallery set:')
        gf, g_pids, g_camids, g_anns = self._feature_extraction(gallery_loader)
        print('Done, obtained {} tensor'.format(gf.shape))

        print('Test batch feature extraction speed: {:.4f} sec/batch'.format(self.writer.test_batch_timer.avg))

        if save_features:
            features_dir = osp.join(save_dir, 'features')
            print('Saving features to : ' + features_dir)
            # TODO create if doesn't exist
            torch.save(gf, osp.join(features_dir, 'gallery_features_' + dataset_name + '.pt'))
            torch.save(qf, osp.join(features_dir, 'query_features_' + dataset_name + '.pt'))
            # save pids, camids and feature length

        self.writer.performance_evaluation_timer.start()
        if normalize_feature:
            print('Normalizing features with L2 norm ...')
            qf = self.normalize(qf)
            gf = self.normalize(gf)
        print('Computing distance matrix with metric={} ...'.format(dist_metric))
        distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
        distmat = distmat.numpy()
        if rerank:
            # k-reciprocal re-ranking also needs the intra-set distances.
            print('Applying person re-ranking ...')
            distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
            distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
            distmat = re_ranking(distmat, distmat_qq, distmat_gg)

        print('Computing CMC and mAP ...')
        eval_metrics = metrics.evaluate_rank(
            distmat,
            q_pids,
            g_pids,
            q_camids,
            g_camids,
            q_anns=q_anns,
            g_anns=g_anns,
            # Dataset-specific evaluation protocol (e.g. cuhk03 vs default).
            eval_metric=self.datamanager.test_loader[dataset_name]['query'].dataset.eval_metric
        )
        mAP = eval_metrics['mAP']
        cmc = eval_metrics['cmc']
        print('** Results **')
        print('mAP: {:.2%}'.format(mAP))
        print('CMC curve')
        for r in ranks:
            print('Rank-{:<3}: {:.2%}'.format(r, cmc[r - 1]))

        # Extra (dataset-specific) metrics come as (value, sample_size) pairs.
        for metric in eval_metrics.keys():
            if metric != 'mAP' and metric != 'cmc':
                val, size = eval_metrics[metric]
                if val is not None:
                    print('{:<20}: {:.2%} ({})'.format(metric, val, size))
                else:
                    print('{:<20}: not provided'.format(metric))

        # TODO move below to writer
        print('Evaluate distribution of distances of pairs with same id vs different ids')
        same_ids_dist_mean, same_ids_dist_std, different_ids_dist_mean, different_ids_dist_std, ssmd = \
            plot_pairs_distance_distribution(distmat, q_pids, g_pids,
                                             "Query-gallery")  # TODO separate ssmd from plot, put plot in writer
        print("Positive pairs distance distribution mean: {:.3f}".format(same_ids_dist_mean))
        print("Positive pairs distance distribution standard deviation: {:.3f}".format(same_ids_dist_std))
        print("Negative pairs distance distribution mean: {:.3f}".format(different_ids_dist_mean))
        print("Negative pairs distance distribution standard deviation: {:.3f}".format(
            different_ids_dist_std))
        print("SSMD = {:.4f}".format(ssmd))

        if visrank:
            visualize_ranked_results(
                distmat,
                self.datamanager.fetch_test_loaders(dataset_name),
                self.datamanager.data_type,
                width=self.datamanager.width,
                height=self.datamanager.height,
                save_dir=osp.join(save_dir, 'visrank_' + dataset_name),
                topk=visrank_topk
            )

        self.writer.visualize_embeddings(qf, gf, q_pids, g_pids,
                                         self.datamanager.test_loader[dataset_name],
                                         dataset_name,
                                         None, None,
                                         mAP, cmc[0])
        self.writer.performance_evaluation_timer.stop()
        # Base engine computes no pixel-level accuracy; return 0 placeholder.
        return cmc, mAP, ssmd, 0

    def _feature_extraction(self, data_loader):
        """Run the model over a loader; return (features, pids, camids, anns).

        ``anns`` is the collated list of raw batch dicts, kept for
        dataset-specific evaluation metrics.
        """
        f_, pids_, camids_ = [], [], []
        anns = []
        for batch_idx, data in enumerate(data_loader):
            imgs, pids, camids = self.parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            self.writer.test_batch_timer.start()
            features = self.extract_features(imgs)
            self.writer.test_batch_timer.stop()
            features = features.data.cpu()
            f_.append(features)
            pids_.extend(pids)
            camids_.extend(camids)
            anns.append(data)
        anns = collate(anns)
        f_ = torch.cat(f_, 0)
        pids_ = np.asarray(pids_)
        camids_ = np.asarray(camids_)
        return f_, pids_, camids_, anns

    def compute_loss(self, criterion, outputs, targets, **kwargs):
        # Lists/tuples of outputs mean intermediate supervision heads: apply
        # the criterion to each and combine (see deep_supervision).
        if isinstance(outputs, (tuple, list)):
            loss = deep_supervision(criterion, outputs, targets, **kwargs)
        else:
            loss = criterion(outputs, targets, **kwargs)
        return loss

    def extract_features(self, input):
        # Default: the model's forward pass directly yields the embedding.
        return self.model(input)

    def parse_data_for_train(self, data):
        """Split a training batch dict into (images, pids)."""
        imgs = data['image']
        pids = data['pid']
        return imgs, pids

    def parse_data_for_eval(self, data):
        """Split an evaluation batch dict into (images, pids, camids)."""
        imgs = data['image']
        pids = data['pid']
        camids = data['camid']
        return imgs, pids, camids
    def two_stepped_transfer_learning(
        self, epoch, fixbase_epoch, open_layers, model=None
    ):
        """Two-stepped transfer learning.

        The idea is to freeze base layers for a certain number of epochs
        and then open all layers for training.

        Reference: https://arxiv.org/abs/1611.05244

        Args:
            epoch (int): current zero-based epoch.
            fixbase_epoch (int): number of warm-up epochs with frozen base.
            open_layers (str or list): layer attribute names trained during
                the warm-up phase.
            model (nn.Module, optional): defaults to ``self.model``.
        """
        model = self.model if model is None else model
        if model is None:
            return

        if fixbase_epoch > 0:
            # Warm-up phase: train only open_layers; afterwards unfreeze all.
            if (epoch + 1) <= fixbase_epoch and open_layers is not None:
                print(
                    '* Only train {} (epoch: {}/{})'.format(
                        open_layers, epoch + 1, fixbase_epoch
                    )
                )
                open_specified_layers(model, open_layers)
            else:
                open_all_layers(model)

    def normalize(self, features):
        # L2-normalize along the last (feature) dimension.
        return F.normalize(features, p=2, dim=-1)
    def __init__(
            self,
            datamanager,
            model,
            optimizer,
            writer,
            loss_name,
            config,
            dist_combine_strat,
            batch_size_pairwise_dist_matrix,
            engine_state,
            margin=0.3,
            scheduler=None,
            use_gpu=True,
            save_model_flag=False,
            mask_filtering_training=False,
            mask_filtering_testing=False
    ):
        """Engine for part-based ReID (BPBreID): GiLt + body-part attention losses.

        Args:
            loss_name (str): triplet-loss variant used inside GiLtLoss.
            dist_combine_strat (str): strategy to combine per-part distances.
            batch_size_pairwise_dist_matrix (int): chunk size for the pairwise
                distance computation (memory control).
            margin (float, optional): triplet margin. Default 0.3.
            mask_filtering_training / mask_filtering_testing (bool): use
                part-visibility scores during training / testing.
        """
        super(ImagePartBasedEngine, self).__init__(config,
                                                   datamanager,
                                                   writer,
                                                   engine_state,
                                                   use_gpu=use_gpu,
                                                   save_model_flag=save_model_flag,
                                                   detailed_ranking=config.test.detailed_ranking)

        self.model = model
        self.register_model('model', model, optimizer, scheduler)
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.parts_num = self.config.model.bpbreid.masks.parts_num
        self.mask_filtering_training = mask_filtering_training
        self.mask_filtering_testing = mask_filtering_testing
        self.dist_combine_strat = dist_combine_strat
        self.batch_size_pairwise_dist_matrix = batch_size_pairwise_dist_matrix
        self.losses_weights = self.config.loss.part_based.weights

        # Losses
        self.GiLt = GiLtLoss(self.losses_weights,
                             use_visibility_scores=self.mask_filtering_training,
                             triplet_margin=margin,
                             loss_name=loss_name,
                             writer=self.writer,
                             use_gpu=self.use_gpu)

        self.body_part_attention_loss = BodyPartAttentionLoss(loss_type=self.config.loss.part_based.ppl,
                                                              use_gpu=self.use_gpu)

        # Timers
        self.feature_extraction_timer = self.writer.feature_extraction_timer
        self.loss_timer = self.writer.loss_timer
        self.optimizer_timer = self.writer.optimizer_timer

    def forward_backward(self, data):
        """One optimization step: forward pass, combined loss, backward, step.

        Returns:
            tuple: (total loss tensor, dict summary of individual losses).
        """
        imgs, target_masks, pids, imgs_path = self.parse_data_for_train(data)

        # feature extraction
        self.feature_extraction_timer.start()
        embeddings_dict, visibility_scores_dict, id_cls_scores_dict, pixels_cls_scores, spatial_features, masks \
            = self.model(imgs, external_parts_masks=target_masks)
        display_feature_maps(embeddings_dict, spatial_features, masks[PARTS], imgs_path, pids)
        self.feature_extraction_timer.stop()

        # loss
        self.loss_timer.start()
        loss, loss_summary = self.combine_losses(visibility_scores_dict,
                                                 embeddings_dict,
                                                 id_cls_scores_dict,
                                                 pids,
                                                 pixels_cls_scores,
                                                 target_masks,
                                                 bpa_weight=self.losses_weights[PIXELS]['ce'])
        self.loss_timer.stop()

        # optimization step
        self.optimizer_timer.start()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.optimizer_timer.stop()

        return loss, loss_summary

    def combine_losses(self, visibility_scores_dict, embeddings_dict, id_cls_scores_dict, pids,
                       pixels_cls_scores=None, target_masks=None, bpa_weight=0):
        """Combine the GiLt ReID loss with the body-part attention loss.

        The BPA term is only added when pixel classification scores, target
        masks and a positive weight are all provided.
        """
        # 1. ReID objective:
        # GiLt loss on holistic and part-based embeddings
        loss, loss_summary = self.GiLt(embeddings_dict,
                                       visibility_scores_dict,
                                       id_cls_scores_dict,
                                       pids)
        # 2. Part prediction objective:
        # Body part attention loss on spatial feature map
        if pixels_cls_scores is not None \
                and target_masks is not None \
                and bpa_weight > 0:
            # resize external masks to fit feature map size
            target_masks = nn.functional.interpolate(target_masks,
                                                     pixels_cls_scores.shape[2::],
                                                     mode='bilinear',
                                                     align_corners=True)
            # compute target part index for each spatial location, i.e. each spatial location (pixel) value indicate
            # the (body) part that spatial location belong to, or 0 for background.
            pixels_cls_score_targets = target_masks.argmax(dim=1)  # [N, Hf, Wf]
            # compute the classification loss for each pixel
            bpa_loss, bpa_loss_summary = self.body_part_attention_loss(pixels_cls_scores, pixels_cls_score_targets)
            loss += bpa_weight * bpa_loss
            loss_summary = {**loss_summary, **bpa_loss_summary}
        return loss, loss_summary

    def _feature_extraction(self, data_loader):
        """Extract part-based features, visibility scores and masks for a loader.

        Returns:
            tuple: (features, pids, camids, parts_visibility (or None when
            mask filtering is disabled), part masks, pixel cls scores
            (or None), collated anns).
        """
        f_, pids_, camids_, parts_visibility_, p_masks_, pxl_scores_, anns = [], [], [], [], [], [], []
        for batch_idx, data in enumerate(tqdm(data_loader, desc=f'Batches processed')):
            imgs, masks, pids, camids = self.parse_data_for_eval(data)
            if self.use_gpu:
                if masks is not None:
                    masks = masks.cuda()
                imgs = imgs.cuda()
            self.writer.test_batch_timer.start()
            model_output = self.model(imgs, external_parts_masks=masks)
            features, visibility_scores, parts_masks, pixels_cls_scores = self.extract_test_embeddings(model_output)
            self.writer.test_batch_timer.stop()
            if self.mask_filtering_testing:
                parts_visibility = visibility_scores
                parts_visibility = parts_visibility.cpu()
                parts_visibility_.append(parts_visibility)
            else:
                parts_visibility_ = None
            features = features.data.cpu()
            parts_masks = parts_masks.data.cpu()
            f_.append(features)
            p_masks_.append(parts_masks)
            pxl_scores_.append(pixels_cls_scores)
            pids_.extend(pids)
            camids_.extend(camids)
            anns.append(data)
        if self.mask_filtering_testing:
            parts_visibility_ = torch.cat(parts_visibility_, 0)
        f_ = torch.cat(f_, 0)
        p_masks_ = torch.cat(p_masks_, 0)
        # Pixel scores may be absent (model-dependent); keep None in that case.
        pxl_scores_ = torch.cat(pxl_scores_, 0) if pxl_scores_[0] is not None else None
        pids_ = np.asarray(pids_)
        camids_ = np.asarray(camids_)
        anns = collate(anns)
        return f_, pids_, camids_, parts_visibility_, p_masks_, pxl_scores_, anns
        save_features=False
    ):
        # --- feature extraction on both sets ---
        print('Extracting features from query set ...')
        qf, q_pids, q_camids, qf_parts_visibility, q_parts_masks, q_pxl_scores_, q_anns = self._feature_extraction(query_loader)
        print('Done, obtained {} tensor'.format(qf.shape))

        print('Extracting features from gallery set ...')
        gf, g_pids, g_camids, gf_parts_visibility, g_parts_masks, g_pxl_scores_, g_anns = self._feature_extraction(gallery_loader)
        print('Done, obtained {} tensor'.format(gf.shape))

        print('Test batch feature extraction speed: {:.4f} sec/batch'.format(self.writer.test_batch_timer.avg))

        if save_features:
            features_dir = osp.join(save_dir, 'features')
            print('Saving features to : ' + features_dir)
            # TODO create if doesn't exist
            torch.save(gf, osp.join(features_dir, 'gallery_features_' + dataset_name + '.pt'))
            torch.save(qf, osp.join(features_dir, 'query_features_' + dataset_name + '.pt'))
            # save pids, camids and feature length

        self.writer.performance_evaluation_timer.start()
        if normalize_feature:
            print('Normalizing features with L2 norm ...')
            qf = self.normalize(qf)
            gf = self.normalize(gf)

        # --- pairwise distances combining per-part distances with visibility scores ---
        print('Computing distance matrix with metric={} ...'.format(dist_metric))
        distmat, body_parts_distmat = compute_distance_matrix_using_bp_features(qf, gf, qf_parts_visibility,
                                                                                gf_parts_visibility,
                                                                                self.dist_combine_strat,
                                                                                self.batch_size_pairwise_dist_matrix,
                                                                                self.use_gpu, dist_metric)
        distmat = distmat.numpy()
        body_parts_distmat = body_parts_distmat.numpy()
        if rerank:
            print('Applying person re-ranking ...')
            # query-query and gallery-gallery distances are needed by the re-ranking algorithm
            distmat_qq, body_parts_distmat_qq = compute_distance_matrix_using_bp_features(qf, qf, qf_parts_visibility,
                                                                                          qf_parts_visibility,
                                                                                          self.dist_combine_strat,
                                                                                          self.batch_size_pairwise_dist_matrix,
                                                                                          self.use_gpu, dist_metric)
            distmat_gg, body_parts_distmat_gg = compute_distance_matrix_using_bp_features(gf, gf, gf_parts_visibility,
                                                                                          gf_parts_visibility,
                                                                                          self.dist_combine_strat,
                                                                                          self.batch_size_pairwise_dist_matrix,
                                                                                          self.use_gpu, dist_metric)
            distmat = re_ranking(distmat, distmat_qq, distmat_gg)

        # --- ranking metrics (CMC / mAP), using the dataset-specific evaluation protocol ---
        eval_metric = self.datamanager.test_loader[dataset_name]['query'].dataset.eval_metric
        print('Computing CMC and mAP ...')
        eval_metrics = metrics.evaluate_rank(
            distmat,
            q_pids,
            g_pids,
            q_camids,
            g_camids,
            q_anns=q_anns,
            g_anns=g_anns,
            eval_metric=eval_metric
        )
        mAP = eval_metrics['mAP']
        cmc = eval_metrics['cmc']
        print('** Results **')
        print('mAP: {:.2%}'.format(mAP))
        print('CMC curve')
        for r in ranks:
            print('Rank-{:<3}: {:.2%}'.format(r, cmc[r - 1]))
        # any extra metric is expected to be a (value, sample_size) pair
        for metric in eval_metrics.keys():
            if metric != 'mAP' and metric != 'cmc':
                val, size = eval_metrics[metric]
                if val is not None:
                    print('{:<20}: {:.2%} ({})'.format(metric, val, size))
                else:
                    print('{:<20}: not provided'.format(metric))

        # Parts ranking
        if self.detailed_ranking:
            self.display_individual_parts_ranking_performances(body_parts_distmat, cmc, g_camids, g_pids, mAP,
                                                               q_camids, q_pids, eval_metric)
        # TODO move below to writer
        plot_body_parts_pairs_distance_distribution(body_parts_distmat, q_pids, g_pids, "Query-gallery")
        print('Evaluate distribution of distances of pairs with same id vs different ids')
        same_ids_dist_mean, same_ids_dist_std, different_ids_dist_mean, different_ids_dist_std, ssmd = \
            plot_pairs_distance_distribution(distmat, q_pids, g_pids,
                                             "Query-gallery")  # TODO separate ssmd from plot, put plot in writer
        print("Positive pairs distance distribution mean: {:.3f}".format(same_ids_dist_mean))
        print("Positive pairs distance distribution standard deviation: {:.3f}".format(same_ids_dist_std))
        print("Negative pairs distance distribution mean: {:.3f}".format(different_ids_dist_mean))
        print("Negative pairs distance distribution standard deviation: {:.3f}".format(
            different_ids_dist_std))
        print("SSMD = {:.4f}".format(ssmd))

        # if groundtruth target body masks are provided, compute part prediction accuracy
        avg_pxl_pred_accuracy = 0.0
        if 'mask' in q_anns and 'mask' in g_anns and q_pxl_scores_ is not None and g_pxl_scores_ is not None:
            q_pxl_pred_accuracy = self.compute_pixels_cls_accuracy(torch.from_numpy(q_anns['mask']),
                                                                   q_pxl_scores_)
            g_pxl_pred_accuracy = self.compute_pixels_cls_accuracy(torch.from_numpy(g_anns['mask']),
                                                                   g_pxl_scores_)
            # weighted average over query and gallery sample counts
            avg_pxl_pred_accuracy = (q_pxl_pred_accuracy * len(q_parts_masks) + g_pxl_pred_accuracy * len(g_parts_masks)) /\
                                    (len(q_parts_masks) + len(g_parts_masks))
            print("Pixel prediction accuracy for query = {:.2f}% and for gallery = {:.2f}% and on average = {:.2f}%"
                  .format(q_pxl_pred_accuracy, g_pxl_pred_accuracy, avg_pxl_pred_accuracy))

        if visrank:
            self.writer.visualize_rank(self.datamanager.test_loader[dataset_name], dataset_name, distmat, save_dir,
                                       visrank_topk, visrank_q_idx_list, visrank_count, body_parts_distmat,
                                       qf_parts_visibility, gf_parts_visibility, q_parts_masks, g_parts_masks, mAP,
                                       cmc[0])

        # NOTE(review): assumed to run unconditionally (outside the visrank branch) — confirm against upstream
        self.writer.visualize_embeddings(qf, gf, q_pids, g_pids, self.datamanager.test_loader[dataset_name],
                                         dataset_name, qf_parts_visibility, gf_parts_visibility, mAP, cmc[0])
        self.writer.performance_evaluation_timer.stop()
        return cmc, mAP, ssmd, avg_pxl_pred_accuracy

    def compute_pixels_cls_accuracy(self, target_masks, pixels_cls_scores):
        """Accuracy of the pixel-wise part classifier against ground-truth part masks.

        ``target_masks`` are resized to the spatial size of ``pixels_cls_scores`` and
        reduced (argmax over the part channel) to a per-pixel part label.
        """
        if pixels_cls_scores.is_cuda:
            target_masks = target_masks.cuda()
        target_masks = nn.functional.interpolate(target_masks, pixels_cls_scores.shape[2::], mode='bilinear',
                                                 align_corners=True)  # Best perf with bilinear here and nearest in resize transform
        pixels_cls_score_targets = target_masks.argmax(dim=1)  # [N, Hf, Wf]
        pixels_cls_score_targets = pixels_cls_score_targets.flatten()  # [N*Hf*Wf]
        pixels_cls_scores = pixels_cls_scores.permute(0, 2, 3, 1).flatten(0, 2)  # [N*Hf*Wf, M]
        accuracy = metrics.accuracy(pixels_cls_scores, pixels_cls_score_targets)[0]
        return accuracy.item()

    def display_individual_parts_ranking_performances(self, body_parts_distmat, cmc, g_camids, g_pids, mAP, q_camids,
                                                      q_pids, eval_metric):
        """Print a table with mAP / Rank-1/5/10 computed separately for each part embedding.

        The first row(s) correspond to the global and/or foreground embeddings when they
        are part of the configured test embeddings; remaining rows are body parts.
        NOTE(review): the ``mAP`` and ``cmc`` parameters are shadowed by the loop below
        and effectively unused — consider dropping them from the signature.
        """
        print('Parts embeddings individual rankings :')
        bp_offset = 0
        if GLOBAL in self.config.model.bpbreid.test_embeddings:
            bp_offset += 1
        if FOREGROUND in self.config.model.bpbreid.test_embeddings:
            bp_offset += 1
        table = []
        for bp in range(0, body_parts_distmat.shape[0]):  # TODO DO NOT TAKE INTO ACCOUNT -1 DISTANCES!!!!
            perf_metrics = metrics.evaluate_rank(
                body_parts_distmat[bp],
                q_pids,
                g_pids,
                q_camids,
                g_camids,
                eval_metric=eval_metric
            )
            title = 'p {}'.format(bp - bp_offset)
            if bp < bp_offset:
                # rows before the offset are the holistic embeddings, in (global, foreground) order
                if bp == 0:
                    if GLOBAL in self.config.model.bpbreid.test_embeddings:
                        title = GLOBAL
                    else:
                        title = FOREGROUND
                if bp == 1:
                    title = FOREGROUND
            mAP = perf_metrics['mAP']
            cmc = perf_metrics['cmc']
            table.append([title, mAP, cmc[0], cmc[4], cmc[9]])
        headers = ["embed", "mAP", "R-1", "R-5", "R-10"]
        print(tabulate(table, headers, tablefmt="fancy_grid", floatfmt=".3f"))

    def parse_data_for_train(self, data):
        """Unpack a training batch dict into (imgs, masks, pids, imgs_path), moving tensors to GPU when enabled."""
        imgs = data['image']
        imgs_path = data['img_path']
        masks = data['mask'] if 'mask' in data else None
        pids = data['pid']
        if self.use_gpu:
            imgs = imgs.cuda()
            if masks is not None:
                masks = masks.cuda()
            pids = pids.cuda()
        if masks is not None:
            # one channel per body part plus one background channel
            assert masks.shape[1] == (self.config.model.bpbreid.masks.parts_num + 1)
        return imgs, masks, pids, imgs_path

    def parse_data_for_eval(self, data):
        """Unpack an evaluation batch dict into (imgs, masks, pids, camids); no device transfer here."""
        imgs = data['image']
        masks = data['mask'] if 'mask' in data else None
        pids = data['pid']
        camids = data['camid']
        return imgs, masks, pids, camids

    def extract_test_embeddings(self, model_output):
        """Select and stack the configured test embeddings from the raw model output.

        Each selected embedding/visibility/mask tensor is unsqueezed to carry an explicit
        part dimension, then all are concatenated along that dimension.
        """
        embeddings, visibility_scores, id_cls_scores, pixels_cls_scores, spatial_features, parts_masks = model_output
        embeddings_list = []
        visibility_scores_list = []
        embeddings_masks_list = []
        for test_emb in self.config.model.bpbreid.test_embeddings:
            embds = embeddings[test_emb]
            embeddings_list.append(embds if len(embds.shape) == 3 else embds.unsqueeze(1))
            # visibility/masks may be stored under the batch-normalized key alias
            if test_emb in bn_correspondants:
                test_emb = bn_correspondants[test_emb]
            vis_scores = visibility_scores[test_emb]
            visibility_scores_list.append(vis_scores if len(vis_scores.shape) == 2 else vis_scores.unsqueeze(1))
            pt_masks = parts_masks[test_emb]
            embeddings_masks_list.append(pt_masks if len(pt_masks.shape) == 4 else pt_masks.unsqueeze(1))
        assert len(embeddings) != 0
        embeddings =
torch.cat(embeddings_list, dim=1)  # [N, P+2, D]
        visibility_scores = torch.cat(visibility_scores_list, dim=1)  # [N, P+2]
        embeddings_masks = torch.cat(embeddings_masks_list, dim=1)  # [N, P+2, Hf, Wf]
        return embeddings, visibility_scores, embeddings_masks, pixels_cls_scores


================================================
FILE: torchreid/engine/image/softmax.py
================================================
from __future__ import division, print_function, absolute_import

from torchreid import metrics
from torchreid.losses import CrossEntropyLoss

from ..engine import Engine


class ImageSoftmaxEngine(Engine):
    r"""Softmax-loss engine for image-reid.

    Args:
        datamanager (DataManager): an instance of ``torchreid.data.ImageDataManager``
            or ``torchreid.data.VideoDataManager``.
        model (nn.Module): model instance.
        optimizer (Optimizer): an Optimizer.
        scheduler (LRScheduler, optional): if None, no learning rate decay will be
            performed.
        use_gpu (bool, optional): use gpu. Default is True.
        label_smooth (bool, optional): use label smoothing regularizer. Default is True.

    Examples::

        import torchreid
        datamanager = torchreid.data.ImageDataManager(
            root='path/to/reid-data',
            sources='market1501',
            height=256,
            width=128,
            combineall=False,
            batch_size=32
        )
        model = torchreid.models.build_model(
            name='resnet50',
            num_classes=datamanager.num_train_pids,
            loss='softmax'
        )
        model = model.cuda()
        optimizer = torchreid.optim.build_optimizer(
            model, optim='adam', lr=0.0003
        )
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer,
            lr_scheduler='single_step',
            stepsize=20
        )
        engine = torchreid.engine.ImageSoftmaxEngine(
            datamanager, model, optimizer, scheduler=scheduler
        )
        engine.run(
            max_epoch=60,
            save_dir='log/resnet50-softmax-market1501'
        )
    """

    def __init__(
        self,
        datamanager,
        model,
        optimizer,
        writer,
        engine_state,
        scheduler=None,
        use_gpu=True,
        label_smooth=True,
        save_model_flag=False
    ):
        super(ImageSoftmaxEngine, self).__init__(datamanager, writer, engine_state, use_gpu, save_model_flag)

        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        # register so checkpointing/saving machinery can find model+optimizer+scheduler
        self.register_model('model', model, optimizer, scheduler)

        self.criterion = CrossEntropyLoss(
            label_smooth=label_smooth
        )

    def forward_backward(self, data):
        """One training step: forward pass, cross-entropy loss, backward pass, optimizer update.

        Returns the loss tensor and a nested summary dict for logging.
        """
        imgs, pids = self.parse_data_for_train(data)

        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        outputs = self.model(imgs)
        loss = self.compute_loss(self.criterion, outputs, pids)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        loss_summary = {
            'loss': loss.item(),
            'acc': metrics.accuracy(outputs, pids)[0].item()
        }

        return loss, {'glb_ft': loss_summary}


================================================
FILE: torchreid/engine/image/triplet.py
================================================
from __future__ import division, print_function, absolute_import

from torchreid import metrics
from torchreid.losses import TripletLoss, CrossEntropyLoss

from ..engine import Engine


class ImageTripletEngine(Engine):
    r"""Triplet-loss engine for image-reid.
    Args:
        datamanager (DataManager): an instance of ``torchreid.data.ImageDataManager``
            or ``torchreid.data.VideoDataManager``.
        model (nn.Module): model instance.
        optimizer (Optimizer): an Optimizer.
        margin (float, optional): margin for triplet loss. Default is 0.3.
        weight_t (float, optional): weight for triplet loss. Default is 1.
        weight_x (float, optional): weight for softmax loss. Default is 1.
        scheduler (LRScheduler, optional): if None, no learning rate decay will be
            performed.
        use_gpu (bool, optional): use gpu. Default is True.
        label_smooth (bool, optional): use label smoothing regularizer. Default is True.

    Examples::

        import torchreid
        datamanager = torchreid.data.ImageDataManager(
            root='path/to/reid-data',
            sources='market1501',
            height=256,
            width=128,
            combineall=False,
            batch_size=32,
            num_instances=4,
            train_sampler='RandomIdentitySampler' # this is important
        )
        model = torchreid.models.build_model(
            name='resnet50',
            num_classes=datamanager.num_train_pids,
            loss='triplet'
        )
        model = model.cuda()
        optimizer = torchreid.optim.build_optimizer(
            model, optim='adam', lr=0.0003
        )
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer,
            lr_scheduler='single_step',
            stepsize=20
        )
        engine = torchreid.engine.ImageTripletEngine(
            datamanager, model, optimizer, margin=0.3,
            weight_t=0.7, weight_x=1, scheduler=scheduler
        )
        engine.run(
            max_epoch=60,
            save_dir='log/resnet50-triplet-market1501'
        )
    """

    def __init__(
        self,
        datamanager,
        model,
        optimizer,
        writer,
        engine_state,
        margin=0.3,
        weight_t=1,
        weight_x=1,
        scheduler=None,
        use_gpu=True,
        label_smooth=True,
        save_model_flag=False
    ):
        super(ImageTripletEngine, self).__init__(datamanager, writer, engine_state, use_gpu, save_model_flag)

        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        # register so checkpointing/saving machinery can find model+optimizer+scheduler
        self.register_model('model', model, optimizer, scheduler)

        self.weight_t = weight_t
        self.weight_x = weight_x

        self.criterion_t = TripletLoss(margin=margin)
        self.criterion_x = CrossEntropyLoss(
            label_smooth=label_smooth
        )

    def forward_backward(self, data):
        """One training step combining triplet loss (on features) and softmax loss (on logits)."""
        imgs, pids = self.parse_data_for_train(data)

        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        # model returns (classification outputs, feature embeddings) in triplet mode
        outputs, features = self.model(imgs)
        loss_t = self.compute_loss(self.criterion_t, features, pids)
        loss_x = self.compute_loss(self.criterion_x, outputs, pids)
        # weighted sum of the two objectives
        loss = self.weight_t * loss_t + self.weight_x * loss_x

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        loss_summary = {
            'loss_t': loss_t.item(),
            'loss_x': loss_x.item(),
            'acc': metrics.accuracy(outputs, pids)[0].item()
        }

        return loss, {'glb_ft': loss_summary}


================================================
FILE: torchreid/engine/video/__init__.py
================================================
from __future__ import absolute_import

from .softmax import VideoSoftmaxEngine
from .triplet import VideoTripletEngine


================================================
FILE: torchreid/engine/video/softmax.py
================================================
from __future__ import division, print_function, absolute_import

import torch

from torchreid.engine.image import ImageSoftmaxEngine


class VideoSoftmaxEngine(ImageSoftmaxEngine):
    """Softmax-loss engine for video-reid.

    Args:
        datamanager (DataManager): an instance of ``torchreid.data.ImageDataManager``
            or ``torchreid.data.VideoDataManager``.
        model (nn.Module): model instance.
        optimizer (Optimizer): an Optimizer.
        scheduler (LRScheduler, optional): if None, no learning rate decay will be
            performed.
        use_gpu (bool, optional): use gpu. Default is True.
        label_smooth (bool, optional): use label smoothing regularizer. Default is True.
        pooling_method (str, optional): how to pool features for a tracklet.
            Default is "avg" (average). Choices are ["avg", "max"].
Examples:: import torch import torchreid # Each batch contains batch_size*seq_len images datamanager = torchreid.data.VideoDataManager( root='path/to/reid-data', sources='mars', height=256, width=128, combineall=False, batch_size=8, # number of tracklets seq_len=15 # number of images in each tracklet ) model = torchreid.models.build_model( name='resnet50', num_classes=datamanager.num_train_pids, loss='softmax' ) model = model.cuda() optimizer = torchreid.optim.build_optimizer( model, optim='adam', lr=0.0003 ) scheduler = torchreid.optim.build_lr_scheduler( optimizer, lr_scheduler='single_step', stepsize=20 ) engine = torchreid.engine.VideoSoftmaxEngine( datamanager, model, optimizer, scheduler=scheduler, pooling_method='avg' ) engine.run( max_epoch=60, save_dir='log/resnet50-softmax-mars' ) """ def __init__( self, datamanager, model, optimizer, writer, scheduler=None, use_gpu=True, label_smooth=True, pooling_method='avg', save_model_flag=False ): super(VideoSoftmaxEngine, self).__init__( datamanager, model, optimizer, writer, scheduler=scheduler, use_gpu=use_gpu, label_smooth=label_smooth, save_model_flag=save_model_flag ) self.pooling_method = pooling_method def parse_data_for_train(self, data): imgs = data[0] pids = data[1] if imgs.dim() == 5: # b: batch size # s: sequence length # c: channel depth # h: height # w: width b, s, c, h, w = imgs.size() imgs = imgs.view(b * s, c, h, w) pids = pids.view(b, 1).expand(b, s) pids = pids.contiguous().view(b * s) return imgs, pids def extract_features(self, input): # b: batch size # s: sequence length # c: channel depth # h: height # w: width b, s, c, h, w = input.size() input = input.view(b * s, c, h, w) features = self.model(input) features = features.view(b, s, -1) if self.pooling_method == 'avg': features = torch.mean(features, 1) else: features = torch.max(features, 1)[0] return features ================================================ FILE: torchreid/engine/video/triplet.py 
================================================
from __future__ import division, print_function, absolute_import

from torchreid.engine.image import ImageTripletEngine
from torchreid.engine.video import VideoSoftmaxEngine


class VideoTripletEngine(ImageTripletEngine, VideoSoftmaxEngine):
    """Triplet-loss engine for video-reid.

    Args:
        datamanager (DataManager): an instance of ``torchreid.data.ImageDataManager``
            or ``torchreid.data.VideoDataManager``.
        model (nn.Module): model instance.
        optimizer (Optimizer): an Optimizer.
        margin (float, optional): margin for triplet loss. Default is 0.3.
        weight_t (float, optional): weight for triplet loss. Default is 1.
        weight_x (float, optional): weight for softmax loss. Default is 1.
        scheduler (LRScheduler, optional): if None, no learning rate decay will be
            performed.
        use_gpu (bool, optional): use gpu. Default is True.
        label_smooth (bool, optional): use label smoothing regularizer. Default is True.
        pooling_method (str, optional): how to pool features for a tracklet.
            Default is "avg" (average). Choices are ["avg", "max"].

    Examples::

        import torch
        import torchreid
        # Each batch contains batch_size*seq_len images
        # Each identity is sampled with num_instances tracklets
        datamanager = torchreid.data.VideoDataManager(
            root='path/to/reid-data',
            sources='mars',
            height=256,
            width=128,
            combineall=False,
            num_instances=4,
            train_sampler='RandomIdentitySampler',
            batch_size=8, # number of tracklets
            seq_len=15 # number of images in each tracklet
        )
        model = torchreid.models.build_model(
            name='resnet50',
            num_classes=datamanager.num_train_pids,
            loss='triplet'
        )
        model = model.cuda()
        optimizer = torchreid.optim.build_optimizer(
            model, optim='adam', lr=0.0003
        )
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer,
            lr_scheduler='single_step',
            stepsize=20
        )
        engine = torchreid.engine.VideoTripletEngine(
            datamanager, model, optimizer, margin=0.3,
            weight_t=0.7, weight_x=1, scheduler=scheduler,
            pooling_method='avg'
        )
        engine.run(
            max_epoch=60,
            save_dir='log/resnet50-triplet-mars'
        )
    """

    def __init__(
        self,
        datamanager,
        model,
        optimizer,
        writer,
        margin=0.3,
        weight_t=1,
        weight_x=1,
        scheduler=None,
        use_gpu=True,
        label_smooth=True,
        pooling_method='avg',
        save_model_flag=False
    ):
        super(VideoTripletEngine, self).__init__(
            datamanager,
            model,
            optimizer,
            writer,
            margin=margin,
            weight_t=weight_t,
            weight_x=weight_x,
            scheduler=scheduler,
            use_gpu=use_gpu,
            label_smooth=label_smooth,
            save_model_flag=save_model_flag
        )
        # tracklet feature pooling strategy: 'avg' or 'max'
        self.pooling_method = pooling_method


================================================
FILE: torchreid/hyperparameter/custom_hyperparameter_optimizer.py
================================================
import os
import sys
import time
import os.path as osp
import argparse

import torch
import torch.nn as nn

import torchreid
from torchreid.utils import (
    Logger, check_isfile, set_random_seed, collect_env_info,
    resume_from_checkpoint, load_pretrained_weights, compute_model_complexity, Writer
)

from scripts.default_config import (
    imagedata_kwargs, optimizer_kwargs, videodata_kwargs, engine_run_kwargs,
get_default_config, lr_scheduler_kwargs ) def build_datamanager(cfg): if cfg.data.type == 'image': return torchreid.data.ImageDataManager(**imagedata_kwargs(cfg)) else: return torchreid.data.VideoDataManager(**videodata_kwargs(cfg)) def build_engine(cfg, datamanager, model, optimizer, scheduler, writer): if cfg.data.type == 'image': if cfg.loss.name == 'softmax': engine = torchreid.engine.ImageSoftmaxEngine( datamanager, model, optimizer=optimizer, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer ) elif cfg.loss.name == 'triplet': engine = torchreid.engine.ImageTripletEngine( datamanager, model, optimizer=optimizer, margin=cfg.loss.triplet.margin, weight_t=cfg.loss.triplet.weight_t, weight_x=cfg.loss.triplet.weight_x, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer ) elif cfg.loss.name == 'part_based': engine = torchreid.engine.ImagePartBasedEngine( datamanager, model, optimizer=optimizer, loss_name=cfg.loss.part_based.name, config=cfg, margin=cfg.loss.triplet.margin, scheduler=scheduler, use_gpu=cfg.use_gpu, save_model_flag=cfg.model.save_model_flag, writer=writer, mask_filtering_training=cfg.model.bpbreid.mask_filtering_training, mask_filtering_testing=cfg.model.bpbreid.mask_filtering_testing, mask_filtering_threshold=cfg.model.bpbreid.mask_filtering_threshold, batch_debug_freq=cfg.train.batch_debug_freq, batch_size_pairwise_dist_matrix=cfg.test.batch_size_pairwise_dist_matrix ) else: if cfg.loss.name == 'softmax': engine = torchreid.engine.VideoSoftmaxEngine( datamanager, model, optimizer=optimizer, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, pooling_method=cfg.video.pooling_method, save_model_flag=cfg.model.save_model_flag, writer=writer ) else: engine = torchreid.engine.VideoTripletEngine( datamanager, model, optimizer=optimizer, 
margin=cfg.loss.triplet.margin, weight_t=cfg.loss.triplet.weight_t, weight_x=cfg.loss.triplet.weight_x, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer ) return engine def reset_config(cfg, args): if args.root: cfg.data.root = args.root if args.sources: cfg.data.sources = args.sources if args.targets: cfg.data.targets = args.targets if args.transforms: cfg.data.transforms = args.transforms def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( '--config-file', type=str, default='', help='path to config file' ) parser.add_argument( '-s', '--sources', type=str, nargs='+', help='source datasets (delimited by space)' ) parser.add_argument( '-t', '--targets', type=str, nargs='+', help='target datasets (delimited by space)' ) parser.add_argument( '--transforms', type=str, nargs='+', help='data augmentation' ) parser.add_argument( '--root', type=str, default='', help='path to data root' ) parser.add_argument( 'opts', default=None, nargs=argparse.REMAINDER, help='Modify config options using the command-line' ) args = parser.parse_args() cfg = get_default_config() cfg.use_gpu = torch.cuda.is_available() if args.config_file: cfg.merge_from_file(args.config_file) cfg.project.config_file = os.path.basename(args.config_file) reset_config(cfg, args) cfg.merge_from_list(args.opts) params = [] for pooling in cfg.tuning.model.bpbreid.pooling: for normalization in cfg.tuning.model.bpbreid.normalization: for mask_filtering_training in cfg.tuning.model.bpbreid.mask_filtering_training: for mask_filtering_testing in cfg.tuning.model.bpbreid.mask_filtering_testing: for mask_filtering_threshold in cfg.tuning.model.bpbreid.mask_filtering_threshold: for transforms in cfg.tuning.data.transforms: for pose_name in cfg.tuning.loss.part_based.name: params.append([pooling, normalization, mask_filtering_training, mask_filtering_testing, 
mask_filtering_threshold, transforms, pose_name]) print("Will optimize #{} params combination".format(len(params))) for count, param in enumerate(params): cfg.model.bpbreid.pooling = param[0] cfg.model.bpbreid.normalization = param[1] cfg.model.bpbreid.mask_filtering_training = param[2] cfg.model.bpbreid.mask_filtering_testing = param[3] cfg.model.bpbreid.mask_filtering_threshold = param[4] cfg.data.transforms = param[5] cfg.loss.part_based.name = param[6] if cfg.project.debug_mode: torch.autograd.set_detect_anomaly(True) writer = Writer(cfg) print("Hyper param tuning {}/{}".format(count+1, len(param))) print("Hyper param = {}".format(params)) set_random_seed(cfg.train.seed) log_name = 'test_log' if cfg.test.evaluate else 'train_log' log_name += time.strftime('-%Y-%m-%d-%H-%M-%S') log_name += '.txt' sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name)) print('Show configuration\n{}\n'.format(cfg)) print('Collecting env info ...') print('** System info **\n{}\n'.format(collect_env_info())) if cfg.use_gpu: torch.backends.cudnn.benchmark = True datamanager = build_datamanager(cfg) print('Building model: {}'.format(cfg.model.name)) model = torchreid.models.build_model( name=cfg.model.name, num_classes=datamanager.num_train_pids, loss=cfg.loss.name, pretrained=cfg.model.pretrained, use_gpu=cfg.use_gpu, pooling=cfg.model.bpbreid.pooling, normalization=cfg.model.bpbreid.normalization ) num_params, flops = compute_model_complexity( model, (1, 3, cfg.data.height, cfg.data.width) ) print('Model complexity: params={:,} flops={:,}'.format(num_params, flops)) if cfg.model.load_weights and check_isfile(cfg.model.load_weights): load_pretrained_weights(model, cfg.model.load_weights) if cfg.use_gpu: model = nn.DataParallel(model).cuda() optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg)) scheduler = torchreid.optim.build_lr_scheduler( optimizer, **lr_scheduler_kwargs(cfg) ) if cfg.model.resume and check_isfile(cfg.model.resume): cfg.train.start_epoch = 
resume_from_checkpoint( cfg.model.resume, model, optimizer=optimizer, scheduler=scheduler ) print( 'Building {}-engine for {}-reid'.format(cfg.loss.name, cfg.data.type) ) engine = build_engine(cfg, datamanager, model, optimizer, scheduler, writer) engine.run(**engine_run_kwargs(cfg)) if __name__ == '__main__': main() ================================================ FILE: torchreid/hyperparameter/hyperparameter_optimizer.py ================================================ import logging from clearml import Task from clearml.automation import (HyperParameterOptimizer, GridSearch, UniformIntegerParameterRange, DiscreteParameterRange, ParameterSet) def job_complete_callback( job_id, # type: str objective_value, # type: float objective_iteration, # type: int job_parameters, # type: dict top_performance_job_id # type: str ): print('Job completed!', job_id, objective_value, objective_iteration, job_parameters) if job_id == top_performance_job_id: print('WOOT WOOT we broke the record! Objective reached {}'.format(objective_value)) # Connecting clearml task = Task.init(project_name='Hyper-Parameter Optimization', task_name='Automatic Hyper-Parameter Optimization', task_type=Task.TaskTypes.optimizer, reuse_last_task_id=False) # experiment template to optimize in the hyper-parameter optimization args = { 'template_task_id': None, 'run_as_service': False, } args = task.connect(args) # Get the template task experiment that we want to optimize if not args['template_task_id']: args['template_task_id'] = '71379629c09449b8b3160d881c58657a' # Example use case: an_optimizer = HyperParameterOptimizer( # This is the experiment we want to optimize base_task_id=args['template_task_id'], # here we define the hyper-parameters to optimize hyper_parameters=[ DiscreteParameterRange('General/loss/part_based/name', ['inter_parts_triplet_loss', 'part_max_triplet_loss', 'part_averaged_triplet_loss', 'intra_parts_triplet_loss']), DiscreteParameterRange('General/train/batch_size', values=[32, 128]) 
    ],
    # this is the objective metric we want to maximize/minimize
    objective_metric_title='Test/rank1',
    objective_metric_series='Test/rank1',
    # now we decide if we want to maximize it or minimize it (accuracy we maximize)
    objective_metric_sign='max',
    # let us limit the number of concurrent experiments,
    # this in turn will make sure we don't bombard the scheduler with experiments.
    # if we have an auto-scaler connected, this, by proxy, will limit the number of machines
    max_number_of_concurrent_tasks=1,
    # this is the optimizer class (actually doing the optimization)
    # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band)
    # more are coming soon...
    optimizer_class=GridSearch,
    # Select an execution queue to schedule the experiments for execution
    execution_queue='default',
    # Optional: Limit the execution time of a single experiment, in minutes.
    # (this is optional, and if using OptimizerBOHB, it is ignored)
    time_limit_per_job=None,
    # Check the experiments every 12 seconds is way too often, we should probably set it to 5 min,
    # assuming a single experiment is usually hours...
    pool_period_min=5,
    # set the maximum number of jobs to launch for the optimization, default (None) unlimited
    # If OptimizerBOHB is used, it defined the maximum budget in terms of full jobs
    # basically the cumulative number of iterations will not exceed total_max_jobs * max_iteration_per_job
    total_max_jobs=10,
    # set the minimum number of iterations for an experiment, before early stopping.
    # Does not apply for simple strategies such as RandomSearch or GridSearch
    min_iteration_per_job=10,
    # Set the maximum number of iterations for an experiment to execute
    # (This is optional, unless using OptimizerBOHB where this is a must)
    max_iteration_per_job=None,
)

# if we are running as a service, just enqueue ourselves into the services queue and let it run the optimization
if args['run_as_service']:
    # if this code is executed by `clearml-agent` the function call does nothing.
    # if executed locally, the local process will be terminated, and a remote copy will be executed instead
    task.execute_remotely(queue_name='services', exit_process=True)

# report every 12 seconds, this is way too often, but we are testing here
an_optimizer.set_report_period(5)
# start the optimization process, callback function to be called every time an experiment is completed
# this function returns immediately
an_optimizer.start(job_complete_callback=job_complete_callback)
# set the time limit for the optimization process (24 hours)
an_optimizer.set_time_limit(in_minutes=60.0 * 24)
# wait until process is done (notice we are controlling the optimization process in the background)
an_optimizer.wait()
# optimization is completed, print the top performing experiments id
top_exp = an_optimizer.get_top_experiments(top_k=3)
print([t.id for t in top_exp])
# make sure background optimization stopped
an_optimizer.stop()

print('We are done, good bye')


================================================
FILE: torchreid/hyperparameter/optuna_hyperparameter_optimizer.py
================================================
# import os
# import sys
# import time
# import os.path as osp
# import argparse
#
# import cv2
# import optuna
# import torch
# import torch.nn as nn
# from optuna import Trial
# from optuna.samplers import GridSampler
#
# import torchreid
# from torchreid.utils import (
#     Logger, check_isfile, set_random_seed, collect_env_info,
#     resume_from_checkpoint, load_pretrained_weights,
compute_model_complexity, Writer # ) # # from scripts.default_config import ( # imagedata_kwargs, optimizer_kwargs, videodata_kwargs, engine_run_kwargs, # get_default_config, lr_scheduler_kwargs # ) # # # def build_datamanager(cfg): # if cfg.data.type == 'image': # return torchreid.data.ImageDataManager(**imagedata_kwargs(cfg)) # else: # return torchreid.data.VideoDataManager(**videodata_kwargs(cfg)) # # # def build_engine(cfg, datamanager, model, optimizer, scheduler, writer): # if cfg.data.type == 'image': # if cfg.loss.name == 'softmax': # engine = torchreid.engine.ImageSoftmaxEngine( # datamanager, # model, # optimizer=optimizer, # scheduler=scheduler, # use_gpu=cfg.use_gpu, # label_smooth=cfg.loss.softmax.label_smooth, # save_model_flag=cfg.model.save_model_flag, # writer=writer # ) # # elif cfg.loss.name == 'triplet': # engine = torchreid.engine.ImageTripletEngine( # datamanager, # model, # optimizer=optimizer, # margin=cfg.loss.triplet.margin, # weight_t=cfg.loss.triplet.weight_t, # weight_x=cfg.loss.triplet.weight_x, # scheduler=scheduler, # use_gpu=cfg.use_gpu, # label_smooth=cfg.loss.softmax.label_smooth, # save_model_flag=cfg.model.save_model_flag, # writer=writer # ) # # elif cfg.loss.name == 'part_based': # engine = torchreid.engine.ImagePartBasedEngine( # datamanager, # model, # optimizer=optimizer, # loss_name=cfg.loss.part_based.name, # config=cfg, # margin=cfg.loss.triplet.margin, # scheduler=scheduler, # use_gpu=cfg.use_gpu, # save_model_flag=cfg.model.save_model_flag, # writer=writer, # mask_filtering_training=cfg.model.bpbreid.mask_filtering_training, # mask_filtering_testing=cfg.model.bpbreid.mask_filtering_testing, # mask_filtering_threshold=cfg.model.bpbreid.mask_filtering_threshold, # batch_debug_freq=cfg.train.batch_debug_freq, # batch_size_pairwise_dist_matrix=cfg.test.batch_size_pairwise_dist_matrix # ) # # else: # if cfg.loss.name == 'softmax': # engine = torchreid.engine.VideoSoftmaxEngine( # datamanager, # model, # optimizer=optimizer, 
# scheduler=scheduler, # use_gpu=cfg.use_gpu, # label_smooth=cfg.loss.softmax.label_smooth, # pooling_method=cfg.video.pooling_method, # save_model_flag=cfg.model.save_model_flag, # writer=writer # ) # # else: # engine = torchreid.engine.VideoTripletEngine( # datamanager, # model, # optimizer=optimizer, # margin=cfg.loss.triplet.margin, # weight_t=cfg.loss.triplet.weight_t, # weight_x=cfg.loss.triplet.weight_x, # scheduler=scheduler, # use_gpu=cfg.use_gpu, # label_smooth=cfg.loss.softmax.label_smooth, # save_model_flag=cfg.model.save_model_flag, # writer=writer # ) # # return engine # # # def reset_config(cfg, args): # if args.root: # cfg.data.root = args.root # if args.sources: # cfg.data.sources = args.sources # if args.targets: # cfg.data.targets = args.targets # if args.transforms: # cfg.data.transforms = args.transforms # # # def merge_optuna_hyperparams(cfg, trial): # # cfg.data.sources # # cfg.data.targets # # cfg.sampler.num_instances # # cfg.train.optim # # cfg.train.lr # # cfg.train.weight_decay # # cfg.train.max_epoch # # cfg.train.start_epoch # # cfg.train.batch_size # # cfg.train.fixbase_epoch # # cfg.train.open_layers # # cfg.train.staged_lr # # cfg.train.new_layers # # cfg.train.base_lr_mult # # cfg.train.lr_scheduler # # cfg.train.stepsize # # cfg.train.gamma # # cfg.train.seed # # cfg.train.eval_freq # # cfg.train.batch_debug_freq # # cfg.sgd.momentum # # cfg.loss.triplet.margin # # cfg.sgd.dampening # # cfg.sgd.nesterov # # cfg.rmsprop.alpha # # cfg.adam.beta1 # # cfg.adam.beta2 # # # Categorical parameter # # optimizer = trial.suggest_categorical('optimizer', ['MomentumSGD', 'Adam']) # # # Int parameter # # num_layers = trial.suggest_int('num_layers', 1, 3) # # # Uniform parameter # # dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 1.0) # # # Loguniform parameter # # learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2) # # # Discrete-uniform parameter # # drop_path_rate = trial.suggest_discrete_uniform('drop_path_rate', 
0.0, 1.0, 0.1) # # if len(cfg.tuning.model.bpbreid.pooling) != 0: # cfg.model.bpbreid.pooling = trial.suggest_categorical("model.bpbreid.pooling", cfg.tuning.model.bpbreid.pooling) # if len(cfg.tuning.model.bpbreid.normalization) != 0: # cfg.model.bpbreid.normalization = trial.suggest_categorical("model.bpbreid.normalization", cfg.tuning.model.bpbreid.normalization) # if len(cfg.tuning.model.bpbreid.mask_filtering_training) != 0: # cfg.model.bpbreid.mask_filtering_training = trial.suggest_categorical("model.bpbreid.mask_filtering_training", cfg.tuning.model.bpbreid.mask_filtering_training) # if len(cfg.tuning.model.bpbreid.mask_filtering_testing) != 0: # cfg.model.bpbreid.mask_filtering_testing = trial.suggest_categorical("model.bpbreid.mask_filtering_testing", cfg.tuning.model.bpbreid.mask_filtering_testing) # if len(cfg.tuning.model.bpbreid.mask_filtering_threshold) != 0: # cfg.model.bpbreid.mask_filtering_threshold = trial.suggest_categorical("model.bpbreid.mask_filtering_threshold", cfg.tuning.model.bpbreid.mask_filtering_threshold) # if len(cfg.tuning.loss.part_based.name) != 0: # cfg.loss.part_based.name = trial.suggest_categorical("loss.part_based.name", cfg.tuning.loss.part_based.name) # # # def main(): # parser = argparse.ArgumentParser( # formatter_class=argparse.ArgumentDefaultsHelpFormatter # ) # parser.add_argument( # '--config-file', type=str, default='', help='path to config file' # ) # parser.add_argument( # '-s', # '--sources', # type=str, # nargs='+', # help='source datasets (delimited by space)' # ) # parser.add_argument( # '-t', # '--targets', # type=str, # nargs='+', # help='target datasets (delimited by space)' # ) # parser.add_argument( # '--transforms', type=str, nargs='+', help='data augmentation' # ) # parser.add_argument( # '--root', type=str, default='', help='path to data root' # ) # parser.add_argument( # 'opts', # default=None, # nargs=argparse.REMAINDER, # help='Modify config options using the command-line' # ) # args = 
parser.parse_args() # # cfg = get_default_config() # cfg.use_gpu = torch.cuda.is_available() # if args.config_file: # cfg.merge_from_file(args.config_file) # cfg.project.config_file = os.path.basename(args.config_file) # reset_config(cfg, args) # cfg.merge_from_list(args.opts) # # # Create objective function with access to config from main context # def objective(trial: Trial): # # import trial hyper parameters into corresponding config field # merge_optuna_hyperparams(cfg, trial) # # if cfg.project.debug_mode: # torch.autograd.set_detect_anomaly(True) # writer = Writer(cfg) # set_random_seed(cfg.train.seed) # log_name = 'test_log' if cfg.test.evaluate else 'train_log' # log_name += time.strftime('-%Y-%m-%d-%H-%M-%S') # log_name += '.txt' # sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name)) # print('Show configuration\n{}\n'.format(cfg)) # print('Collecting env info ...') # print('** System info **\n{}\n'.format(collect_env_info())) # if cfg.use_gpu: # torch.backends.cudnn.benchmark = True # datamanager = build_datamanager(cfg) # print('Building model: {}'.format(cfg.model.name)) # model = torchreid.models.build_model( # name=cfg.model.name, # num_classes=datamanager.num_train_pids, # loss=cfg.loss.name, # pretrained=cfg.model.pretrained, # use_gpu=cfg.use_gpu, # pooling=cfg.model.bpbreid.pooling, # normalization=cfg.model.bpbreid.normalization # ) # num_params, flops = compute_model_complexity( # model, (1, 3, cfg.data.height, cfg.data.width) # ) # print('Model complexity: params={:,} flops={:,}'.format(num_params, flops)) # if cfg.model.load_weights and check_isfile(cfg.model.load_weights): # load_pretrained_weights(model, cfg.model.load_weights) # if cfg.use_gpu: # model = nn.DataParallel(model).cuda() # optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg)) # scheduler = torchreid.optim.build_lr_scheduler( # optimizer, **lr_scheduler_kwargs(cfg) # ) # if cfg.model.resume and check_isfile(cfg.model.resume): # cfg.train.start_epoch = 
class GiLtLoss(nn.Module):
    """
    The Global-identity Local-triplet 'GiLt' loss as described in our paper:
    'Somers V. & al, Body Part-Based Representation Learning for Occluded Person
    Re-Identification, WACV23'.
    Source: https://github.com/VlSomers/bpbreid
    The default weights for the GiLt strategy (as described in the paper) are provided in
    'default_losses_weights': the identity loss is applied only on holistic embeddings and the triplet loss is applied
    only on part-based embeddings.
    'tr' denotes 'triplet' for the triplet loss and 'id' denotes 'identity' for the identity cross-entropy loss.
    """

    # One {'id', 'tr'} weight pair per embedding type. GiLt default: identity loss
    # on holistic embeddings (global/foreground/concatenated parts), triplet loss
    # on the individual part embeddings only.
    default_losses_weights = {
        GLOBAL: {'id': 1., 'tr': 0.},
        FOREGROUND: {'id': 1., 'tr': 0.},
        CONCAT_PARTS: {'id': 1., 'tr': 0.},
        PARTS: {'id': 0., 'tr': 1.}
    }

    def __init__(self, losses_weights=None, use_visibility_scores=False, triplet_margin=0.3, loss_name='part_averaged_triplet_loss', use_gpu=False, writer=None):
        """
        Args:
            losses_weights (dict, optional): per-embedding-type weights for the 'id'
                and 'tr' losses; defaults to the GiLt weights above.
            use_visibility_scores (bool): weight/filter losses by part visibility.
            triplet_margin (float): margin for the part-based triplet loss.
            loss_name (str): which part-based triplet loss variant to build
                (see torchreid.losses.init_part_based_triplet_loss).
            use_gpu (bool): move the accuracy metric to GPU.
            writer: experiment logger forwarded to the triplet loss.
        """
        super().__init__()
        if losses_weights is None:
            losses_weights = self.default_losses_weights
        # top-1 classification accuracy, reported alongside the identity loss
        self.pred_accuracy = Accuracy(top_k=1)
        if use_gpu:
            self.pred_accuracy = self.pred_accuracy.cuda()
        self.losses_weights = losses_weights
        self.part_triplet_loss = init_part_based_triplet_loss(loss_name, margin=triplet_margin, writer=writer)
        self.identity_loss = CrossEntropyLoss(label_smooth=True)
        self.use_visibility_scores = use_visibility_scores

    def forward(self, embeddings_dict, visibility_scores_dict, id_cls_scores_dict, pids):
        """
        Keys in the input dictionaries are from {'globl', 'foreg', 'conct', 'parts'} and correspond to the different
        types of embeddings. In the documentation below, we denote the batch size by 'N' and the number of parts
        by 'K'.
        :param embeddings_dict: a dictionary of embeddings, where the keys are the embedding types and the values are
            Tensors of size [N, D] or [N, K*D] or [N, K, D].
        :param visibility_scores_dict: a dictionary of visibility scores, where the keys are the embedding types and
            the values are Tensors of size [N] or [N, K].
        :param id_cls_scores_dict: a dictionary of identity classification scores, where the keys are the embedding
            types and the values are Tensors of size [N, num_classes] or [N, K, num_classes]
        :param pids: A Tensor of size [N] containing the person IDs.
        :return: a tuple with the total combined loss and a dictionary with performance information for each
            individual loss.
        """
        loss_summary = {}
        # list of (weight, loss_value) pairs, combined into one scalar at the end
        losses = []

        # First pass: identity (cross-entropy) loss for each embedding type with
        # a non-zero 'id' weight.
        for key in [GLOBAL, FOREGROUND, CONCAT_PARTS, PARTS]:
            # re-use the summary entry if it already exists (it doesn't on this pass)
            loss_info = OrderedDict() if key not in loss_summary else loss_summary[key]
            ce_w = self.losses_weights[key]['id']
            if ce_w > 0:
                parts_id_loss, parts_id_accuracy = self.compute_id_cls_loss(id_cls_scores_dict[key], visibility_scores_dict[key], pids)
                losses.append((ce_w, parts_id_loss))
                loss_info['c'] = parts_id_loss
                loss_info['a'] = parts_id_accuracy
                loss_summary[key] = loss_info

        # Second pass: triplet loss for each embedding type with a non-zero 'tr'
        # weight. Summary entries created by the first pass are extended in place.
        for key in [GLOBAL, FOREGROUND, CONCAT_PARTS, PARTS]:
            loss_info = OrderedDict() if key not in loss_summary else loss_summary[key]
            tr_w = self.losses_weights[key]['tr']
            if tr_w > 0:
                parts_triplet_loss, parts_trivial_triplets_ratio, parts_valid_triplets_ratio = \
                    self.compute_triplet_loss(embeddings_dict[key], visibility_scores_dict[key], pids)
                losses.append((tr_w, parts_triplet_loss))
                loss_info['t'] = parts_triplet_loss
                loss_info['tt'] = parts_trivial_triplets_ratio
                loss_info['vt'] = parts_valid_triplets_ratio
                loss_summary[key] = loss_info

        # weighted sum of all losses; zero scalar (on pids' device) when every
        # weight is zero so callers can still backprop safely
        if len(losses) == 0:
            return torch.tensor(0., device=(pids.get_device() if pids.is_cuda else None)), loss_summary
        else:
            loss = torch.stack([weight * loss for weight, loss in losses]).sum()
            return loss, loss_summary

    def compute_triplet_loss(self, embeddings, visibility_scores, pids):
        # Normalize shapes to the part-based convention expected by the triplet
        # loss: embeddings [N, K, D] and visibility [N, K] (K=1 for holistic).
        if self.use_visibility_scores:
            visibility = visibility_scores if len(visibility_scores.shape) == 2 else visibility_scores.unsqueeze(1)
        else:
            visibility = None
        embeddings = embeddings if len(embeddings.shape) == 3 else embeddings.unsqueeze(1)
        triplet_loss, trivial_triplets_ratio, valid_triplets_ratio = self.part_triplet_loss(embeddings, pids, parts_visibility=visibility)
        return triplet_loss, trivial_triplets_ratio, valid_triplets_ratio

    def compute_id_cls_loss(self, id_cls_scores, visibility_scores, pids):
        # Part-based scores [N, K, C]: flatten to [N*K, C] and repeat each pid K
        # times so every part prediction is scored against its sample's identity.
        if len(id_cls_scores.shape) == 3:
            M = id_cls_scores.shape[1]
            id_cls_scores = id_cls_scores.flatten(0, 1)
            pids = pids.unsqueeze(1).expand(-1, M).flatten(0, 1)
            visibility_scores = visibility_scores.flatten(0, 1)
        weights = None
        if self.use_visibility_scores and visibility_scores.dtype is torch.bool:
            # boolean visibility: drop invisible entries entirely
            id_cls_scores = id_cls_scores[visibility_scores]
            pids = pids[visibility_scores]
        elif self.use_visibility_scores and visibility_scores.dtype is not torch.bool:
            # continuous visibility: use scores as per-sample loss weights
            weights = visibility_scores
        cls_loss = self.identity_loss(id_cls_scores, pids, weights)
        accuracy = self.pred_accuracy(id_cls_scores, pids)
        return cls_loss, accuracy
def init_part_based_triplet_loss(name, **kwargs):
    """Instantiate the part-based triplet loss for the requested distance-combination strategy.

    Args:
        name (str): key into the registry of part-based triplet losses.
        **kwargs: forwarded to the selected loss constructor (e.g. margin, writer).

    Raises:
        ValueError: if ``name`` is not a registered loss.
    """
    registered_names = list(__body_parts_losses.keys())
    if name not in registered_names:
        raise ValueError(
            'Invalid loss name. Received "{}", '
            'but expected to be one of {}'.format(name, registered_names)
        )
    loss_cls = __body_parts_losses[name]
    return loss_cls(**kwargs)


def deep_supervision(criterion, xs, y):
    """DeepSupervision

    Apply ``criterion`` to each element of ``xs`` against ``y`` and return the
    average of the individual losses.

    Args:
        criterion: loss function
        xs: tuple of inputs
        y: ground truth
    """
    total = sum(criterion(x, y) for x in xs)
    return total / len(xs)
class BodyPartAttentionLoss(nn.Module):
    """A body part attention loss as described in our paper
    'Somers V. & al, Body Part-Based Representation Learning for Occluded Person
    Re-Identification, WACV23'.
    Source: https://github.com/VlSomers/bpbreid

    Trains the pixel-wise body part classifier: each spatial location of the
    feature map is classified into one of the body part classes.
    """
    def __init__(self, loss_type='cl', label_smoothing=0.1, use_gpu=False):
        """
        Args:
            loss_type (str): 'cl' (cross-entropy), 'fl' (focal) or 'dl' (dice).
            label_smoothing (float): smoothing factor, used by 'cl' only.
            use_gpu (bool): move the accuracy metric to GPU.

        Raises:
            ValueError: for an unsupported ``loss_type``.
        """
        super().__init__()
        self.pred_accuracy = Accuracy(top_k=1)
        if use_gpu:
            self.pred_accuracy = self.pred_accuracy.cuda()
        # FIX: every branch now assigns self.part_prediction_loss_1, the attribute
        # read by compute_pixels_cls_loss(). Previously the 'fl' and 'dl' branches
        # stored the criterion under self.part_prediction_loss, so selecting the
        # focal or dice loss raised AttributeError on the first forward pass.
        if loss_type == 'cl':
            self.part_prediction_loss_1 = CrossEntropyLoss(label_smoothing=label_smoothing)
        elif loss_type == 'fl':
            # NOTE(review): FocalLoss/DiceLoss with to_onehot_y expect a channel
            # dim on the target, while compute_pixels_cls_loss feeds flattened
            # [N*H*W] targets -- TODO confirm these branches are exercised/valid.
            self.part_prediction_loss_1 = FocalLoss(to_onehot_y=True, gamma=1.0)
        elif loss_type == 'dl':
            self.part_prediction_loss_1 = DiceLoss(to_onehot_y=True, softmax=True)
        else:
            raise ValueError("Loss {} for part prediction is not supported".format(loss_type))

    def forward(self, pixels_cls_scores, targets):
        """ Compute loss for body part attention prediction.
        Args:
            pixels_cls_scores [N, K, H, W]: per-pixel body part class scores.
            targets [N, H, W]: per-pixel body part labels.
        Returns:
            tuple: (loss, loss_summary) where loss_summary[PIXELS] holds the loss
            value under 'c' and the top-1 pixel accuracy under 'a'.
        """
        loss_summary = {}
        loss_summary[PIXELS] = OrderedDict()
        pixels_cls_loss, pixels_cls_accuracy = self.compute_pixels_cls_loss(pixels_cls_scores, targets)
        loss_summary[PIXELS]['c'] = pixels_cls_loss
        loss_summary[PIXELS]['a'] = pixels_cls_accuracy
        return pixels_cls_loss, loss_summary

    def compute_pixels_cls_loss(self, pixels_cls_scores, targets):
        # Classify every spatial location independently: scores are flattened to
        # [N*Hf*Wf, M] and targets to [N*Hf*Wf] before applying the criterion.
        if pixels_cls_scores.is_cuda:
            targets = targets.cuda()
        pixels_cls_score_targets = targets.flatten()  # [N*Hf*Wf]
        pixels_cls_scores = pixels_cls_scores.permute(0, 2, 3, 1).flatten(0, 2)  # [N*Hf*Wf, M]
        loss = self.part_prediction_loss_1(pixels_cls_scores, pixels_cls_score_targets)
        accuracy = self.pred_accuracy(pixels_cls_scores, pixels_cls_score_targets)
        return loss, accuracy.item()
class CrossEntropyLoss(nn.Module):
    r"""Cross entropy loss with label smoothing regularizer.

    Reference:
        Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.

    With label smoothing, the label :math:`y` for a class is computed by

    .. math::
        \begin{equation}
        (1 - \eps) \times y + \frac{\eps}{K},
        \end{equation}

    where :math:`K` denotes the number of classes and :math:`\eps` is a weight. When
    :math:`\eps = 0`, the loss function reduces to the normal cross entropy.

    Args:
        eps (float, optional): weight. Default is 0.1.
        label_smooth (bool, optional): whether to apply label smoothing. Default is True.
    """

    def __init__(self, eps=0.1, label_smooth=True):
        super(CrossEntropyLoss, self).__init__()
        # disabling label smoothing is expressed as eps == 0
        self.eps = eps if label_smooth else 0
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets, weights=None):
        """
        Args:
            inputs (torch.Tensor): prediction matrix (before softmax) with
                shape (batch_size, num_classes).
            targets (torch.LongTensor): ground truth labels with shape (batch_size).
                Each position contains the label index.
            weights (torch.Tensor, optional): per-sample weights of shape
                (batch_size); they are L1-normalized before the weighted sum.
        """
        assert inputs.shape[0] == targets.shape[0]
        num_classes = inputs.shape[1]
        log_probs = self.logsoftmax(inputs)
        # one-hot encode the targets (built on CPU, then moved if needed)
        one_hot = torch.zeros(log_probs.size()).scatter_(1, targets.unsqueeze(1).data.cpu(), 1)
        if inputs.is_cuda:
            one_hot = one_hot.cuda()
        # smooth the hard labels towards the uniform distribution
        soft_targets = (1 - self.eps) * one_hot + self.eps / num_classes
        per_sample_loss = (-soft_targets * log_probs).sum(dim=1)
        if weights is None:
            return per_sample_loss.mean()
        normalized_weights = nn.functional.normalize(weights, p=1, dim=0)
        return (per_sample_loss * normalized_weights).sum()
""" def __init__(self, margin=0.3): super(TripletLoss, self).__init__() self.margin = margin self.ranking_loss = nn.MarginRankingLoss(margin=margin) def forward(self, inputs, targets): """ Args: inputs (torch.Tensor): feature matrix with shape (batch_size, feat_dim). targets (torch.LongTensor): ground truth labels with shape (num_classes). """ # Compute pairwise distance dist = self.compute_dist_matrix(inputs) # For each anchor, find the hardest positive and negative return self.compute_hard_mine_triplet_loss(dist, inputs, targets) def compute_hard_mine_triplet_loss(self, dist, inputs, targets): n = inputs.size(0) mask = targets.expand(n, n).eq(targets.expand(n, n).t()) dist_ap, dist_an = [], [] for i in range(n): dist_ap.append(dist[i][mask[i]].max().unsqueeze(0)) dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0)) dist_ap = torch.cat(dist_ap) dist_an = torch.cat(dist_an) # Compute ranking hinge loss y = torch.ones_like(dist_an) return self.ranking_loss(dist_an, dist_ap, y) def compute_dist_matrix(self, inputs): n = inputs.size(0) # dist(a, b) = sqrt(sum((a_i - b_i)^2)) = sqrt(sum(a_i^2) + sum(b_i^2) - 2*sum(a_i*b_i)) dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n) dist = dist + dist.t() # sum(a_i^2) + sum(b_i^2) dist.addmm_(inputs, inputs.t(), beta=1, alpha=-2) # sum(a_i^2) + sum(b_i^2) - 2*sum(a_i*b_i) dist = dist.clamp(min=1e-12) # for numerical stability dist = dist.sqrt() # sqrt(sum(a_i^2) + sum(b_i^2) - 2*sum(a_i*b_i)) return dist ================================================ FILE: torchreid/losses/inter_parts_triplet_loss.py ================================================ from __future__ import division, absolute_import from torchreid.losses.part_averaged_triplet_loss import PartAveragedTripletLoss import torch class InterPartsTripletLoss(PartAveragedTripletLoss): def __init__(self, **kwargs): super(InterPartsTripletLoss, self).__init__(**kwargs) def forward(self, body_parts_features, targets, n_iter=0, parts_visibility=None): # 
class InterPartsTripletLoss(PartAveragedTripletLoss):
    """Batch-hard triplet loss mined across part embeddings, where every part
    embedding of every sample is treated as an independent instance.

    A positive pair is (same identity, same body part); a negative pair is any
    pair from different identities. With N samples and M parts, the mixed
    distance matrix has size [M*N, M*N].
    """

    def __init__(self, **kwargs):
        super(InterPartsTripletLoss, self).__init__(**kwargs)
        # FIX: hard_mine_triplet_loss uses self.ranking_loss, but neither this
        # class nor PartAveragedTripletLoss defined it -> AttributeError.
        self.ranking_loss = nn.MarginRankingLoss(margin=self.margin)

    def forward(self, body_parts_features, targets, n_iter=0, parts_visibility=None):
        # assumes body_parts_features is [M, N, D] (parts, batch, dim) -- TODO confirm;
        # NOTE(review): n_iter and parts_visibility are accepted but unused here.
        body_parts_dist_matrices = self.compute_mixed_body_parts_dist_matrices(body_parts_features)
        return self.hard_mine_triplet_loss(body_parts_dist_matrices, targets)

    def compute_mixed_body_parts_dist_matrices(self, body_parts_features):
        """Flatten all part embeddings into one pool and return their [M*N, M*N]
        pairwise distance matrix."""
        # FIX: the pooled embeddings must form a single "part" of M*N samples,
        # i.e. shape [1, M*N, D]; unsqueeze(1) produced [M*N, 1, D], whose
        # distance output cannot be indexed as a square matrix downstream.
        body_parts_features = body_parts_features.flatten(start_dim=0, end_dim=1).unsqueeze(0)
        # FIX: the parent helper signature is (embeddings, squared); the extra
        # positional 'self.epsilon' argument raised a TypeError.
        body_parts_dist_matrices = self._part_based_pairwise_distance_matrix(body_parts_features, squared=False).squeeze()
        return body_parts_dist_matrices

    def hard_mine_triplet_loss(self, dist, targets):
        """Batch-hard mining over the mixed (part x sample) distance matrix.

        Args:
            dist: [M*N, M*N] pairwise distances between all part embeddings.
            targets: [N] identity labels for the batch.
        """
        # TODO extract code for mask generation into separate method
        # TODO cleanup
        nm = dist.shape[0]
        n = targets.size(0)
        m = int(nm / n)
        # Positive mask: same identity AND same body part, e.g. with ids
        # 112233 repeated per part: 112233.expand().eq(112233.expand().t())
        # restricted to the block-diagonal of matching part indices.
        expanded_targets = targets.repeat(m).expand(nm, -1)
        pids_mask = expanded_targets.eq(expanded_targets.t())
        body_parts_targets = []
        for i in range(0, m):
            body_parts_targets.append(torch.full_like(targets, i))
        body_parts_targets = torch.cat(body_parts_targets)
        expanded_body_parts_targets = body_parts_targets.expand(nm, -1)
        body_parts_mask = expanded_body_parts_targets.eq(expanded_body_parts_targets.t())
        mask_p = torch.logical_and(pids_mask, body_parts_mask)
        # negative mask: embeddings from different identities (any part)
        mask_n = pids_mask == 0
        dist_ap, dist_an = [], []
        for i in range(nm):
            i_pos_dist = dist[i][mask_p[i]]
            dist_ap.append(i_pos_dist.max().unsqueeze(0))
            i_neg_dist = dist[i][mask_n[i]]
            assert i_neg_dist.nelement() != 0, "embedding %r should have at least one negative counterpart" % i
            dist_an.append(i_neg_dist.min().unsqueeze(0))
        dist_ap = torch.cat(dist_ap)
        dist_an = torch.cat(dist_an)
        # Compute ranking hinge loss
        y = torch.ones_like(dist_an)
        return self.ranking_loss(dist_an, dist_ap, y)
class PartAveragedTripletLoss(nn.Module):
    """Compute the part-averaged triplet loss as described in our paper:
    'Somers V. & al, Body Part-Based Representation Learning for Occluded Person
    Re-Identification, WACV23'.
    Source: https://github.com/VlSomers/bpbreid
    This class provides a generic implementation of the batch-hard triplet loss for part-based models,
    i.e. models outputting multiple embeddings (part-based/local representations) per input sample/image.
    When K=1 parts are provided and the parts_visiblity scores are set to one (or not provided), this implementation
    is strictly equal to the standard batch-hard triplet loss described in:
    'Alexander Hermans, Lucas Beyer, and Bastian Leibe. In Defense of the Triplet Loss for Person Re-Identification.'
    It is therefore valid to use this implementation for global embeddings too.
    Part-based distances are combined into a global sample-to-sample distance using a 'mean' operation.
    Other subclasses of PartAveragedTripletLoss provide different strategies to combine local distances into
    a global one.
    This implementation is optimized, using only tensors operations and no Python loops.
    """

    def __init__(self, margin=0.3, epsilon=1e-16, writer=None):
        # margin: triplet margin; a margin <= 0 switches to the soft-margin formulation.
        # epsilon: numerical-stability constant added before sqrt of zero distances.
        # writer: experiment logger. NOTE(review): assumed non-None, it is called
        # unconditionally in _hard_mine_triplet_loss -- confirm callers always pass one.
        super(PartAveragedTripletLoss, self).__init__()
        self.margin = margin
        self.writer = writer
        self.batch_debug = False
        self.imgs = None
        self.masks = None
        self.epsilon = epsilon

    def forward(self, part_based_embeddings, labels, parts_visibility=None):
        """
        The part averaged triplet loss is computed in three steps. Firstly, we compute the part-based pairwise
        distance matrix of size [K, N, N] for the K parts and the N training samples. Secondly we compute the (samples)
        pairwise distance matrix of size [N, N] by combining the part-based distances. The part-based distances can be
        combined by averaging, max, min, etc. Thirdly, we compute the standard batch-hard triplet loss using the
        pairwise distance matrix.
        Compared to a standard triplet loss implementation, some entries in the pairwise distance matrix can have a
        value of -1. These entries correspond to pairs of samples that could not be compared, because there was no
        common visible parts for instance. Such pairs should be ignored for computing the batch hard triplets.

        Args:
            part_based_embeddings (torch.Tensor): feature matrix with shape (batch_size, parts_num, feat_dim).
            labels (torch.LongTensor): ground truth labels with shape (num_classes).
        """
        # [N, K, D] -> [K, N, D]: one pairwise distance matrix per part
        part_based_pairwise_dist = self._part_based_pairwise_distance_matrix(part_based_embeddings.transpose(1, 0), squared=False)
        if parts_visibility is not None:
            parts_visibility = parts_visibility.t()  # [N, K] -> [K, N]
            # pair (i, j) is valid for part k iff part k is visible in both samples
            valid_part_based_pairwise_dist_mask = parts_visibility.unsqueeze(1) * parts_visibility.unsqueeze(2)
            if valid_part_based_pairwise_dist_mask.dtype is not torch.bool:
                # continuous visibility scores: sqrt of the product of the two
                # per-sample scores keeps the weight on the original scale
                valid_part_based_pairwise_dist_mask = torch.sqrt(valid_part_based_pairwise_dist_mask)
        else:
            valid_part_based_pairwise_dist_mask = None
        pairwise_dist = self._combine_part_based_dist_matrices(part_based_pairwise_dist, valid_part_based_pairwise_dist_mask, labels)
        return self._hard_mine_triplet_loss(pairwise_dist, labels, self.margin)

    def _combine_part_based_dist_matrices(self, part_based_pairwise_dist, valid_part_based_pairwise_dist_mask, labels):
        # Average the K part distance matrices into one [N, N] matrix; use a
        # masked (visibility-weighted) mean when a validity mask is available.
        # Subclasses override this to implement max/min/individual strategies.
        if valid_part_based_pairwise_dist_mask is not None:
            self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
            pairwise_dist = masked_mean(part_based_pairwise_dist, valid_part_based_pairwise_dist_mask)
        else:
            valid_part_based_pairwise_dist = part_based_pairwise_dist
            pairwise_dist = valid_part_based_pairwise_dist.mean(0)
        return pairwise_dist

    def _part_based_pairwise_distance_matrix(self, embeddings, squared=False):
        """
        embeddings.shape = (K, N, C)
        ||a-b||^2 = |a|^2 - 2*<a,b> + |b|^2
        """
        dot_product = torch.matmul(embeddings, embeddings.transpose(2, 1))
        # squared norms are the diagonal of the Gram matrix
        square_sum = dot_product.diagonal(dim1=1, dim2=2)
        distances = square_sum.unsqueeze(2) - 2 * dot_product + square_sum.unsqueeze(1)
        distances = F.relu(distances)  # clip tiny negatives from floating-point error
        if not squared:
            mask = torch.eq(distances, 0).float()
            distances = distances + mask * self.epsilon  # for numerical stability (infinite derivative of sqrt in 0)
            distances = torch.sqrt(distances)
            distances = distances * (1 - mask)  # restore exact zeros
        return distances

    def _hard_mine_triplet_loss(self, batch_pairwise_dist, labels, margin):
        """
        A generic implementation of the batch-hard triplet loss.
        K (part-based) distance matrix between N samples are provided in tensor 'batch_pairwise_dist' of size
        [K, N, N]. The standard batch-hard triplet loss is then computed for each of the K distance matrix, yielding a
        total of KxN triplet losses. When a pairwise distance matrix of size [1, N, N] is provided with K=1, this
        function behave like a standard batch-hard triplet loss.
        When a pairwise distance matrix of size [K, N, N] is provided, this function will apply the batch-hard triplet
        loss strategy K times, i.e. one time for each of the K part-based distance matrix. It will then average all
        KxN triplet losses for all K parts into one loss value. For the part-averaged triplet loss described in the
        paper, all part-based distance are first averaged before calling this function, and a pairwise distance matrix
        of size [1, N, N] is provided here. When the triplet loss is applied individually for each part, without
        considering the global/combined distance between two training samples (as implemented by
        'PartIndividualTripletLoss'), then a (part-based) pairwise distance matrix of size [K, N, N] is given as input.

        Compute distance matrix; i.e. for each anchor a_i with i=range(0, batch_size) :
        - find the (a_i,p_i) pair with greatest distance s.t. a_i and p_i have the same label
        - find the (a_i,n_i) pair with smallest distance s.t. a_i and n_i have different label
        - compute triplet loss for each triplet (a_i, p_i, n_i), average them

        Source :
        - https://github.com/lyakaap/NetVLAD-pytorch/blob/master/hard_triplet_loss.py
        - https://github.com/Yuol96/pytorch-triplet-loss/blob/master/model/triplet_loss.py

        Args:
            batch_pairwise_dist: pairwise distances between samples, of size (K, N, N). A value of -1 means no distance
                could be computed between the two sample, that pair should therefore not be considered for triplet
                mining.
            labels: id labels for the batch, of size (N,)

        Returns:
            triplet_loss: scalar tensor containing the batch hard triplet loss, which is the result of the average of
                a maximum of KxN triplet losses. Triplets are generated for anchors with at least one valid negative
                and one valid positive. Invalid negatives and invalid positives are marked with a -1 distance in
                batch_pairwise_dist input tensor.
            trivial_triplets_ratio: scalar between [0, 1] indicating the ratio of hard triplets that are 'trivial',
                i.e. for which the triplet loss value is 0 because the margin condition is already satisfied.
            valid_triplets_ratio: scalar between [0, 1] indicating the ratio of hard triplets that are valid. A triplet
                is invalid if the anchor could not be compared with any positive or negative sample. Two samples cannot
                be compared if they have no mutually visible parts (therefore no distance could be computed).
        """
        max_value = torch.finfo(batch_pairwise_dist.dtype).max
        # -1 is the sentinel for "no distance could be computed"
        valid_pairwise_dist_mask = (batch_pairwise_dist != float(-1))
        self.writer.update_invalid_pairwise_distances_count(batch_pairwise_dist)

        # Get the hardest positive pairs
        # invalid positive distance were set to -1, so taking the max ignores them
        mask_anchor_positive = self._get_anchor_positive_mask(labels).unsqueeze(0)
        mask_anchor_positive = mask_anchor_positive * valid_pairwise_dist_mask
        valid_positive_dist = batch_pairwise_dist * mask_anchor_positive.float() - (~mask_anchor_positive).float()
        hardest_positive_dist, _ = torch.max(valid_positive_dist, dim=-1)  # [K, N]

        # Get the hardest negative pairs: masked-out entries become max_value so
        # taking the min ignores them
        mask_anchor_negative = self._get_anchor_negative_mask(labels).unsqueeze(0)
        mask_anchor_negative = mask_anchor_negative * valid_pairwise_dist_mask
        valid_negative_dist = batch_pairwise_dist * mask_anchor_negative.float() + (~mask_anchor_negative).float() * max_value
        hardest_negative_dist, _ = torch.min(valid_negative_dist, dim=-1)  # [K, N]

        # Hardest negative/positive with dist=float.max/-1 are invalid: no valid negative/positive found for this anchor
        # Do not generate triplet for such anchor
        valid_hardest_positive_dist_mask = hardest_positive_dist != -1
        valid_hardest_negative_dist_mask = hardest_negative_dist != max_value
        valid_triplets_mask = valid_hardest_positive_dist_mask * valid_hardest_negative_dist_mask  # [K, N]
        hardest_dist = torch.stack([hardest_positive_dist, hardest_negative_dist], 2)  # [K, N, 2]
        valid_hardest_dist = hardest_dist[valid_triplets_mask, :]  # [K*N, 2]
        if valid_hardest_dist.nelement() == 0:
            # NOTE(review): callers receive None instead of the usual 3-tuple here -- confirm they handle it
            warnings.warn("CRITICAL WARNING: no valid triplets were generated for current batch")
            return None

        # Build valid triplets and compute triplet loss
        # NOTE(review): the soft-margin branch is called with a hard-coded 0.3
        # instead of the 'margin' argument -- the value is only used as a fallback there
        if self.margin > 0:
            triplet_loss, trivial_triplets_ratio, valid_triplets_ratio = self.hard_margin_triplet_loss(margin, valid_hardest_dist, valid_triplets_mask)
        else:
            triplet_loss, trivial_triplets_ratio, valid_triplets_ratio = self.soft_margin_triplet_loss(0.3, valid_hardest_dist, valid_triplets_mask)
        return triplet_loss, trivial_triplets_ratio, valid_triplets_ratio

    def hard_margin_triplet_loss(self, margin, valid_hardest_dist, valid_triplets_mask):
        # hinge on (hardest positive - hardest negative + margin) per anchor
        triplet_losses = F.relu(valid_hardest_dist[:, 0] - valid_hardest_dist[:, 1] + margin)
        triplet_loss = torch.mean(triplet_losses)
        trivial_triplets_ratio = (triplet_losses == 0.).sum() / triplet_losses.nelement()
        valid_triplets_ratio = valid_triplets_mask.sum() / valid_triplets_mask.nelement()
        return triplet_loss, trivial_triplets_ratio, valid_triplets_ratio

    def soft_margin_triplet_loss(self, margin, valid_hardest_dist, valid_triplets_mask):
        # hard-margin loss computed only as a fallback if the soft loss diverges,
        # and to derive the trivial-triplets statistic
        triplet_losses = F.relu(valid_hardest_dist[:, 0] - valid_hardest_dist[:, 1] + margin)
        hard_margin_triplet_loss = torch.mean(triplet_losses)
        trivial_triplets_ratio = (triplet_losses == 0.).sum() / triplet_losses.nelement()
        valid_triplets_ratio = valid_triplets_mask.sum() / valid_triplets_mask.nelement()
        # valid_hardest_dist[:, 0] = hardest positive dist
        # valid_hardest_dist[:, 1] = hardest negative dist
        y = valid_hardest_dist[:, 0].new().resize_as_(valid_hardest_dist[:, 0]).fill_(1)
        # soft-margin formulation: log(1 + exp(d_ap - d_an))
        soft_margin_triplet_loss = F.soft_margin_loss(valid_hardest_dist[:, 1] - valid_hardest_dist[:, 0], y)
        if soft_margin_triplet_loss == float('Inf'):
            print("soft_margin_triplet_loss = inf")
            return hard_margin_triplet_loss, trivial_triplets_ratio, valid_triplets_ratio
        return soft_margin_triplet_loss, trivial_triplets_ratio, valid_triplets_ratio

    @staticmethod
    def _get_anchor_positive_mask(labels):
        """
        To be a valid positive pair (a,p) :
        - a and p are different embeddings
        - a and p have the same label
        """
        indices_equal_mask = torch.eye(labels.shape[0], dtype=torch.bool, device=(labels.get_device() if labels.is_cuda else None))
        indices_not_equal_mask = ~indices_equal_mask

        # Check if labels[i] == labels[j]
        labels_equal_mask = torch.eq(labels.unsqueeze(0), labels.unsqueeze(1))
        mask_anchor_positive = indices_not_equal_mask * labels_equal_mask

        return mask_anchor_positive

    @staticmethod
    def _get_anchor_negative_mask(labels):
        """
        To be a valid negative pair (a,n) :
        - a and n have different labels (and therefore are different embeddings)
        """
        # Check if labels[i] != labels[k]
        labels_not_equal_mask = torch.ne(torch.unsqueeze(labels, 0), torch.unsqueeze(labels, 1))
        return labels_not_equal_mask
# --- torchreid/losses/part_individual_triplet_loss.py: PartIndividualTripletLoss methods ---
# (class header and module imports live on the surrounding extract lines)

def __init__(self, **kwargs):
    super(PartIndividualTripletLoss, self).__init__(**kwargs)

def _combine_part_based_dist_matrices(self, part_based_pairwise_dist,
                                      valid_part_based_pairwise_dist_mask, labels):
    """Identity 'combination': keep the K part-based distance matrices as-is.

    When a validity mask is given, non-valid part-based distances are
    replaced by -1 (the downstream triplet loss treats -1 as 'invalid') and
    the invalid-distance counter is updated on the writer.
    """
    if valid_part_based_pairwise_dist_mask is None:
        return part_based_pairwise_dist
    masked_dist = replace_values(part_based_pairwise_dist,
                                 ~valid_part_based_pairwise_dist_mask, -1)
    self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
    return masked_dist
# --- torchreid/losses/part_max_min_triplet_loss.py: PartMaxMinTripletLoss methods ---

def __init__(self, **kwargs):
    super(PartMaxMinTripletLoss, self).__init__(**kwargs)

def _combine_part_based_dist_matrices(self, part_based_pairwise_dist,
                                      valid_part_based_pairwise_dist_mask, labels):
    """Combine the K part-based (N, N) distance matrices into one (N, N) matrix.

    For same-identity pairs the MAX part distance is used; for different
    identities the MIN part distance is used. Pairs with no valid part get -1.
    """
    mask = valid_part_based_pairwise_dist_mask
    # hardest (max) part distance; invalid entries pushed to -1 so they never win
    if mask is not None:
        dist_for_max = replace_values(part_based_pairwise_dist, ~mask, -1)
        self.writer.update_invalid_part_based_pairwise_distances_count(mask)
    else:
        dist_for_max = part_based_pairwise_dist
    max_dist, max_part_id = dist_for_max.max(0)
    # easiest (min) part distance; invalid entries pushed to +max so they never win
    if mask is not None:
        big = torch.finfo(part_based_pairwise_dist.dtype).max
        dist_for_min = replace_values(part_based_pairwise_dist, ~mask, big)
        self.writer.update_invalid_part_based_pairwise_distances_count(mask)
    else:
        dist_for_min = part_based_pairwise_dist
    min_dist, min_part_id = dist_for_min.min(0)
    same_id = torch.eq(labels.unsqueeze(0), labels.unsqueeze(1))
    pairwise_dist = max_dist * same_id + min_dist * ~same_id
    part_id = max_part_id * same_id + min_part_id * ~same_id
    if mask is not None:
        # pairs with zero mutually visible parts are marked invalid (-1)
        fully_invalid = mask.sum(dim=0) == 0
        pairwise_dist = replace_values(pairwise_dist, fully_invalid, -1)
    if part_based_pairwise_dist.shape[0] > 1:
        self.writer.used_parts_statistics(part_based_pairwise_dist.shape[0], part_id)
    return pairwise_dist
# --- torchreid/losses/part_max_triplet_loss.py: PartMaxTripletLoss methods ---

def __init__(self, **kwargs):
    super(PartMaxTripletLoss, self).__init__(**kwargs)

def _combine_part_based_dist_matrices(self, part_based_pairwise_dist,
                                      valid_part_based_pairwise_dist_mask, labels):
    """Combine K part-based distance matrices by taking the MAX over parts.

    Invalid part distances are set to -1 so they cannot be selected.
    """
    if valid_part_based_pairwise_dist_mask is not None:
        candidate_dist = replace_values(part_based_pairwise_dist,
                                        ~valid_part_based_pairwise_dist_mask, -1)
        self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
    else:
        candidate_dist = part_based_pairwise_dist
    pairwise_dist, part_id = candidate_dist.max(0)
    parts_count = part_based_pairwise_dist.shape[0]
    if parts_count > 1:
        self.writer.used_parts_statistics(parts_count, part_id)
    return pairwise_dist


# --- torchreid/losses/part_min_triplet_loss.py: PartMinTripletLoss methods ---

def __init__(self, **kwargs):
    super(PartMinTripletLoss, self).__init__(**kwargs)

def _combine_part_based_dist_matrices(self, part_based_pairwise_dist,
                                      valid_part_based_pairwise_dist_mask, labels):
    """Combine K part-based distance matrices by taking the MIN over parts.

    Invalid part distances are set to dtype-max so they cannot be selected;
    pairs with no valid part at all end up marked with -1.
    """
    if valid_part_based_pairwise_dist_mask is not None:
        big = torch.finfo(part_based_pairwise_dist.dtype).max
        candidate_dist = replace_values(part_based_pairwise_dist,
                                        ~valid_part_based_pairwise_dist_mask, big)
        self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
    else:
        candidate_dist = part_based_pairwise_dist
    pairwise_dist, part_id = candidate_dist.min(0)
    if valid_part_based_pairwise_dist_mask is not None:
        fully_invalid = valid_part_based_pairwise_dist_mask.sum(dim=0) == 0
        pairwise_dist = replace_values(pairwise_dist, fully_invalid, -1)
    parts_count = part_based_pairwise_dist.shape[0]
    if parts_count > 1:
        self.writer.used_parts_statistics(parts_count, part_id)
    return pairwise_dist


# --- torchreid/losses/part_random_max_min_triplet_loss.py: PartRandomMaxMinTripletLoss methods ---

def __init__(self, **kwargs):
    super(PartRandomMaxMinTripletLoss, self).__init__(**kwargs)

def _combine_part_based_dist_matrices(self, part_based_pairwise_dist,
                                      valid_part_based_pairwise_dist_mask, labels):
    """Max/min combination (see PartMaxMinTripletLoss) with random part dropout.

    A random subset of part entries is masked out before combining.
    NOTE(review): the original comment claims 20% of entries are dropped, but
    `> 0.5` keeps only ~half of them - confirm which rate was intended.
    NOTE(review): `*=` below mutates a caller-provided mask in place - confirm
    callers do not reuse it afterwards.
    """
    device = labels.get_device() if labels.is_cuda else None
    if valid_part_based_pairwise_dist_mask is None:
        valid_part_based_pairwise_dist_mask = torch.ones(part_based_pairwise_dist.shape,
                                                         dtype=torch.bool, device=device)
    # put some random entries to 0 (20%)
    dropout_mask = torch.rand(size=valid_part_based_pairwise_dist_mask.shape, device=device) > 0.5
    # FIXME should be symmetric?
    # dropout_mask = torch.rand(size=valid_body_part_pairwise_dist_mask.shape[0:2], device=device) > 0.3
    # dropout_mask = dropout_mask.unsqueeze(1) * dropout_mask.unsqueeze(2)
    valid_part_based_pairwise_dist_mask *= dropout_mask
    dist_for_max = replace_values(part_based_pairwise_dist,
                                  ~valid_part_based_pairwise_dist_mask, -1)
    self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
    max_dist, max_part_id = dist_for_max.max(0)
    big = torch.finfo(part_based_pairwise_dist.dtype).max
    dist_for_min = replace_values(part_based_pairwise_dist,
                                  ~valid_part_based_pairwise_dist_mask, big)
    self.writer.update_invalid_part_based_pairwise_distances_count(valid_part_based_pairwise_dist_mask)
    min_dist, min_part_id = dist_for_min.min(0)
    same_id = torch.eq(labels.unsqueeze(0), labels.unsqueeze(1))
    pairwise_dist = max_dist * same_id + min_dist * ~same_id
    part_id = max_part_id * same_id + min_part_id * ~same_id
    fully_invalid = valid_part_based_pairwise_dist_mask.sum(dim=0) == 0
    pairwise_dist = replace_values(pairwise_dist, fully_invalid, -1)
    if part_based_pairwise_dist.shape[0] > 1:
        self.writer.used_parts_statistics(part_based_pairwise_dist.shape[0], part_id)
    return pairwise_dist
accuracy(output, target, topk=(1, )): """Computes the accuracy over the k top predictions for the specified values of k. Args: output (torch.Tensor): prediction matrix with shape (batch_size, num_classes). target (torch.LongTensor): ground truth labels with shape (batch_size). topk (tuple, optional): accuracy at top-k will be computed. For example, topk=(1, 5) means accuracy at top-1 and top-5 will be computed. Returns: list: accuracy at top-k. Examples:: >>> from torchreid import metrics >>> metrics.accuracy(output, target) """ maxk = max(topk) batch_size = target.size(0) if isinstance(output, (tuple, list)): output = output[0] _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) res = [] for k in topk: correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) acc = correct_k.mul_(100.0 / batch_size) res.append(acc) return res ================================================ FILE: torchreid/metrics/distance.py ================================================ from __future__ import division, print_function, absolute_import import torch from torch.nn import functional as F from torchreid.utils.writer import Writer from torchreid.utils.tensortools import replace_values, masked_mean def compute_distance_matrix(input1, input2, metric='euclidean'): """A wrapper function for computing distance matrix. Args: input1 (torch.Tensor): 2-D feature matrix. input2 (torch.Tensor): 2-D feature matrix. metric (str, optional): "euclidean" or "cosine". Default is "euclidean". Returns: torch.Tensor: distance matrix. 
Examples:: >>> from torchreid import metrics >>> input1 = torch.rand(10, 2048) >>> input2 = torch.rand(100, 2048) >>> distmat = metrics.compute_distance_matrix(input1, input2) >>> distmat.size() # (10, 100) """ # check input assert isinstance(input1, torch.Tensor) assert isinstance(input2, torch.Tensor) assert input1.dim() == 2, 'Expected 2-D tensor, but got {}-D'.format( input1.dim() ) assert input2.dim() == 2, 'Expected 2-D tensor, but got {}-D'.format( input2.dim() ) assert input1.size(1) == input2.size(1) if metric == 'euclidean': distmat = euclidean_squared_distance(input1, input2) elif metric == 'cosine': distmat = cosine_distance(input1, input2) else: raise ValueError( 'Unknown distance metric: {}. ' 'Please choose either "euclidean" or "cosine"'.format(metric) ) return distmat def euclidean_squared_distance(input1, input2): """Computes euclidean squared distance. Args: input1 (torch.Tensor): 2-D feature matrix. input2 (torch.Tensor): 2-D feature matrix. Returns: torch.Tensor: distance matrix. """ # dist(a, b) = sum((a_i - b_i)^2) = sum(a_i^2) + sum(b_i^2) - 2*sum(a_i*b_i) m, n = input1.size(0), input2.size(0) mat1 = torch.pow(input1, 2).sum(dim=1, keepdim=True).expand(m, n) # sum(a_i^2) mat2 = torch.pow(input2, 2).sum(dim=1, keepdim=True).expand(n, m).t() # sum(b_i^2) distmat = mat1 + mat2 # sum(a_i^2) + sum(b_i^2) distmat.addmm_(input1, input2.t(), beta=1, alpha=-2) # sum(a_i^2) + sum(b_i^2) - 2*sum(a_i*b_i) return distmat def cosine_distance(input1, input2): """Computes cosine distance. Args: input1 (torch.Tensor): 2-D feature matrix. input2 (torch.Tensor): 2-D feature matrix. Returns: torch.Tensor: distance matrix. 
""" input1_normed = F.normalize(input1, p=2, dim=1) input2_normed = F.normalize(input2, p=2, dim=1) distmat = 1 - torch.mm(input1_normed, input2_normed.t()) return distmat def compute_distance_matrix_using_bp_features(qf, gf, qf_parts_visibility=None, gf_parts_visibility=None, dist_combine_strat='mean', batch_size_pairwise_dist_matrix=5000, use_gpu=False, metric='euclidean'): """Computes distance matrix between each pair of samples using their part-based features. 3 implementations here: without visibility scores, with boolean/binary visibility scores and with continuous [0, 1] visibility scores.""" # TODO keep only one generic implementation if qf_parts_visibility is not None and gf_parts_visibility is not None: if qf_parts_visibility.dtype is torch.bool and gf_parts_visibility.dtype is torch.bool: # boolean visibility scores return _compute_distance_matrix_using_bp_features_and_masks(qf, gf, qf_parts_visibility, gf_parts_visibility, dist_combine_strat, batch_size_pairwise_dist_matrix, use_gpu, metric) else: # continuous visibility scores return _compute_distance_matrix_using_bp_features_and_visibility_scores(qf, gf, qf_parts_visibility, gf_parts_visibility, dist_combine_strat, batch_size_pairwise_dist_matrix, use_gpu, metric) else: # no visibility scores return _compute_distance_matrix_using_bp_features(qf, gf, dist_combine_strat, batch_size_pairwise_dist_matrix, use_gpu, metric) def _compute_distance_matrix_using_bp_features(qf, gf, dist_combine_strat, batch_size_pairwise_dist_matrix, use_gpu, metric): if use_gpu: qf = qf.cuda() pairwise_dist_, body_part_pairwise_dist_ = [], [] for batch_gf in torch.split(gf, batch_size_pairwise_dist_matrix): if use_gpu: batch_gf = batch_gf.cuda() batch_body_part_pairwise_dist = _compute_body_parts_dist_matrices(qf, batch_gf, metric) if dist_combine_strat == 'max': batch_pairwise_dist, _ = batch_body_part_pairwise_dist.max(dim=0) elif dist_combine_strat == 'mean': batch_pairwise_dist = batch_body_part_pairwise_dist.mean(dim=0) 
else: raise ValueError('Body parts distance combination strategy "{}" not supported'.format(dist_combine_strat)) batch_body_part_pairwise_dist = batch_body_part_pairwise_dist.cpu() body_part_pairwise_dist_.append(batch_body_part_pairwise_dist) batch_pairwise_dist = batch_pairwise_dist.cpu() pairwise_dist_.append(batch_pairwise_dist) pairwise_dist = torch.cat(pairwise_dist_, 1) body_part_pairwise_dist = torch.cat(body_part_pairwise_dist_, 2) if Writer.current_writer() is not None: Writer.current_writer().qg_pairwise_dist_statistics(pairwise_dist, body_part_pairwise_dist, None, None) return pairwise_dist, body_part_pairwise_dist def _compute_distance_matrix_using_bp_features_and_masks(qf, gf, qf_parts_visibility, gf_parts_visibility, dist_combine_strat, batch_size_pairwise_dist_matrix, use_gpu, metric): batch_gf_list = torch.split(gf, batch_size_pairwise_dist_matrix) batch_gf_parts_visibility_list = torch.split(gf_parts_visibility, batch_size_pairwise_dist_matrix) qf_parts_visibility_cpu = qf_parts_visibility if use_gpu: qf = qf.cuda() qf_parts_visibility = qf_parts_visibility.cuda() qf_parts_visibility = qf_parts_visibility.t() pairwise_dist_, body_part_pairwise_dist_ = [], [] for batch_gf, batch_gf_parts_visibility in zip(batch_gf_list, batch_gf_parts_visibility_list): if use_gpu: batch_gf = batch_gf.cuda() batch_gf_parts_visibility = batch_gf_parts_visibility.cuda() batch_body_part_pairwise_dist = _compute_body_parts_dist_matrices(qf, batch_gf, metric) assert qf_parts_visibility.dtype is torch.bool and batch_gf_parts_visibility.dtype is torch.bool batch_gf_parts_visibility = batch_gf_parts_visibility.t() valid_body_part_pairwise_dist_mask = qf_parts_visibility.unsqueeze(2) * batch_gf_parts_visibility.unsqueeze(1) if dist_combine_strat == 'max': valid_body_part_pairwise_dist = replace_values(batch_body_part_pairwise_dist, ~valid_body_part_pairwise_dist_mask, -1) batch_pairwise_dist, _ = valid_body_part_pairwise_dist.max(dim=0) elif dist_combine_strat == 'mean': 
def _compute_distance_matrix_using_bp_features_and_visibility_scores(qf, gf, qf_parts_visibility,
                                                                     gf_parts_visibility, dist_combine_strat,
                                                                     batch_size_pairwise_dist_matrix, use_gpu, metric):
    """Part-based query/gallery distances with continuous [0, 1] visibility scores.

    Part distances are averaged with weights sqrt(vis_q * vis_g); the
    `dist_combine_strat` argument is kept for interface parity but only the
    weighted mean is implemented here.
    """
    batch_gf_list = torch.split(gf, batch_size_pairwise_dist_matrix)
    batch_gf_vis_list = torch.split(gf_parts_visibility, batch_size_pairwise_dist_matrix)
    qf_parts_visibility_cpu = qf_parts_visibility
    if use_gpu:
        qf = qf.cuda()
        qf_parts_visibility = qf_parts_visibility.cuda()
    qf_parts_visibility = qf_parts_visibility.t()
    per_batch_dist, per_batch_part_dist = [], []
    for batch_gf, batch_gf_vis in zip(batch_gf_list, batch_gf_vis_list):
        if use_gpu:
            batch_gf = batch_gf.cuda()
            batch_gf_vis = batch_gf_vis.cuda()
        part_dist = _compute_body_parts_dist_matrices(qf, batch_gf, metric)
        batch_gf_vis = batch_gf_vis.t()
        # geometric mean of the two visibility scores as a soft validity weight
        weight_mask = torch.sqrt(qf_parts_visibility.unsqueeze(2) * batch_gf_vis.unsqueeze(1))
        combined = masked_mean(part_dist, weight_mask)
        per_batch_part_dist.append(part_dist.cpu())
        per_batch_dist.append(combined.cpu())
    pairwise_dist = torch.cat(per_batch_dist, 1)
    body_part_pairwise_dist = torch.cat(per_batch_part_dist, 2)
    # TODO check if still valid:
    if Writer.current_writer() is not None:
        Writer.current_writer().qg_pairwise_dist_statistics(pairwise_dist, body_part_pairwise_dist,
                                                            qf_parts_visibility_cpu, gf_parts_visibility)
    max_value = body_part_pairwise_dist.max() + 1
    valid_pairwise_dist_mask = (pairwise_dist != float(-1))
    pairwise_dist = replace_values(pairwise_dist, ~valid_pairwise_dist_mask, max_value)
    return pairwise_dist, body_part_pairwise_dist


def _compute_body_parts_dist_matrices(qf, gf, metric='euclidean'):
    """Per-part distance matrices between query and gallery features.

    qf, gf: tensors of shape (N, M, C) - N samples, M parts, C channels.
    Returns a (M, N_q, N_g) tensor of part-wise distances.
    Euclidean uses ||a-b||^2 = |a|^2 - 2<a, b> + |b|^2, clamped at 0 before sqrt.
    """
    qf = qf.transpose(1, 0)  # (M, N_q, C)
    gf = gf.transpose(1, 0)  # (M, N_g, C)
    if metric == 'euclidean':
        dot_product = torch.matmul(qf, gf.transpose(2, 1))
        qf_sq = qf.pow(2).sum(dim=-1)
        gf_sq = gf.pow(2).sum(dim=-1)
        squared = qf_sq.unsqueeze(2) - 2 * dot_product + gf_sq.unsqueeze(1)
        # relu clamps tiny negative values caused by floating-point error
        distances = torch.sqrt(F.relu(squared))
    elif metric == 'cosine':
        distances = 1 - torch.matmul(qf, gf.transpose(2, 1))
    else:
        raise ValueError(
            'Unknown distance metric: {}. '
            'Please choose either "euclidean" or "cosine"'.format(metric)
        )
    return distances
def eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
    """Evaluation with cuhk03 metric (single-gallery-shot).

    Key: one image for each gallery identity is randomly sampled for each
    query identity. Random sampling is performed num_repeats times.

    Args:
        distmat (numpy.ndarray): (num_query, num_gallery) distance matrix.
        q_pids, g_pids (numpy.ndarray): person ids of query/gallery samples.
        q_camids, g_camids (numpy.ndarray): camera ids of query/gallery samples.
        max_rank (int): maximum CMC rank to compute.

    Returns:
        dict with 'cmc' (float32 array of length max_rank) and 'mAP' (float).
    """
    num_repeats = 10
    num_q, num_g = distmat.shape
    if num_g < max_rank:
        max_rank = num_g
        print('Note: number of gallery samples is quite small, got {}'.format(num_g))
    indices = np.argsort(distmat, axis=1)
    matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)

    all_cmc = []
    all_AP = []
    num_valid_q = 0.  # number of valid query
    for q_idx in range(num_q):
        q_pid = q_pids[q_idx]
        q_camid = q_camids[q_idx]
        # remove gallery samples that have the same pid and camid with query
        order = indices[q_idx]
        remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
        keep = np.invert(remove)
        # binary vector, positions with value 1 are correct matches
        raw_cmc = matches[q_idx][keep]
        if not np.any(raw_cmc):
            # query identity does not appear in gallery
            continue
        kept_g_pids = g_pids[order][keep]
        g_pids_dict = defaultdict(list)
        for idx, pid in enumerate(kept_g_pids):
            g_pids_dict[pid].append(idx)
        cmc = 0.
        for repeat_idx in range(num_repeats):
            # BUGFIX: np.bool was removed in NumPy >= 1.24; use the builtin bool
            mask = np.zeros(len(raw_cmc), dtype=bool)
            for _, idxs in g_pids_dict.items():
                # randomly sample one image for each gallery person
                rnd_idx = np.random.choice(idxs)
                mask[rnd_idx] = True
            masked_raw_cmc = raw_cmc[mask]
            _cmc = masked_raw_cmc.cumsum()
            _cmc[_cmc > 1] = 1
            cmc += _cmc[:max_rank].astype(np.float32)
        cmc /= num_repeats
        all_cmc.append(cmc)
        # compute AP: precision@i averaged over relevant positions
        num_rel = raw_cmc.sum()
        tmp_cmc = raw_cmc.cumsum()
        tmp_cmc = tmp_cmc / np.arange(1., len(tmp_cmc) + 1)  # vectorized precision@i
        tmp_cmc = np.asarray(tmp_cmc) * raw_cmc
        AP = tmp_cmc.sum() / num_rel
        all_AP.append(AP)
        num_valid_q += 1.

    assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
    all_cmc = np.asarray(all_cmc).astype(np.float32)
    cmc = all_cmc.sum(0) / num_valid_q
    mAP = np.mean(all_AP)
    return {
        'cmc': cmc,
        'mAP': mAP,
    }


def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
    """Evaluation with market1501 metric (multi-gallery-shot).

    Key: for each query identity, its gallery images from the same camera
    view are discarded.

    Args/returns: same as :func:`eval_cuhk03`.
    """
    num_q, num_g = distmat.shape
    if num_g < max_rank:
        max_rank = num_g
        print('Note: number of gallery samples is quite small, got {}'.format(num_g))
    indices = np.argsort(distmat, axis=1)
    matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)

    all_cmc = []
    all_AP = []
    num_valid_q = 0.  # number of valid query
    for q_idx in range(num_q):
        q_pid = q_pids[q_idx]
        q_camid = q_camids[q_idx]
        # remove gallery samples that have the same pid and camid with query
        order = indices[q_idx]
        remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
        keep = np.invert(remove)
        # binary vector, positions with value 1 are correct matches
        raw_cmc = matches[q_idx][keep]
        if not np.any(raw_cmc):
            # query identity does not appear in gallery
            continue
        cmc = raw_cmc.cumsum()
        cmc[cmc > 1] = 1
        all_cmc.append(cmc[:max_rank])
        num_valid_q += 1.
        # compute average precision
        # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
        num_rel = raw_cmc.sum()
        tmp_cmc = raw_cmc.cumsum()
        tmp_cmc = tmp_cmc / np.arange(1., len(tmp_cmc) + 1)  # vectorized precision@i
        tmp_cmc = np.asarray(tmp_cmc) * raw_cmc
        AP = tmp_cmc.sum() / num_rel
        all_AP.append(AP)

    assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
    all_cmc = np.asarray(all_cmc).astype(np.float32)
    cmc = all_cmc.sum(0) / num_valid_q
    mAP = np.mean(all_AP)
    return {
        'cmc': cmc,
        'mAP': mAP,
    }


def evaluate_py(distmat, q_pids, g_pids, q_camids, g_camids, max_rank,
                eval_metric, q_anns=None, g_anns=None):
    """Pure-Python evaluation dispatcher for the supported metrics."""
    if eval_metric == 'default':
        return eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
    elif eval_metric == 'cuhk03':
        return eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
    else:
        raise ValueError("Incorrect eval_metric value '{}'".format(eval_metric))


def evaluate_rank(
    distmat,
    q_pids,
    g_pids,
    q_camids,
    g_camids,
    max_rank=50,
    eval_metric='default',
    q_anns=None,
    g_anns=None,
    use_cython=True
):
    """Evaluates CMC rank.

    Args:
        distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery).
        q_pids (numpy.ndarray): 1-D array containing person identities of each query instance.
        g_pids (numpy.ndarray): 1-D array containing person identities of each gallery instance.
        q_camids (numpy.ndarray): 1-D array containing camera views under which each query instance is captured.
        g_camids (numpy.ndarray): 1-D array containing camera views under which each gallery instance is captured.
        max_rank (int, optional): maximum CMC rank to be computed. Default is 50.
        eval_metric (str, optional): 'default' (multi-gallery-shot) or 'cuhk03'
            (single-gallery-shot). Default is 'default'.
        use_cython (bool, optional): use cython code for evaluation. Default is True.
            This is highly recommended as the cython code can speed up the cmc
            computation by more than 10x. This requires Cython to be installed.
    """
    # NOTE(review): both branches call evaluate_py — the cython path never
    # invokes evaluate_cy; presumably a leftover from a refactor. Behavior
    # kept as-is; confirm whether the cython kernel should be dispatched here.
    if use_cython and IS_CYTHON_AVAI and (eval_metric == 'default' or eval_metric == 'cuhk03'):
        return evaluate_py(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, eval_metric)
    else:
        return evaluate_py(distmat, q_pids, g_pids, q_camids, g_camids, max_rank,
                           eval_metric, q_anns=q_anns, g_anns=g_anns)
This is highly recommended as the cython code can speed up the cmc computation by more than 10x. This requires Cython to be installed. """ if use_cython and IS_CYTHON_AVAI and (eval_metric == 'default' or eval_metric == 'cuhk03'): return evaluate_py( distmat, q_pids, g_pids, q_camids, g_camids, max_rank, eval_metric ) else: return evaluate_py( distmat, q_pids, g_pids, q_camids, g_camids, max_rank, eval_metric, q_anns=q_anns, g_anns=g_anns ) ================================================ FILE: torchreid/metrics/rank_cylib/Makefile ================================================ all: $(PYTHON) setup.py build_ext --inplace rm -rf build clean: rm -rf build rm -f rank_cy.c *.so ================================================ FILE: torchreid/metrics/rank_cylib/__init__.py ================================================ ================================================ FILE: torchreid/metrics/rank_cylib/rank_cy.pyx ================================================ # cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True from __future__ import print_function import cython import numpy as np cimport numpy as np from collections import defaultdict import random """ Compiler directives: https://github.com/cython/cython/wiki/enhancements-compilerdirectives Cython tutorial: https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html Credit to https://github.com/luzai """ # Main interface cpdef evaluate_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=False): distmat = np.asarray(distmat, dtype=np.float32) q_pids = np.asarray(q_pids, dtype=np.int64) g_pids = np.asarray(g_pids, dtype=np.int64) q_camids = np.asarray(q_camids, dtype=np.int64) g_camids = np.asarray(g_camids, dtype=np.int64) if use_metric_cuhk03: return eval_cuhk03_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank) return eval_market1501_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank) cpdef eval_cuhk03_cy(float[:,:] distmat, long[:] 
q_pids, long[:]g_pids, long[:]q_camids, long[:]g_camids, long max_rank): cdef long num_q = distmat.shape[0] cdef long num_g = distmat.shape[1] if num_g < max_rank: max_rank = num_g print('Note: number of gallery samples is quite small, got {}'.format(num_g)) cdef: long num_repeats = 10 long[:,:] indices = np.argsort(distmat, axis=1) long[:,:] matches = (np.asarray(g_pids)[np.asarray(indices)] == np.asarray(q_pids)[:, np.newaxis]).astype(np.int64) float[:,:] all_cmc = np.zeros((num_q, max_rank), dtype=np.float32) float[:] all_AP = np.zeros(num_q, dtype=np.float32) float num_valid_q = 0. # number of valid query long q_idx, q_pid, q_camid, g_idx long[:] order = np.zeros(num_g, dtype=np.int64) long keep float[:] raw_cmc = np.zeros(num_g, dtype=np.float32) # binary vector, positions with value 1 are correct matches float[:] masked_raw_cmc = np.zeros(num_g, dtype=np.float32) float[:] cmc, masked_cmc long num_g_real, num_g_real_masked, rank_idx, rnd_idx unsigned long meet_condition float AP long[:] kept_g_pids, mask float num_rel float[:] tmp_cmc = np.zeros(num_g, dtype=np.float32) float tmp_cmc_sum for q_idx in range(num_q): # get query pid and camid q_pid = q_pids[q_idx] q_camid = q_camids[q_idx] # remove gallery samples that have the same pid and camid with query for g_idx in range(num_g): order[g_idx] = indices[q_idx, g_idx] num_g_real = 0 meet_condition = 0 kept_g_pids = np.zeros(num_g, dtype=np.int64) for g_idx in range(num_g): if (g_pids[order[g_idx]] != q_pid) or (g_camids[order[g_idx]] != q_camid): raw_cmc[num_g_real] = matches[q_idx][g_idx] kept_g_pids[num_g_real] = g_pids[order[g_idx]] num_g_real += 1 if matches[q_idx][g_idx] > 1e-31: meet_condition = 1 if not meet_condition: # this condition is true when query identity does not appear in gallery continue # cuhk03-specific setting g_pids_dict = defaultdict(list) # overhead! 
for g_idx in range(num_g_real): g_pids_dict[kept_g_pids[g_idx]].append(g_idx) cmc = np.zeros(max_rank, dtype=np.float32) for _ in range(num_repeats): mask = np.zeros(num_g_real, dtype=np.int64) for _, idxs in g_pids_dict.items(): # randomly sample one image for each gallery person rnd_idx = np.random.choice(idxs) #rnd_idx = idxs[0] # use deterministic for debugging mask[rnd_idx] = 1 num_g_real_masked = 0 for g_idx in range(num_g_real): if mask[g_idx] == 1: masked_raw_cmc[num_g_real_masked] = raw_cmc[g_idx] num_g_real_masked += 1 masked_cmc = np.zeros(num_g, dtype=np.float32) function_cumsum(masked_raw_cmc, masked_cmc, num_g_real_masked) for g_idx in range(num_g_real_masked): if masked_cmc[g_idx] > 1: masked_cmc[g_idx] = 1 for rank_idx in range(max_rank): cmc[rank_idx] += masked_cmc[rank_idx] / num_repeats for rank_idx in range(max_rank): all_cmc[q_idx, rank_idx] = cmc[rank_idx] # compute average precision # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision function_cumsum(raw_cmc, tmp_cmc, num_g_real) num_rel = 0 tmp_cmc_sum = 0 for g_idx in range(num_g_real): tmp_cmc_sum += (tmp_cmc[g_idx] / (g_idx + 1.)) * raw_cmc[g_idx] num_rel += raw_cmc[g_idx] all_AP[q_idx] = tmp_cmc_sum / num_rel num_valid_q += 1. 
# --- eval_cuhk03_cy (continued): average the per-query CMC curves and APs ---
assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'

# compute averaged cmc
cdef float[:] avg_cmc = np.zeros(max_rank, dtype=np.float32)
for rank_idx in range(max_rank):
    for q_idx in range(num_q):
        avg_cmc[rank_idx] += all_cmc[q_idx, rank_idx]
    avg_cmc[rank_idx] /= num_valid_q

cdef float mAP = 0
for q_idx in range(num_q):
    mAP += all_AP[q_idx]
mAP /= num_valid_q

return np.asarray(avg_cmc).astype(np.float32), mAP


cpdef eval_market1501_cy(float[:,:] distmat, long[:] q_pids, long[:]g_pids,
                         long[:]q_camids, long[:]g_camids, long max_rank):
    # Market-1501 protocol: single evaluation pass, every valid gallery sample kept.
    # Returns (CMC curve of length max_rank as float32 array, mAP as float).
    cdef long num_q = distmat.shape[0]
    cdef long num_g = distmat.shape[1]

    if num_g < max_rank:
        max_rank = num_g
        print('Note: number of gallery samples is quite small, got {}'.format(num_g))

    cdef:
        # indices[q] = gallery indices sorted by increasing distance to query q
        long[:,:] indices = np.argsort(distmat, axis=1)
        # matches[q, j] == 1 iff the j-th ranked gallery sample shares query q's pid
        long[:,:] matches = (np.asarray(g_pids)[np.asarray(indices)] == np.asarray(q_pids)[:, np.newaxis]).astype(np.int64)
        float[:,:] all_cmc = np.zeros((num_q, max_rank), dtype=np.float32)
        float[:] all_AP = np.zeros(num_q, dtype=np.float32)
        float num_valid_q = 0. # number of valid query
        long q_idx, q_pid, q_camid, g_idx
        long[:] order = np.zeros(num_g, dtype=np.int64)
        long keep
        float[:] raw_cmc = np.zeros(num_g, dtype=np.float32) # binary vector, positions with value 1 are correct matches
        float[:] cmc = np.zeros(num_g, dtype=np.float32)
        long num_g_real, rank_idx
        unsigned long meet_condition
        float num_rel
        float[:] tmp_cmc = np.zeros(num_g, dtype=np.float32)
        float tmp_cmc_sum

    for q_idx in range(num_q):
        # get query pid and camid
        q_pid = q_pids[q_idx]
        q_camid = q_camids[q_idx]

        # remove gallery samples that have the same pid and camid with query
        for g_idx in range(num_g):
            order[g_idx] = indices[q_idx, g_idx]
        num_g_real = 0
        meet_condition = 0

        for g_idx in range(num_g):
            if (g_pids[order[g_idx]] != q_pid) or (g_camids[order[g_idx]] != q_camid):
                raw_cmc[num_g_real] = matches[q_idx][g_idx]
                num_g_real += 1
                if matches[q_idx][g_idx] > 1e-31:
                    meet_condition = 1

        if not meet_condition:
            # this condition is true when query identity does not appear in gallery
            continue

        # compute cmc
        function_cumsum(raw_cmc, cmc, num_g_real)
        # clip cumulative match counts to 1 to get the binary "found by rank r" curve
        for g_idx in range(num_g_real):
            if cmc[g_idx] > 1:
                cmc[g_idx] = 1

        for rank_idx in range(max_rank):
            all_cmc[q_idx, rank_idx] = cmc[rank_idx]
        num_valid_q += 1.

        # compute average precision
        # reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
        function_cumsum(raw_cmc, tmp_cmc, num_g_real)
        num_rel = 0
        tmp_cmc_sum = 0
        for g_idx in range(num_g_real):
            tmp_cmc_sum += (tmp_cmc[g_idx] / (g_idx + 1.)) * raw_cmc[g_idx]
            num_rel += raw_cmc[g_idx]
        all_AP[q_idx] = tmp_cmc_sum / num_rel

    assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'

    # compute averaged cmc
    cdef float[:] avg_cmc = np.zeros(max_rank, dtype=np.float32)
    for rank_idx in range(max_rank):
        for q_idx in range(num_q):
            avg_cmc[rank_idx] += all_cmc[q_idx, rank_idx]
        avg_cmc[rank_idx] /= num_valid_q

    cdef float mAP = 0
    for q_idx in range(num_q):
        mAP += all_AP[q_idx]
    mAP /= num_valid_q

    return np.asarray(avg_cmc).astype(np.float32), mAP


# Compute the cumulative sum of src[:n] into dst[:n].
# cython.numeric is a fused type, so this works for both float and long memoryviews.
cdef void function_cumsum(cython.numeric[:] src, cython.numeric[:] dst, long n):
    cdef long i
    dst[0] = src[0]
    for i in range(1, n):
        dst[i] = src[i] + dst[i - 1]


================================================
FILE: torchreid/metrics/rank_cylib/setup.py
================================================
import numpy as np
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize


def numpy_include():
    """Return the numpy C-header include directory (old numpy fallback included)."""
    try:
        numpy_include = np.get_include()
    except AttributeError:
        # very old numpy releases exposed the path under a different name
        numpy_include = np.get_numpy_include()
    return numpy_include


ext_modules = [
    Extension(
        'rank_cy',
        ['rank_cy.pyx'],
        include_dirs=[numpy_include()],
    )
]

setup(
    name='Cython-based reid evaluation code',
    ext_modules=cythonize(ext_modules)
)


================================================
FILE: torchreid/metrics/rank_cylib/test_cython.py
================================================
from __future__ import print_function
import sys
import numpy as np
import timeit
import os.path as osp

from torchreid import metrics

# NOTE(review): this path insert happens AFTER `from torchreid import metrics`,
# so it cannot influence that import — verify whether it should come first.
sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..')

"""
Test the speed of cython-based evaluation code.
The speed improvements can be much bigger when using the real reid data,
which contains a larger amount of query and gallery images.

Note: you might encounter the following error:
'AssertionError: Error: all query identities do not appear in gallery'.
This is normal because the inputs are random numbers. Just try again.
"""

print('*** Compare running time ***')

# Setup code executed once per timeit run: builds a small random distance
# matrix plus random pids/camids for 30 queries and 300 gallery samples.
# NOTE(review): `__file__` is not defined inside timeit's setup namespace by
# default, so the `osp.abspath(__file__)` line below may fail or be a no-op —
# confirm this script still runs as intended.
setup = '''
import sys
import os.path as osp
import numpy as np
sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..')
from torchreid import metrics
num_q = 30
num_g = 300
max_rank = 5
distmat = np.random.rand(num_q, num_g) * 20
q_pids = np.random.randint(0, num_q, size=num_q)
g_pids = np.random.randint(0, num_g, size=num_g)
q_camids = np.random.randint(0, 5, size=num_q)
g_camids = np.random.randint(0, 5, size=num_g)
'''

# Time the pure-python vs cython implementation under the market1501 metric.
print('=> Using market1501\'s metric')
pytime = timeit.timeit(
    'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False)',
    setup=setup,
    number=20
)
cytime = timeit.timeit(
    'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True)',
    setup=setup,
    number=20
)
print('Python time: {} s'.format(pytime))
print('Cython time: {} s'.format(cytime))
print('Cython is {} times faster than python\n'.format(pytime / cytime))

# Same comparison under the cuhk03 single-gallery-shot metric.
print('=> Using cuhk03\'s metric')
pytime = timeit.timeit(
    'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=True, use_cython=False)',
    setup=setup,
    number=20
)
cytime = timeit.timeit(
    'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=True, use_cython=True)',
    setup=setup,
    number=20
)
print('Python time: {} s'.format(pytime))
print('Cython time: {} s'.format(cytime))
print('Cython is {} times faster than python\n'.format(pytime / cytime))

# Disabled precision cross-check between the python and cython implementations.
"""
print("=> Check precision")

num_q = 30
num_g = 300
max_rank = 5
distmat = np.random.rand(num_q, num_g) * 20
q_pids = np.random.randint(0, num_q, size=num_q)
g_pids =
np.random.randint(0, num_g, size=num_g)
q_camids = np.random.randint(0, 5, size=num_q)
g_camids = np.random.randint(0, 5, size=num_g)

cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False)
print("Python:\nmAP = {} \ncmc = {}\n".format(mAP, cmc))

cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True)
print("Cython:\nmAP = {} \ncmc = {}\n".format(mAP, cmc))
"""


================================================
FILE: torchreid/models/__init__.py
================================================
from __future__ import absolute_import

from .hrnet import hrnet32
from .pcb import *
from .mlfn import *
from .hacnn import *
from .osnet import *
from .pvpm import pose_resnet50_256_p4, pose_resnet50_256_p6, pose_resnet50_256_p6_pscore_reg, \
    pose_resnet50_256_p4_pscore_reg
from .resnet_fastreid import build_resnet_backbone, fastreid_resnet, fastreid_resnet_ibn, fastreid_resnet_nl, \
    fastreid_resnet_ibn_nl
from .senet import *
from .mudeep import *
from .nasnet import *
from .resnet import *
from .densenet import *
from .xception import *
from .osnet_ain import *
from .resnetmid import *
from .shufflenet import *
from .squeezenet import *
from .inceptionv4 import *
from .mobilenetv2 import *
from .resnet_ibn_a import *
from .resnet_ibn_b import *
from .shufflenetv2 import *
from .inceptionresnetv2 import *
from .bpbreid import *

# Registry mapping model-name strings (as used in configs/CLI) to constructors.
__model_factory = {
    # image classification models
    'resnet18': resnet18,
    'resnet34': resnet34,
    'resnet50': resnet50,
    'resnet101': resnet101,
    'resnet152': resnet152,
    'resnext50_32x4d': resnext50_32x4d,
    'resnext101_32x8d': resnext101_32x8d,
    'resnet50_fc512': resnet50_fc512,
    'se_resnet50': se_resnet50,
    'se_resnet50_fc512': se_resnet50_fc512,
    'se_resnet101': se_resnet101,
    'se_resnext50_32x4d': se_resnext50_32x4d,
    'se_resnext101_32x4d': se_resnext101_32x4d,
    'densenet121': densenet121,
    'densenet169': densenet169,
    'densenet201': densenet201,
    'densenet161': densenet161,
    'densenet121_fc512': densenet121_fc512,
    'inceptionresnetv2': inceptionresnetv2,
    'inceptionv4': inceptionv4,
    'xception': xception,
    'resnet50_ibn_a': resnet50_ibn_a,
    'resnet50_ibn_b': resnet50_ibn_b,
    # lightweight models
    # NOTE(review): 'nasnsetmobile' looks like a typo for 'nasnetamobile';
    # kept as-is because existing configs may reference this exact key.
    'nasnsetmobile': nasnetamobile,
    'mobilenetv2_x1_0': mobilenetv2_x1_0,
    'mobilenetv2_x1_4': mobilenetv2_x1_4,
    'shufflenet': shufflenet,
    'squeezenet1_0': squeezenet1_0,
    'squeezenet1_0_fc512': squeezenet1_0_fc512,
    'squeezenet1_1': squeezenet1_1,
    'shufflenet_v2_x0_5': shufflenet_v2_x0_5,
    'shufflenet_v2_x1_0': shufflenet_v2_x1_0,
    'shufflenet_v2_x1_5': shufflenet_v2_x1_5,
    'shufflenet_v2_x2_0': shufflenet_v2_x2_0,
    # reid-specific models
    'mudeep': MuDeep,
    'resnet50mid': resnet50mid,
    'hacnn': HACNN,
    'pcb_p6': pcb_p6,
    'pcb_p4': pcb_p4,
    'mlfn': mlfn,
    'osnet_x1_0': osnet_x1_0,
    'osnet_x0_75': osnet_x0_75,
    'osnet_x0_5': osnet_x0_5,
    'osnet_x0_25': osnet_x0_25,
    'osnet_ibn_x1_0': osnet_ibn_x1_0,
    'osnet_ain_x1_0': osnet_ain_x1_0,
    'pose_p4': pose_resnet50_256_p4,
    'pose_p6': pose_resnet50_256_p6,
    'pose_p6s': pose_resnet50_256_p6_pscore_reg,
    'pose_p4s': pose_resnet50_256_p4_pscore_reg,
    'hrnet32': hrnet32,
    'bpbreid': bpbreid,
    'fastreid_resnet': fastreid_resnet,
    'fastreid_resnet_ibn': fastreid_resnet_ibn,
    'fastreid_resnet_nl': fastreid_resnet_nl,
    'fastreid_resnet_ibn_nl': fastreid_resnet_ibn_nl,
}


def show_avai_models():
    """Displays available models.

    Examples::
        >>> from torchreid import models
        >>> models.show_avai_models()
    """
    print(list(__model_factory.keys()))


def build_model(
    name, num_classes, loss='softmax', pretrained=True, use_gpu=True, **kwargs
):
    """A function wrapper for building a model.

    Args:
        name (str): model name.
        num_classes (int): number of training identities.
        loss (str, optional): loss function to optimize the model. Currently
            supports "softmax" and "triplet". Default is "softmax".
        pretrained (bool, optional): whether to load ImageNet-pretrained
            weights. Default is True.
        use_gpu (bool, optional): whether to use gpu. Default is True.

    Returns:
        nn.Module

    Raises:
        KeyError: if ``name`` is not a key of the model factory.

    Examples::
        >>> from torchreid import models
        >>> model = models.build_model('resnet50', 751, loss='softmax')
    """
    avai_models = list(__model_factory.keys())
    if name not in avai_models:
        raise KeyError(
            'Unknown model: {}. Must be one of {}'.format(name, avai_models)
        )
    return __model_factory[name](
        num_classes=num_classes,
        loss=loss,
        pretrained=pretrained,
        use_gpu=use_gpu,
        **kwargs
    )


================================================
FILE: torchreid/models/bpbreid.py
================================================
from __future__ import division, absolute_import
import torch
import torch.nn.functional as F
import numpy as np
from torch import nn

from torchreid import models
from torchreid.utils.constants import *

__all__ = [
    'bpbreid'
]


class BPBreID(nn.Module):
    """Pose-based feature extraction network (body-part-based re-id model)."""

    def __init__(self, num_classes, pretrained, loss, model_cfg, horizontal_stripes=False, **kwargs):
        super(BPBreID, self).__init__()
        # Init config
        self.model_cfg = model_cfg
        # number of training classes/identities
        self.num_classes = num_classes
        # number of parts K
        self.parts_num = self.model_cfg.masks.parts_num
        # whether to perform horizontal stripes pooling similar to PCB
        self.horizontal_stripes = horizontal_stripes
        # use shared weights/parameters between each part branch for the identity classifier
        self.shared_parts_id_classifier = self.model_cfg.shared_parts_id_classifier
        # at test time, perform a 'soft' or 'hard' merging of the learned attention maps with the external part masks
        self.test_use_target_segmentation = self.model_cfg.test_use_target_segmentation
        # use continuous or binary visibility scores at train time:
        self.training_binary_visibility_score = self.model_cfg.training_binary_visibility_score
        # use continuous or binary visibility scores at test time:
        self.testing_binary_visibility_score = self.model_cfg.testing_binary_visibility_score

        # Init backbone feature extractor
        # (assignment continues on the next line of the original file)
        self.backbone_appearance_feature_extractor =
models.build_model(self.model_cfg.backbone,  # continuation of the assignment started on the previous line
                   num_classes,
                   loss=loss,
                   pretrained=pretrained,
                   last_stride=self.model_cfg.last_stride,
                   # HRNet can perform the dim reduction itself when used as backbone
                   enable_dim_reduction=(self.model_cfg.dim_reduce=='before_pooling'),
                   dim_reduction_channels=self.model_cfg.dim_reduce_output,
                   pretrained_path=self.model_cfg.hrnet_pretrained_path
                   )
self.spatial_feature_size = self.backbone_appearance_feature_extractor.feature_dim

# Init dim reduce layers (sets self.dim_reduce_output, used by the heads below)
self.init_dim_reduce_layers(self.model_cfg.dim_reduce,
                            self.spatial_feature_size,
                            self.model_cfg.dim_reduce_output)

# Init pooling layers
self.global_pooling_head = nn.AdaptiveAvgPool2d(1)
self.foreground_attention_pooling_head = GlobalAveragePoolingHead(self.dim_reduce_output)
self.background_attention_pooling_head = GlobalAveragePoolingHead(self.dim_reduce_output)
self.parts_attention_pooling_head = init_part_attention_pooling_head(self.model_cfg.normalization,
                                                                     self.model_cfg.pooling,
                                                                     self.dim_reduce_output)

# Init parts classifier (pixel-wise body-part attention)
self.learnable_attention_enabled = self.model_cfg.learnable_attention_enabled
self.pixel_classifier = PixelToPartClassifier(self.spatial_feature_size, self.parts_num)

# Init id classifier, one per embedding stream
self.global_identity_classifier = BNClassifier(self.dim_reduce_output, self.num_classes)
self.background_identity_classifier = BNClassifier(self.dim_reduce_output, self.num_classes)
self.foreground_identity_classifier = BNClassifier(self.dim_reduce_output, self.num_classes)
self.concat_parts_identity_classifier = BNClassifier(self.parts_num * self.dim_reduce_output, self.num_classes)
if self.shared_parts_id_classifier:
    # the same identity classifier weights are used for each part branch
    self.parts_identity_classifier = BNClassifier(self.dim_reduce_output, self.num_classes)
else:
    # each part branch has its own identity classifier
    self.parts_identity_classifier = nn.ModuleList(
        [
            BNClassifier(self.dim_reduce_output, self.num_classes)
            for _ in range(self.parts_num)
        ]
    )

def init_dim_reduce_layers(self, dim_reduce_mode, spatial_feature_size, dim_reduce_output):
    # Configure where (if anywhere) feature dimensionality reduction happens:
    # before spatial pooling (1x1 conv), after pooling (linear), both, or none.
    self.dim_reduce_output = dim_reduce_output
    self.after_pooling_dim_reduce = False
    self.before_pooling_dim_reduce = None
    if dim_reduce_mode == 'before_pooling':
        self.before_pooling_dim_reduce = BeforePoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.spatial_feature_size = dim_reduce_output
    elif dim_reduce_mode == 'after_pooling':
        self.after_pooling_dim_reduce = True
        # one independent reduction head per embedding stream
        self.global_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.foreground_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.background_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.parts_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
    elif dim_reduce_mode == 'before_and_after_pooling':
        # reduce to 2x the target size before pooling, then to target size after
        self.before_pooling_dim_reduce = BeforePoolingDimReduceLayer(spatial_feature_size, dim_reduce_output * 2)
        spatial_feature_size = dim_reduce_output * 2
        self.spatial_feature_size = spatial_feature_size
        self.after_pooling_dim_reduce = True
        self.global_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.foreground_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.background_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
        self.parts_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output)
    elif dim_reduce_mode == 'after_pooling_with_dropout':
        self.after_pooling_dim_reduce = True
        self.global_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output, 0.5)
        self.foreground_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output, 0.5)
        self.background_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output, 0.5)
self.parts_after_pooling_dim_reduce = AfterPoolingDimReduceLayer(spatial_feature_size, dim_reduce_output, 0.5)  # continuation of init_dim_reduce_layers
else:
    # no dim reduction requested: embeddings keep the backbone feature size
    self.dim_reduce_output = spatial_feature_size

def forward(self, images, external_parts_masks=None):
    """
    :param images: images tensor of size [N, C, Hi, Wi], where N is the batch size, C channel depth (3 for RGB),
        and (Hi, Wi) are the image height and width.
    :param external_parts_masks: masks tensor of size [N, K+1, Hm, Wm], where N is the batch size, K is the number
        parts, and (Hm, Wm) are the masks height and width. The first index (index 0) along the parts K+1 dimension
        is the background by convention. The masks are expected to have values in the range [0, 1]. Spatial entry at
        location external_parts_masks[i, k+1, h, w] is the probability that the pixel at location (h, w) belongs to
        part k for batch sample i. The masks are NOT expected to be of the same size as the images.
    :return: tuple (embeddings, visibility_scores, id_cls_scores, pixels_cls_scores, spatial_features, masks),
        each of the dicts keyed by the stream constants GLOBAL/BACKGROUND/FOREGROUND/CONCAT_PARTS/PARTS.
    """
    # Global spatial_features
    spatial_features = self.backbone_appearance_feature_extractor(images)  # [N, D, Hf, Wf]
    N, _, Hf, Wf = spatial_features.shape
    if self.before_pooling_dim_reduce is not None \
            and spatial_features.shape[1] != self.dim_reduce_output:  # When HRNet used as backbone, already done
        spatial_features = self.before_pooling_dim_reduce(spatial_features)  # [N, dim_reduce_output, Hf, Wf]

    # Pixels classification and parts attention weights
    if self.horizontal_stripes:
        # PCB-style fixed horizontal stripes instead of learned attention
        pixels_cls_scores = None
        feature_map_shape = (Hf, Wf)
        stripes_range = np.round(np.arange(0, self.parts_num + 1) * feature_map_shape[0] / self.parts_num).astype(int)
        pcb_masks = torch.zeros((self.parts_num, feature_map_shape[0], feature_map_shape[1]))
        for i in range(0, stripes_range.size - 1):
            pcb_masks[i, stripes_range[i]:stripes_range[i + 1], :] = 1
        # NOTE(review): pcb_masks has no batch dimension and no background channel
        # (index 0 is interpreted as background below), and is created on CPU —
        # confirm this branch behaves as intended with batched/CUDA inputs.
        pixels_parts_probabilities = pcb_masks
        pixels_parts_probabilities.requires_grad = False
    elif self.learnable_attention_enabled:
        # learned pixel-to-part attention
        pixels_cls_scores = self.pixel_classifier(spatial_features)  # [N, K, Hf, Wf]
        pixels_parts_probabilities = F.softmax(pixels_cls_scores, dim=1)
    else:
        # use the externally provided part masks as attention, resized to the feature map
        pixels_cls_scores = None
        assert external_parts_masks is not None
        external_parts_masks = external_parts_masks.type(spatial_features.dtype)
        pixels_parts_probabilities = nn.functional.interpolate(external_parts_masks, (Hf, Wf),
                                                               mode='bilinear', align_corners=True)
        pixels_parts_probabilities.requires_grad = False

    assert pixels_parts_probabilities.max() <= 1 and pixels_parts_probabilities.min() >= 0
    background_masks = pixels_parts_probabilities[:, 0]
    parts_masks = pixels_parts_probabilities[:, 1:]

    # Explicit pixels segmentation of re-id target using external part masks
    if not self.training and self.test_use_target_segmentation == 'hard':
        assert external_parts_masks is not None
        # hard masking: a pixel is foreground iff some part beats the background channel
        external_parts_masks = nn.functional.interpolate(external_parts_masks, (Hf, Wf),
                                                         mode='bilinear', align_corners=True)
        target_segmentation_mask = external_parts_masks[:, 1::].max(dim=1)[0] > external_parts_masks[:, 0]
        background_masks = ~target_segmentation_mask
        parts_masks[background_masks.unsqueeze(1).expand_as(parts_masks)] = 1e-12

    if not self.training and self.test_use_target_segmentation == 'soft':
        assert external_parts_masks is not None
        # soft masking: modulate learned part attention by the external masks
        external_parts_masks = nn.functional.interpolate(external_parts_masks, (Hf, Wf),
                                                         mode='bilinear', align_corners=True)
        parts_masks = parts_masks * external_parts_masks[:, 1::]

    # foreground_masks = parts_masks.sum(dim=1)
    foreground_masks = parts_masks.max(dim=1)[0]
    global_masks = torch.ones_like(foreground_masks)

    # Parts visibility
    if (self.training and self.training_binary_visibility_score) or (not self.training and self.testing_binary_visibility_score):
        # binary: a part is visible iff it wins the argmax for at least one pixel
        pixels_parts_predictions = pixels_parts_probabilities.argmax(dim=1)  # [N, Hf, Wf]
        pixels_parts_predictions_one_hot = F.one_hot(pixels_parts_predictions, self.parts_num + 1).permute(0, 3, 1, 2)  # [N, K+1, Hf, Wf]
        parts_visibility = pixels_parts_predictions_one_hot.amax(dim=(2, 3)).to(torch.bool)  # [N, K+1]
    else:
        # continuous: max attention response per part
        parts_visibility = pixels_parts_probabilities.amax(dim=(2, 3))  # [N, K+1]
    background_visibility = parts_visibility[:, 0]  # [N]
    foreground_visibility = parts_visibility.amax(dim=1)  # [N]
    parts_visibility = parts_visibility[:, 1:]  # [N, K]
    concat_parts_visibility = foreground_visibility
    global_visibility = torch.ones_like(foreground_visibility)  # [N]

    # Global embedding
    global_embeddings = self.global_pooling_head(spatial_features).view(N, -1)  # [N, D]

    # Foreground and background embeddings
    foreground_embeddings = self.foreground_attention_pooling_head(spatial_features, foreground_masks.unsqueeze(1)).flatten(1, 2)  # [N, D]
    background_embeddings = self.background_attention_pooling_head(spatial_features, background_masks.unsqueeze(1)).flatten(1, 2)  # [N, D]

    # Part features
    parts_embeddings = self.parts_attention_pooling_head(spatial_features, parts_masks)  # [N, K, D]

    # Dim reduction
    if self.after_pooling_dim_reduce:
        global_embeddings = self.global_after_pooling_dim_reduce(global_embeddings)  # [N, D]
        foreground_embeddings = self.foreground_after_pooling_dim_reduce(foreground_embeddings)  # [N, D]
        background_embeddings = self.background_after_pooling_dim_reduce(background_embeddings)  # [N, D]
        parts_embeddings = self.parts_after_pooling_dim_reduce(parts_embeddings)  # [N, M, D]

    # Concatenated part features
    concat_parts_embeddings = parts_embeddings.flatten(1, 2)  # [N, K*D]

    # Identity classification scores
    bn_global_embeddings, global_cls_score = self.global_identity_classifier(global_embeddings)  # [N, D], [N, num_classes]
    bn_background_embeddings, background_cls_score = self.background_identity_classifier(background_embeddings)  # [N, D], [N, num_classes]
    bn_foreground_embeddings, foreground_cls_score = self.foreground_identity_classifier(foreground_embeddings)  # [N, D], [N, num_classes]
    bn_concat_parts_embeddings, concat_parts_cls_score = self.concat_parts_identity_classifier(concat_parts_embeddings)  # [N, K*D], [N, num_classes]
    bn_parts_embeddings, parts_cls_score = self.parts_identity_classification(self.dim_reduce_output, N, parts_embeddings)  # [N, K, D], [N, K, num_classes]

    # Outputs
    embeddings = {
        GLOBAL: global_embeddings,  # [N, D]
        BACKGROUND: background_embeddings,  # [N, D]
        FOREGROUND: foreground_embeddings,  # [N, D]
        CONCAT_PARTS: concat_parts_embeddings,  # [N, K*D]
        PARTS: parts_embeddings,  # [N, K, D]
        BN_GLOBAL: bn_global_embeddings,  # [N, D]
        BN_BACKGROUND: bn_background_embeddings,  # [N, D]
        BN_FOREGROUND: bn_foreground_embeddings,  # [N, D]
        BN_CONCAT_PARTS: bn_concat_parts_embeddings,  # [N, K*D]
        BN_PARTS: bn_parts_embeddings,  # [N, K, D]
    }
    visibility_scores = {
        GLOBAL: global_visibility,  # [N]
        BACKGROUND: background_visibility,  # [N]
        FOREGROUND: foreground_visibility,  # [N]
        CONCAT_PARTS: concat_parts_visibility,  # [N]
        PARTS: parts_visibility,  # [N, K]
    }
    id_cls_scores = {
        GLOBAL: global_cls_score,  # [N, num_classes]
        BACKGROUND: background_cls_score,  # [N, num_classes]
        FOREGROUND: foreground_cls_score,  # [N, num_classes]
        CONCAT_PARTS: concat_parts_cls_score,  # [N, num_classes]
        PARTS: parts_cls_score,  # [N, K, num_classes]
    }
    masks = {
        GLOBAL: global_masks,  # [N, Hf, Wf]
        BACKGROUND: background_masks,  # [N, Hf, Wf]
        FOREGROUND: foreground_masks,  # [N, Hf, Wf]
        CONCAT_PARTS: foreground_masks,  # [N, Hf, Wf]
        PARTS: parts_masks,  # [N, K, Hf, Wf]
    }
    return embeddings, visibility_scores, id_cls_scores, pixels_cls_scores, spatial_features, masks

def parts_identity_classification(self, D, N, parts_embeddings):
    # Classify each part embedding, either with one shared classifier or one per part.
    if self.shared_parts_id_classifier:
        # apply the same classifier on each part embedding, classifier weights are therefore shared across parts
        parts_embeddings = parts_embeddings.flatten(0, 1)  # [N*K, D]
        bn_part_embeddings, part_cls_score = self.parts_identity_classifier(parts_embeddings)
        bn_part_embeddings = bn_part_embeddings.view([N, self.parts_num, D])
        part_cls_score = part_cls_score.view([N, self.parts_num, -1])
    else:
        # apply K classifiers on each of the K part embedding, each part has therefore it's own classifier weights
def parts_identity_classification(self, D, N, parts_embeddings):
    """Classify each of the K part embeddings into identity scores.

    Reconstructed in full here (the original is split across extract lines):
    either a single shared classifier applied to all parts, or one
    independent classifier per part.

    :param D: embedding dimension of each part.
    :param N: batch size.
    :param parts_embeddings: tensor [N, K, D].
    :return: (bn_part_embeddings [N, K, D], part_cls_score [N, K, num_classes])
    """
    if self.shared_parts_id_classifier:
        # apply the same classifier on each part embedding, classifier weights are therefore shared across parts
        parts_embeddings = parts_embeddings.flatten(0, 1)  # [N*K, D]
        bn_part_embeddings, part_cls_score = self.parts_identity_classifier(parts_embeddings)
        bn_part_embeddings = bn_part_embeddings.view([N, self.parts_num, D])
        part_cls_score = part_cls_score.view([N, self.parts_num, -1])
    else:
        # apply K classifiers on each of the K part embedding, each part has therefore it's own classifier weights
        scores = []
        embeddings = []
        for i, parts_identity_classifier in enumerate(self.parts_identity_classifier):
            bn_part_embeddings, part_cls_score = parts_identity_classifier(parts_embeddings[:, i])
            scores.append(part_cls_score.unsqueeze(1))
            embeddings.append(bn_part_embeddings.unsqueeze(1))
        part_cls_score = torch.cat(scores, 1)
        bn_part_embeddings = torch.cat(embeddings, 1)
    return bn_part_embeddings, part_cls_score


########################################
#    Dimensionality reduction layers   #
########################################


class BeforePoolingDimReduceLayer(nn.Module):
    """1x1 conv + BN + ReLU reducing channel depth before spatial pooling.

    Input/output: [N, input_dim, H, W] -> [N, output_dim, H, W].
    """

    def __init__(self, input_dim, output_dim):
        super(BeforePoolingDimReduceLayer, self).__init__()
        layers = []
        layers.append(
            nn.Conv2d(
                input_dim, output_dim, 1, stride=1, padding=0
            )
        )
        layers.append(nn.BatchNorm2d(output_dim))
        layers.append(nn.ReLU(inplace=True))
        self.layers = nn.Sequential(*layers)
        self._init_params()

    def forward(self, x):
        return self.layers(x)

    def _init_params(self):
        # Kaiming init for convs, unit-BN, small-normal linears (project convention)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


class AfterPoolingDimReduceLayer(nn.Module):
    """Linear + BN + ReLU (+ optional Dropout) reducing embedding dimension
    after pooling, as used in ResNet and PCB.

    Accepts either a 2D input [N, input_dim] or a 3D part-wise input
    [N, K, input_dim]; the 3D case is flattened through the layers and
    reshaped back to [N, K, output_dim].
    """

    def __init__(self, input_dim, output_dim, dropout_p=None):
        super(AfterPoolingDimReduceLayer, self).__init__()
        # dim reduction used in ResNet and PCB
        layers = []
        layers.append(
            nn.Linear(
                input_dim, output_dim, bias=True
            )
        )
        layers.append(nn.BatchNorm1d(output_dim))
        layers.append(nn.ReLU(inplace=True))
        if dropout_p is not None:
            # BUGFIX: was `nn.opout(p=dropout_p)`, which raises AttributeError
            # whenever a dropout probability is configured.
            layers.append(nn.Dropout(p=dropout_p))

        self.layers = nn.Sequential(*layers)
        self._init_params()

    def forward(self, x):
        if len(x.size()) == 3:
            # part-wise input [N, K, input_dim]: fold parts into the batch dim
            N, K, _ = x.size()
            x = x.flatten(0, 1)
            x = self.layers(x)
            x = x.view(N, K, -1)
        else:
            x = self.layers(x)
        return x

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


########################################
#             Classifiers              #
########################################


class PixelToPartClassifier(nn.Module):
    """BN + 1x1 conv classifying each spatial location into K parts + background.

    Input [N, dim_reduce_output, H, W] -> logits [N, parts_num + 1, H, W].
    """

    def __init__(self, dim_reduce_output, parts_num):
        super(PixelToPartClassifier, self).__init__()
        self.bn = torch.nn.BatchNorm2d(dim_reduce_output)
        self.classifier = nn.Conv2d(in_channels=dim_reduce_output, out_channels=parts_num + 1, kernel_size=1, stride=1, padding=0)
        self._init_params()

    def forward(self, x):
        x = self.bn(x)
        return self.classifier(x)

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.001)  # ResNet = 0.01, Bof and ISP-reid = 0.001
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


class BNClassifier(nn.Module):
    """BNNeck identity classifier: BatchNorm1d then bias-free Linear.

    Returns (bn_feature [N, in_dim], cls_score [N, class_num]).
    Source: https://github.com/upgirlnana/Pytorch-Person-REID-Baseline-Bag-of-Tricks
    """

    def __init__(self, in_dim, class_num):
        super(BNClassifier, self).__init__()
        self.in_dim = in_dim
        self.class_num = class_num
        self.bn = nn.BatchNorm1d(self.in_dim)
        self.bn.bias.requires_grad_(False)  # BoF: this doesn't have a big impact on perf according to author on github
        self.classifier = nn.Linear(self.in_dim, self.class_num, bias=False)
        self._init_params()

    def forward(self, x):
        feature = self.bn(x)
        cls_score = self.classifier(feature)
        return feature, cls_score

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.001)  # ResNet = 0.01, Bof and ISP-reid = 0.001
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
_init_params(self):  # continuation of BNClassifier._init_params (def keyword is on the previous extract line)
    for m in self.modules():
        if isinstance(m, nn.BatchNorm1d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.001)  # ResNet = 0.01, Bof and ISP-reid = 0.001
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


########################################
#            Pooling heads             #
########################################


def init_part_attention_pooling_head(normalization, pooling, dim_reduce_output):
    # Factory for the mask-weighted part pooling head:
    # 'gap' = average, 'gmp' = max, 'gwap' = mask-weighted average.
    if pooling == 'gap':
        parts_attention_pooling_head = GlobalAveragePoolingHead(dim_reduce_output, normalization)
    elif pooling == 'gmp':
        parts_attention_pooling_head = GlobalMaxPoolingHead(dim_reduce_output, normalization)
    elif pooling == 'gwap':
        parts_attention_pooling_head = GlobalWeightedAveragePoolingHead(dim_reduce_output, normalization)
    else:
        raise ValueError('pooling type {} not supported'.format(pooling))
    return parts_attention_pooling_head


class GlobalMaskWeightedPoolingHead(nn.Module):
    # Base head: multiplies spatial features [N, D, H, W] by part masks
    # [N, M, H, W] and pools each masked map to one embedding per part.

    def __init__(self, depth, normalization='identity'):
        super().__init__()
        if normalization == 'identity':
            self.normalization = nn.Identity()
        elif normalization == 'batch_norm_3d':
            self.normalization = torch.nn.BatchNorm3d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        elif normalization == 'batch_norm_2d':
            self.normalization = torch.nn.BatchNorm2d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        elif normalization == 'batch_norm_1d':
            self.normalization = torch.nn.BatchNorm1d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        else:
            raise ValueError('normalization type {} not supported'.format(normalization))

    def forward(self, features, part_masks):
        part_masks = torch.unsqueeze(part_masks, 2)   # [N, M, 1, H, W]
        features = torch.unsqueeze(features, 1)       # [N, 1, D, H, W]
        parts_features = torch.mul(part_masks, features)  # [N, M, D, H, W]
        N, M, _, _, _ = parts_features.size()
        parts_features = parts_features.flatten(0, 1)     # [N*M, D, H, W]
        parts_features = self.normalization(parts_features)
        parts_features =
class GlobalMaskWeightedPoolingHead(nn.Module):
    """Pool spatial features into one embedding per body part.

    Reconstructed in full here (the original is split across extract lines).
    ``forward(features [N, D, H, W], part_masks [N, M, H, W])`` multiplies the
    features by each part mask and applies ``self.global_pooling`` (defined by
    subclasses) to produce [N, M, D] part embeddings.
    """

    def __init__(self, depth, normalization='identity'):
        super().__init__()
        if normalization == 'identity':
            self.normalization = nn.Identity()
        elif normalization == 'batch_norm_3d':
            self.normalization = torch.nn.BatchNorm3d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        elif normalization == 'batch_norm_2d':
            self.normalization = torch.nn.BatchNorm2d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        elif normalization == 'batch_norm_1d':
            self.normalization = torch.nn.BatchNorm1d(depth, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        else:
            raise ValueError('normalization type {} not supported'.format(normalization))

    def forward(self, features, part_masks):
        part_masks = torch.unsqueeze(part_masks, 2)       # [N, M, 1, H, W]
        features = torch.unsqueeze(features, 1)           # [N, 1, D, H, W]
        parts_features = torch.mul(part_masks, features)  # [N, M, D, H, W]
        N, M, _, _, _ = parts_features.size()
        parts_features = parts_features.flatten(0, 1)     # [N*M, D, H, W]
        parts_features = self.normalization(parts_features)
        parts_features = self.global_pooling(parts_features)
        parts_features = parts_features.view(N, M, -1)    # [N, M, D]
        return parts_features

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm1d) or isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm3d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.001)  # ResNet = 0.01, Bof and ISP-reid = 0.001
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


class GlobalMaxPoolingHead(GlobalMaskWeightedPoolingHead):
    # max response per channel inside each part mask
    global_pooling = nn.AdaptiveMaxPool2d((1, 1))


class GlobalAveragePoolingHead(GlobalMaskWeightedPoolingHead):
    # mean response per channel inside each part mask
    global_pooling = nn.AdaptiveAvgPool2d((1, 1))


class GlobalWeightedAveragePoolingHead(GlobalMaskWeightedPoolingHead):
    """Average weighted by the (soft) mask itself instead of the full spatial area."""

    def forward(self, features, part_masks):
        part_masks = torch.unsqueeze(part_masks, 2)
        features = torch.unsqueeze(features, 1)
        parts_features = torch.mul(part_masks, features)
        N, M, _, _, _ = parts_features.size()
        parts_features = parts_features.flatten(0, 1)
        parts_features = self.normalization(parts_features)
        parts_features = torch.sum(parts_features, dim=(-2, -1))
        # normalize by the mask mass; clamp avoids division by ~0 for empty masks
        part_masks_sum = torch.sum(part_masks.flatten(0, 1), dim=(-2, -1))
        part_masks_sum = torch.clamp(part_masks_sum, min=1e-6)
        parts_features_avg = torch.div(parts_features, part_masks_sum)
        parts_features = parts_features_avg.view(N, M, -1)
        return parts_features


########################################
#             Constructors             #
########################################


def bpbreid(num_classes, loss='part_based', pretrained=True, config=None, **kwargs):
    """Build the standard BPBreID model from the config."""
    model = BPBreID(
        num_classes,
        pretrained,
        loss,
        config.model.bpbreid,
        **kwargs
    )
    return model


def pcb(num_classes, loss='part_based', pretrained=True, config=None, **kwargs):
    """PCB-style variant: fixed horizontal stripes, no learnable attention.

    BUGFIX: the keyword was misspelled ``horizontal_stipes``, which silently
    fell into ``**kwargs`` so stripe pooling was never enabled.
    """
    config.model.bpbreid.learnable_attention_enabled = False
    model = BPBreID(
        num_classes,
        pretrained,
        loss,
        config.model.bpbreid,
        horizontal_stripes=True,
        config=config,
        **kwargs
    )
    return model


def bot(num_classes, loss='part_based', pretrained=True, config=None, **kwargs):
    """Bag-of-tricks-style variant: a single part (stripe), no learnable attention.

    BUGFIX: same ``horizontal_stipes`` -> ``horizontal_stripes`` typo as in pcb().
    """
    config.model.bpbreid.masks.parts_num = 1
    config.model.bpbreid.learnable_attention_enabled = False
    model = BPBreID(
        num_classes,
        pretrained,
        loss,
        config.model.bpbreid,
        horizontal_stripes=True,
        config=config,
        **kwargs
    )
    return model


# ================================================
# FILE: torchreid/models/compact_bilinear_pooling.py
# ================================================
import types
import torch
import torch.nn as nn
from torch.autograd import Function


def CountSketchFn_forward(h, s, output_size, x, force_cpu_scatter_add=False):
    """Count sketch of x along its last dim: out[..., h[i]] += x[..., i] * s[i]."""
    x_size = tuple(x.size())
    s_view = (1,) * (len(x_size) - 1) + (x_size[-1],)
    out_size = x_size[:-1] + (output_size,)
    # Broadcast s and compute x * s
    s = s.view(s_view)
    xs = x * s
    # Broadcast h then scatter-accumulate: out[h_i] += x_i * s_i
    h = h.view(s_view).expand(x_size)
    if force_cpu_scatter_add:
        # workaround path: some CUDA builds had unreliable scatter_add_
        out = x.new(*out_size).zero_().cpu()
        return out.scatter_add_(-1, h.cpu(), xs.cpu()).cuda()
    else:
        out = x.new(*out_size).zero_()
        return out.scatter_add_(-1, h, xs)


def CountSketchFn_backward(h, s, x_size, grad_output):
    """Gradient of the count sketch: gather back through h and rescale by s."""
    s_view = (1,) * (len(x_size) - 1) + (x_size[-1],)
    s = s.view(s_view)
    h = h.view(s_view).expand(x_size)
    grad_x = grad_output.gather(-1, h)
    grad_x = grad_x * s
    return grad_x


class CountSketchFn(Function):
    """Autograd wrapper around the count-sketch forward/backward pair."""

    @staticmethod
    def forward(ctx, h, s, output_size, x, force_cpu_scatter_add=False):
        x_size = tuple(x.size())
        ctx.save_for_backward(h, s)
        ctx.x_size = tuple(x.size())
        return CountSketchFn_forward(h, s, output_size, x, force_cpu_scatter_add)

    @staticmethod
    def backward(ctx, grad_output):
        h, s = ctx.saved_variables
        grad_x = CountSketchFn_backward(h, s, ctx.x_size, grad_output)
        # no gradients for h, s, output_size
        return None, None, None, grad_x
math:: out_j = \sum_{i : j = h_i} s_i x_i Args: input_size (int): Number of channels in the input array output_size (int): Number of channels in the output sketch h (array, optional): Optional array of size input_size of indices in the range [0,output_size] s (array, optional): Optional array of size input_size of -1 and 1. .. note:: If h and s are None, they will be automatically be generated using LongTensor.random_. Shape: - Input: (...,input_size) - Output: (...,output_size) References: Yang Gao et al. "Compact Bilinear Pooling" in Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2016). Akira Fukui et al. "Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding", arXiv:1606.01847 (2016). """ def __init__(self, input_size, output_size, h=None, s=None): super(CountSketch, self).__init__() self.input_size = input_size self.output_size = output_size if h is None: h = torch.LongTensor(input_size).random_(0, output_size) if s is None: s = 2 * torch.Tensor(input_size).random_(0, 2) - 1 # The Variable h being a list of indices, # If the type of this module is changed (e.g. float to double), # the variable h should remain a LongTensor # therefore we force float() and double() to be no-ops on the variable h. 
        def identity(self):
            return self

        h.float = types.MethodType(identity, h)
        h.double = types.MethodType(identity, h)

        # Buffers: move with the module / get serialized, but are not trained.
        self.register_buffer('h', h)
        self.register_buffer('s', s)

    def forward(self, x):
        # x: (..., input_size) -> (..., output_size)
        x_size = list(x.size())

        assert (x_size[-1] == self.input_size)

        return CountSketchFn.apply(self.h, self.s, self.output_size, x)


def ComplexMultiply_forward(X_re, X_im, Y_re, Y_im):
    # Element-wise complex product Z = X * Y with real/imag parts kept in
    # separate tensors:
    #   Z_re = X_re*Y_re - X_im*Y_im
    #   Z_im = X_re*Y_im + X_im*Y_re
    # NOTE(review): the positional `value` argument of torch.addcmul is
    # deprecated on modern PyTorch (use the value= keyword).
    Z_re = torch.addcmul(X_re * Y_re, -1, X_im, Y_im)
    Z_im = torch.addcmul(X_re * Y_im, 1, X_im, Y_re)
    return Z_re, Z_im


def ComplexMultiply_backward(X_re, X_im, Y_re, Y_im, grad_Z_re, grad_Z_im):
    # Gradients of the split-real/imag complex product w.r.t. both operands.
    grad_X_re = torch.addcmul(grad_Z_re * Y_re, 1, grad_Z_im, Y_im)
    grad_X_im = torch.addcmul(grad_Z_im * Y_re, -1, grad_Z_re, Y_im)

    grad_Y_re = torch.addcmul(grad_Z_re * X_re, 1, grad_Z_im, X_im)
    grad_Y_im = torch.addcmul(grad_Z_im * X_re, -1, grad_Z_re, X_im)

    return grad_X_re, grad_X_im, grad_Y_re, grad_Y_im


class ComplexMultiply(torch.autograd.Function):
    """Autograd wrapper around the split-real/imag complex multiplication."""

    @staticmethod
    def forward(ctx, X_re, X_im, Y_re, Y_im):
        ctx.save_for_backward(X_re, X_im, Y_re, Y_im)
        return ComplexMultiply_forward(X_re, X_im, Y_re, Y_im)

    @staticmethod
    def backward(ctx, grad_Z_re, grad_Z_im):
        X_re, X_im, Y_re, Y_im = ctx.saved_tensors
        return ComplexMultiply_backward(X_re, X_im, Y_re, Y_im, grad_Z_re, grad_Z_im)


class CompactBilinearPoolingFn(Function):

    @staticmethod
    def forward(ctx, h1, s1, h2, s2, output_size, x, y, force_cpu_scatter_add=False):
        # Compact bilinear pooling: count-sketch x and y, multiply their FFTs
        # (i.e. circularly convolve the two sketches), then transform back.
        ctx.save_for_backward(h1, s1, h2, s2, x, y)
        ctx.x_size = tuple(x.size())
        ctx.y_size = tuple(y.size())
        ctx.force_cpu_scatter_add = force_cpu_scatter_add
        ctx.output_size = output_size

        # Compute the count sketch of each input
        px = CountSketchFn_forward(h1, s1, output_size, x, force_cpu_scatter_add)
        # NOTE(review): torch.rfft/torch.irfft were removed in PyTorch 1.8;
        # this file requires an older torch or a port to the torch.fft API.
        fx = torch.rfft(px, 1)
        re_fx = fx.select(-1, 0)
        im_fx = fx.select(-1, 1)
        del px

        py = CountSketchFn_forward(h2, s2, output_size, y, force_cpu_scatter_add)
        fy = torch.rfft(py, 1)
        re_fy = fy.select(-1, 0)
        im_fy = fy.select(-1, 1)
        del py

        # Convolution of the two sketch using an FFT.
# Compute the FFT of each sketch # Complex multiplication re_prod, im_prod = ComplexMultiply_forward(re_fx, im_fx, re_fy, im_fy) # Back to real domain # The imaginary part should be zero's re = torch.irfft(torch.stack((re_prod, im_prod), re_prod.dim()), 1, signal_sizes=(output_size,)) return re @staticmethod def backward(ctx, grad_output): h1, s1, h2, s2, x, y = ctx.saved_tensors # Recompute part of the forward pass to get the input to the complex product # Compute the count sketch of each input px = CountSketchFn_forward(h1, s1, ctx.output_size, x, ctx.force_cpu_scatter_add) py = CountSketchFn_forward(h2, s2, ctx.output_size, y, ctx.force_cpu_scatter_add) # Then convert the output to Fourier domain grad_output = grad_output.contiguous() grad_prod = torch.rfft(grad_output, 1) grad_re_prod = grad_prod.select(-1, 0) grad_im_prod = grad_prod.select(-1, 1) # Compute the gradient of x first then y # Gradient of x # Recompute fy fy = torch.rfft(py, 1) re_fy = fy.select(-1, 0) im_fy = fy.select(-1, 1) del py # Compute the gradient of fx, then back to temporal space grad_re_fx = torch.addcmul(grad_re_prod * re_fy, 1, grad_im_prod, im_fy) grad_im_fx = torch.addcmul(grad_im_prod * re_fy, -1, grad_re_prod, im_fy) grad_fx = torch.irfft(torch.stack((grad_re_fx, grad_im_fx), grad_re_fx.dim()), 1, signal_sizes=(ctx.output_size,)) # Finally compute the gradient of x grad_x = CountSketchFn_backward(h1, s1, ctx.x_size, grad_fx) del re_fy, im_fy, grad_re_fx, grad_im_fx, grad_fx # Gradient of y # Recompute fx fx = torch.rfft(px, 1) re_fx = fx.select(-1, 0) im_fx = fx.select(-1, 1) del px # Compute the gradient of fy, then back to temporal space grad_re_fy = torch.addcmul(grad_re_prod * re_fx, 1, grad_im_prod, im_fx) grad_im_fy = torch.addcmul(grad_im_prod * re_fx, -1, grad_re_prod, im_fx) grad_fy = torch.irfft(torch.stack((grad_re_fy, grad_im_fy), grad_re_fy.dim()), 1, signal_sizes=(ctx.output_size,)) # Finally compute the gradient of y grad_y = CountSketchFn_backward(h2, s2, 
                                        ctx.y_size, grad_fy)
        del re_fx, im_fx, grad_re_fy, grad_im_fy, grad_fy

        # No gradients for the projection buffers h1/s1/h2/s2, for
        # output_size, or for the force_cpu_scatter_add flag.
        return None, None, None, None, None, grad_x, grad_y, None


class CompactBilinearPooling(nn.Module):
    r"""Compute the compact bilinear pooling between two input array x and y

    .. math::

        out = \Psi (x,h_1,s_1) \ast \Psi (y,h_2,s_2)

    Args:
        input_size1 (int): Number of channels in the first input array
        input_size2 (int): Number of channels in the second input array
        output_size (int): Number of channels in the output array
        h1 (array, optional): Optional array of size input_size of indices in the range [0,output_size]
        s1 (array, optional): Optional array of size input_size of -1 and 1.
        h2 (array, optional): Optional array of size input_size of indices in the range [0,output_size]
        s2 (array, optional): Optional array of size input_size of -1 and 1.
        force_cpu_scatter_add (boolean, optional): Force the scatter_add operation to run on CPU for testing purposes

    .. note::

        If h1, s1, s2, h2 are None, they will be automatically be generated using LongTensor.random_.

    Shape:
        - Input 1: (...,input_size1)
        - Input 2: (...,input_size2)
        - Output: (...,output_size)

    References:
        Yang Gao et al. "Compact Bilinear Pooling" in Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (2016).
        Akira Fukui et al. "Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding", arXiv:1606.01847 (2016).
""" def __init__(self, input1_size, input2_size, output_size, h1=None, s1=None, h2=None, s2=None, force_cpu_scatter_add=False): super(CompactBilinearPooling, self).__init__() self.add_module('sketch1', CountSketch(input1_size, output_size, h1, s1)) self.add_module('sketch2', CountSketch(input2_size, output_size, h2, s2)) self.output_size = output_size self.force_cpu_scatter_add = force_cpu_scatter_add def forward(self, x, y=None): if y is None: y = x return CompactBilinearPoolingFn.apply(self.sketch1.h, self.sketch1.s, self.sketch2.h, self.sketch2.s, self.output_size, x, y, self.force_cpu_scatter_add) if __name__ == '__main__': input_size = 2048 output_size = 16000 mcb = CompactBilinearPooling(input_size, input_size, output_size).cuda() x = torch.rand(4, input_size).cuda() y = torch.rand(4, input_size).cuda() z = mcb(x, y) print(z) ================================================ FILE: torchreid/models/densenet.py ================================================ """ Code source: https://github.com/pytorch/vision """ from __future__ import division, absolute_import import re from collections import OrderedDict import torch import torch.nn as nn from torch.nn import functional as F from torch.utils import model_zoo __all__ = [ 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'densenet121_fc512' ] model_urls = { 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', } class _DenseLayer(nn.Sequential): def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): super(_DenseLayer, self).__init__() self.add_module('norm1', nn.BatchNorm2d(num_input_features)), self.add_module('relu1', nn.ReLU(inplace=True)), self.add_module( 'conv1', nn.Conv2d( num_input_features, bn_size * growth_rate, 
                kernel_size=1,
                stride=1,
                bias=False
            )
        ),
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
        self.add_module('relu2', nn.ReLU(inplace=True)),
        self.add_module(
            'conv2',
            nn.Conv2d(
                bn_size * growth_rate,
                growth_rate,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False
            )
        ),
        self.drop_rate = drop_rate

    def forward(self, x):
        new_features = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            new_features = F.dropout(
                new_features, p=self.drop_rate, training=self.training
            )
        # Dense connectivity: concatenate input and new features on channels.
        return torch.cat([x, new_features], 1)


class _DenseBlock(nn.Sequential):
    # Stack of dense layers; channel count grows by growth_rate per layer.

    def __init__(
        self, num_layers, num_input_features, bn_size, growth_rate, drop_rate
    ):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i*growth_rate, growth_rate, bn_size,
                drop_rate
            )
            self.add_module('denselayer%d' % (i+1), layer)


class _Transition(nn.Sequential):
    # Between dense blocks: 1x1 conv to shrink channels + 2x2 average pool.

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module(
            'conv',
            nn.Conv2d(
                num_input_features,
                num_output_features,
                kernel_size=1,
                stride=1,
                bias=False
            )
        )
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    """Densely connected network.

    Reference:
        Huang et al. Densely Connected Convolutional Networks. CVPR 2017.

    Public keys:
        - ``densenet121``: DenseNet121.
        - ``densenet169``: DenseNet169.
        - ``densenet201``: DenseNet201.
        - ``densenet161``: DenseNet161.
        - ``densenet121_fc512``: DenseNet121 + FC.
    """

    def __init__(
        self,
        num_classes,
        loss,
        growth_rate=32,
        block_config=(6, 12, 24, 16),
        num_init_features=64,
        bn_size=4,
        drop_rate=0,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    ):
        super(DenseNet, self).__init__()
        self.loss = loss

        # First convolution
        self.features = nn.Sequential(
            OrderedDict(
                [
                    (
                        'conv0',
                        nn.Conv2d(
                            3,
                            num_init_features,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False
                        )
                    ),
                    ('norm0', nn.BatchNorm2d(num_init_features)),
                    ('relu0', nn.ReLU(inplace=True)),
                    (
                        'pool0',
                        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
                    ),
                ]
            )
        )

        # Each denseblock, with a transition (halving channels) between blocks.
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate
            )
            self.features.add_module('denseblock%d' % (i+1), block)
            num_features = num_features + num_layers*growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(
                    num_input_features=num_features,
                    num_output_features=num_features // 2
                )
                self.features.add_module('transition%d' % (i+1), trans)
                num_features = num_features // 2

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(num_features))

        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.feature_dim = num_features
        self.fc = self._construct_fc_layer(fc_dims, num_features, dropout_p)

        # Linear layer
        self.classifier = nn.Linear(self.feature_dim, num_classes)

        self._init_params()

    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
        """Constructs fully connected layer.

        Side effect: updates self.feature_dim to the embedding dimension.

        Args:
            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are
                constructed
            input_dim (int): input dimension
            dropout_p (float): dropout probability, if None, dropout is unused
        """
        if fc_dims is None:
            self.feature_dim = input_dim
            return None

        assert isinstance(
            fc_dims, (list, tuple)
        ), 'fc_dims must be either list or tuple, but got {}'.format(
            type(fc_dims)
        )

        layers = []
        for dim in fc_dims:
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU(inplace=True))
            if dropout_p is not None:
                layers.append(nn.Dropout(p=dropout_p))
            input_dim = dim

        self.feature_dim = fc_dims[-1]

        return nn.Sequential(*layers)

    def _init_params(self):
        # Kaiming init for convs, constant init for BN, small normal for linears.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        f = self.features(x)
        f = F.relu(f, inplace=True)
        v = self.global_avgpool(f)
        v = v.view(v.size(0), -1)

        if self.fc is not None:
            v = self.fc(v)

        if not self.training:
            # Inference: return the embedding only.
            return v

        y = self.classifier(v)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))


def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    pretrain_dict = model_zoo.load_url(model_url)

    # '.'s are no longer allowed in module names, but previous _DenseLayer
    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
    # They are also in the checkpoints in model_urls. This pattern is used
    # to find such keys.
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
    )
    for key in list(pretrain_dict.keys()):
        res = pattern.match(key)
        if res:
            # Rename e.g. 'denselayer1.norm.1.weight' -> 'denselayer1.norm1.weight'.
            new_key = res.group(1) + res.group(2)
            pretrain_dict[new_key] = pretrain_dict[key]
            del pretrain_dict[key]

    model_dict = model.state_dict()
    # Keep only pretrained entries whose name and shape match this model.
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


"""
Dense network configurations:
--
densenet121: num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16)
densenet169: num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32)
densenet201: num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32)
densenet161: num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24)
"""


def densenet121(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = DenseNet(
        num_classes=num_classes,
        loss=loss,
        num_init_features=64,
        growth_rate=32,
        block_config=(6, 12, 24, 16),
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['densenet121'])
    return model


def densenet169(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = DenseNet(
        num_classes=num_classes,
        loss=loss,
        num_init_features=64,
        growth_rate=32,
        block_config=(6, 12, 32, 32),
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['densenet169'])
    return model


def densenet201(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = DenseNet(
        num_classes=num_classes,
        loss=loss,
        num_init_features=64,
        growth_rate=32,
        block_config=(6, 12, 48, 32),
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['densenet201'])
    return model


def densenet161(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = DenseNet(
        num_classes=num_classes,
        loss=loss,
        num_init_features=96,
        growth_rate=48,
        block_config=(6, 12, 36, 24),
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['densenet161'])
    return model


def densenet121_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):
    # DenseNet121 with an extra 512-d fully connected embedding layer.
    model = DenseNet(
        num_classes=num_classes,
        loss=loss,
        num_init_features=64,
        growth_rate=32,
        block_config=(6, 12, 24, 16),
        fc_dims=[512],
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['densenet121'])
    return model


================================================
FILE: torchreid/models/hacnn.py
================================================
from __future__ import division, absolute_import
import torch
from torch import nn
from torch.nn import functional as F

__all__ = ['HACNN']


class ConvBlock(nn.Module):
    """Basic convolutional block.

    convolution + batch normalization + relu.

    Args:
        in_c (int): number of input channels.
        out_c (int): number of output channels.
        k (int or tuple): kernel size.
        s (int or tuple): stride.
        p (int or tuple): padding.
""" def __init__(self, in_c, out_c, k, s=1, p=0): super(ConvBlock, self).__init__() self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p) self.bn = nn.BatchNorm2d(out_c) def forward(self, x): return F.relu(self.bn(self.conv(x))) class InceptionA(nn.Module): def __init__(self, in_channels, out_channels): super(InceptionA, self).__init__() mid_channels = out_channels // 4 self.stream1 = nn.Sequential( ConvBlock(in_channels, mid_channels, 1), ConvBlock(mid_channels, mid_channels, 3, p=1), ) self.stream2 = nn.Sequential( ConvBlock(in_channels, mid_channels, 1), ConvBlock(mid_channels, mid_channels, 3, p=1), ) self.stream3 = nn.Sequential( ConvBlock(in_channels, mid_channels, 1), ConvBlock(mid_channels, mid_channels, 3, p=1), ) self.stream4 = nn.Sequential( nn.AvgPool2d(3, stride=1, padding=1), ConvBlock(in_channels, mid_channels, 1), ) def forward(self, x): s1 = self.stream1(x) s2 = self.stream2(x) s3 = self.stream3(x) s4 = self.stream4(x) y = torch.cat([s1, s2, s3, s4], dim=1) return y class InceptionB(nn.Module): def __init__(self, in_channels, out_channels): super(InceptionB, self).__init__() mid_channels = out_channels // 4 self.stream1 = nn.Sequential( ConvBlock(in_channels, mid_channels, 1), ConvBlock(mid_channels, mid_channels, 3, s=2, p=1), ) self.stream2 = nn.Sequential( ConvBlock(in_channels, mid_channels, 1), ConvBlock(mid_channels, mid_channels, 3, p=1), ConvBlock(mid_channels, mid_channels, 3, s=2, p=1), ) self.stream3 = nn.Sequential( nn.MaxPool2d(3, stride=2, padding=1), ConvBlock(in_channels, mid_channels * 2, 1), ) def forward(self, x): s1 = self.stream1(x) s2 = self.stream2(x) s3 = self.stream3(x) y = torch.cat([s1, s2, s3], dim=1) return y class SpatialAttn(nn.Module): """Spatial Attention (Sec. 
3.1.I.1)""" def __init__(self): super(SpatialAttn, self).__init__() self.conv1 = ConvBlock(1, 1, 3, s=2, p=1) self.conv2 = ConvBlock(1, 1, 1) def forward(self, x): # global cross-channel averaging x = x.mean(1, keepdim=True) # 3-by-3 conv x = self.conv1(x) # bilinear resizing x = F.upsample( x, (x.size(2) * 2, x.size(3) * 2), mode='bilinear', align_corners=True ) # scaling conv x = self.conv2(x) return x class ChannelAttn(nn.Module): """Channel Attention (Sec. 3.1.I.2)""" def __init__(self, in_channels, reduction_rate=16): super(ChannelAttn, self).__init__() assert in_channels % reduction_rate == 0 self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1) self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1) def forward(self, x): # squeeze operation (global average pooling) x = F.avg_pool2d(x, x.size()[2:]) # excitation operation (2 conv layers) x = self.conv1(x) x = self.conv2(x) return x class SoftAttn(nn.Module): """Soft Attention (Sec. 3.1.I) Aim: Spatial Attention + Channel Attention Output: attention maps with shape identical to input. """ def __init__(self, in_channels): super(SoftAttn, self).__init__() self.spatial_attn = SpatialAttn() self.channel_attn = ChannelAttn(in_channels) self.conv = ConvBlock(in_channels, in_channels, 1) def forward(self, x): y_spatial = self.spatial_attn(x) y_channel = self.channel_attn(x) y = y_spatial * y_channel y = torch.sigmoid(self.conv(y)) return y class HardAttn(nn.Module): """Hard Attention (Sec. 
3.1.II)""" def __init__(self, in_channels): super(HardAttn, self).__init__() self.fc = nn.Linear(in_channels, 4 * 2) self.init_params() def init_params(self): self.fc.weight.data.zero_() self.fc.bias.data.copy_( torch.tensor( [0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float ) ) def forward(self, x): # squeeze operation (global average pooling) x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1)) # predict transformation parameters theta = torch.tanh(self.fc(x)) theta = theta.view(-1, 4, 2) return theta class HarmAttn(nn.Module): """Harmonious Attention (Sec. 3.1)""" def __init__(self, in_channels): super(HarmAttn, self).__init__() self.soft_attn = SoftAttn(in_channels) self.hard_attn = HardAttn(in_channels) def forward(self, x): y_soft_attn = self.soft_attn(x) theta = self.hard_attn(x) return y_soft_attn, theta class HACNN(nn.Module): """Harmonious Attention Convolutional Neural Network. Reference: Li et al. Harmonious Attention Network for Person Re-identification. CVPR 2018. Public keys: - ``hacnn``: HACNN. """ # Args: # num_classes (int): number of classes to predict # nchannels (list): number of channels AFTER concatenation # feat_dim (int): feature dimension for a single stream # learn_region (bool): whether to learn region features (i.e. 
local branch) def __init__( self, num_classes, loss='softmax', nchannels=[128, 256, 384], feat_dim=512, learn_region=True, use_gpu=True, **kwargs ): super(HACNN, self).__init__() self.loss = loss self.learn_region = learn_region self.use_gpu = use_gpu self.conv = ConvBlock(3, 32, 3, s=2, p=1) # Construct Inception + HarmAttn blocks # ============== Block 1 ============== self.inception1 = nn.Sequential( InceptionA(32, nchannels[0]), InceptionB(nchannels[0], nchannels[0]), ) self.ha1 = HarmAttn(nchannels[0]) # ============== Block 2 ============== self.inception2 = nn.Sequential( InceptionA(nchannels[0], nchannels[1]), InceptionB(nchannels[1], nchannels[1]), ) self.ha2 = HarmAttn(nchannels[1]) # ============== Block 3 ============== self.inception3 = nn.Sequential( InceptionA(nchannels[1], nchannels[2]), InceptionB(nchannels[2], nchannels[2]), ) self.ha3 = HarmAttn(nchannels[2]) self.fc_global = nn.Sequential( nn.Linear(nchannels[2], feat_dim), nn.BatchNorm1d(feat_dim), nn.ReLU(), ) self.classifier_global = nn.Linear(feat_dim, num_classes) if self.learn_region: self.init_scale_factors() self.local_conv1 = InceptionB(32, nchannels[0]) self.local_conv2 = InceptionB(nchannels[0], nchannels[1]) self.local_conv3 = InceptionB(nchannels[1], nchannels[2]) self.fc_local = nn.Sequential( nn.Linear(nchannels[2] * 4, feat_dim), nn.BatchNorm1d(feat_dim), nn.ReLU(), ) self.classifier_local = nn.Linear(feat_dim, num_classes) self.feat_dim = feat_dim * 2 else: self.feat_dim = feat_dim def init_scale_factors(self): # initialize scale factors (s_w, s_h) for four regions self.scale_factors = [] self.scale_factors.append( torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float) ) self.scale_factors.append( torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float) ) self.scale_factors.append( torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float) ) self.scale_factors.append( torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float) ) def stn(self, x, theta): """Performs spatial transform x: (batch, 
channel, height, width) theta: (batch, 2, 3) """ grid = F.affine_grid(theta, x.size()) x = F.grid_sample(x, grid) return x def transform_theta(self, theta_i, region_idx): """Transforms theta to include (s_w, s_h), resulting in (batch, 2, 3)""" scale_factors = self.scale_factors[region_idx] theta = torch.zeros(theta_i.size(0), 2, 3) theta[:, :, :2] = scale_factors theta[:, :, -1] = theta_i if self.use_gpu: theta = theta.cuda() return theta def forward(self, x): assert x.size(2) == 160 and x.size(3) == 64, \ 'Input size does not match, expected (160, 64) but got ({}, {})'.format(x.size(2), x.size(3)) x = self.conv(x) # ============== Block 1 ============== # global branch x1 = self.inception1(x) x1_attn, x1_theta = self.ha1(x1) x1_out = x1 * x1_attn # local branch if self.learn_region: x1_local_list = [] for region_idx in range(4): x1_theta_i = x1_theta[:, region_idx, :] x1_theta_i = self.transform_theta(x1_theta_i, region_idx) x1_trans_i = self.stn(x, x1_theta_i) x1_trans_i = F.upsample( x1_trans_i, (24, 28), mode='bilinear', align_corners=True ) x1_local_i = self.local_conv1(x1_trans_i) x1_local_list.append(x1_local_i) # ============== Block 2 ============== # Block 2 # global branch x2 = self.inception2(x1_out) x2_attn, x2_theta = self.ha2(x2) x2_out = x2 * x2_attn # local branch if self.learn_region: x2_local_list = [] for region_idx in range(4): x2_theta_i = x2_theta[:, region_idx, :] x2_theta_i = self.transform_theta(x2_theta_i, region_idx) x2_trans_i = self.stn(x1_out, x2_theta_i) x2_trans_i = F.upsample( x2_trans_i, (12, 14), mode='bilinear', align_corners=True ) x2_local_i = x2_trans_i + x1_local_list[region_idx] x2_local_i = self.local_conv2(x2_local_i) x2_local_list.append(x2_local_i) # ============== Block 3 ============== # Block 3 # global branch x3 = self.inception3(x2_out) x3_attn, x3_theta = self.ha3(x3) x3_out = x3 * x3_attn # local branch if self.learn_region: x3_local_list = [] for region_idx in range(4): x3_theta_i = x3_theta[:, region_idx, :] 
x3_theta_i = self.transform_theta(x3_theta_i, region_idx) x3_trans_i = self.stn(x2_out, x3_theta_i) x3_trans_i = F.upsample( x3_trans_i, (6, 7), mode='bilinear', align_corners=True ) x3_local_i = x3_trans_i + x2_local_list[region_idx] x3_local_i = self.local_conv3(x3_local_i) x3_local_list.append(x3_local_i) # ============== Feature generation ============== # global branch x_global = F.avg_pool2d(x3_out, x3_out.size()[2:] ).view(x3_out.size(0), x3_out.size(1)) x_global = self.fc_global(x_global) # local branch if self.learn_region: x_local_list = [] for region_idx in range(4): x_local_i = x3_local_list[region_idx] x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:] ).view(x_local_i.size(0), -1) x_local_list.append(x_local_i) x_local = torch.cat(x_local_list, 1) x_local = self.fc_local(x_local) if not self.training: # l2 normalization before concatenation if self.learn_region: x_global = x_global / x_global.normalization(p=2, dim=1, keepdim=True) x_local = x_local / x_local.normalization(p=2, dim=1, keepdim=True) return torch.cat([x_global, x_local], 1) else: return x_global prelogits_global = self.classifier_global(x_global) if self.learn_region: prelogits_local = self.classifier_local(x_local) if self.loss == 'softmax': if self.learn_region: return (prelogits_global, prelogits_local) else: return prelogits_global elif self.loss == 'triplet': if self.learn_region: return (prelogits_global, prelogits_local), (x_global, x_local) else: return prelogits_global, x_global else: raise KeyError("Unsupported loss: {}".format(self.loss)) ================================================ FILE: torchreid/models/hrnet.py ================================================ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os from pathlib import Path import torch import torch.nn as nn import torch._utils import torch.nn.functional as F from yacs.config import CfgNode as CN # Source: # 
https://github.com/HRNet/HRNet-Image-Classification/blob/master/lib/models/cls_hrnet.py # https://github.com/CASIA-IVA-Lab/ISP-reID __all__ = [ 'hrnet32', ] model_paths = { 'hrnet-w32': 'hrnetv2_w32_imagenet_pretrained.pth', } def get_hrnet_config(): _C = CN() _C.MODEL = CN() _C.MODEL.EXTRA = CN(new_allowed=True) _C.MODEL.EXTRA.STAGE2 = CN() _C.MODEL.EXTRA.STAGE2.NUM_MODULES = 1 _C.MODEL.EXTRA.STAGE2.NUM_BRANCHES = 2 _C.MODEL.EXTRA.STAGE2.NUM_BLOCKS = [4, 4] _C.MODEL.EXTRA.STAGE2.NUM_CHANNELS = [32, 64] _C.MODEL.EXTRA.STAGE2.BLOCK = 'BASIC' _C.MODEL.EXTRA.STAGE2.FUSE_METHOD = 'SUM' _C.MODEL.EXTRA.STAGE3 = CN() _C.MODEL.EXTRA.STAGE3.NUM_MODULES = 4 _C.MODEL.EXTRA.STAGE3.NUM_BRANCHES = 3 _C.MODEL.EXTRA.STAGE3.NUM_BLOCKS = [4, 4, 4] _C.MODEL.EXTRA.STAGE3.NUM_CHANNELS = [32, 64, 128] _C.MODEL.EXTRA.STAGE3.BLOCK = 'BASIC' _C.MODEL.EXTRA.STAGE3.FUSE_METHOD = 'SUM' _C.MODEL.EXTRA.STAGE4 = CN() _C.MODEL.EXTRA.STAGE4.NUM_MODULES = 3 _C.MODEL.EXTRA.STAGE4.NUM_BRANCHES = 4 _C.MODEL.EXTRA.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] _C.MODEL.EXTRA.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] _C.MODEL.EXTRA.STAGE4.BLOCK = 'BASIC' _C.MODEL.EXTRA.STAGE4.FUSE_METHOD = 'SUM' return _C BN_MOMENTUM = 0.1 def conv3x3(in_planes, out_planes, stride=1): """3x3 convolution with padding""" return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) class BasicBlock(nn.Module): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(planes, planes) self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample is not None: residual = self.downsample(x) out += 
residual out = self.relu(out) return out class Bottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class HighResolutionModule(nn.Module): def __init__(self, num_branches, blocks, num_blocks, num_inchannels, num_channels, fuse_method, multi_scale_output=True): super(HighResolutionModule, self).__init__() self._check_branches( num_branches, blocks, num_blocks, num_inchannels, num_channels) self.num_inchannels = num_inchannels self.fuse_method = fuse_method self.num_branches = num_branches self.multi_scale_output = multi_scale_output self.branches = self._make_branches( num_branches, blocks, num_blocks, num_channels) self.fuse_layers = self._make_fuse_layers() self.relu = nn.ReLU(False) def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels): if num_branches != len(num_blocks): error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( num_branches, len(num_blocks)) print(error_msg) raise ValueError(error_msg) if num_branches != len(num_channels): error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( num_branches, len(num_channels)) 
print(error_msg) raise ValueError(error_msg) if num_branches != len(num_inchannels): error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( num_branches, len(num_inchannels)) print(error_msg) raise ValueError(error_msg) def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): downsample = None if stride != 1 or \ self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.num_inchannels[branch_index], num_channels[branch_index] * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM), ) layers = [] layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)) self.num_inchannels[branch_index] = \ num_channels[branch_index] * block.expansion for i in range(1, num_blocks[branch_index]): layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index])) return nn.Sequential(*layers) def _make_branches(self, num_branches, block, num_blocks, num_channels): branches = [] for i in range(num_branches): branches.append( self._make_one_branch(i, block, num_blocks, num_channels)) return nn.ModuleList(branches) def _make_fuse_layers(self): if self.num_branches == 1: return None num_branches = self.num_branches num_inchannels = self.num_inchannels fuse_layers = [] for i in range(num_branches if self.multi_scale_output else 1): fuse_layer = [] for j in range(num_branches): if j > i: fuse_layer.append(nn.Sequential( nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False), nn.BatchNorm2d(num_inchannels[i], momentum=BN_MOMENTUM), nn.Upsample(scale_factor=2 ** (j - i), mode='nearest'))) elif j == i: fuse_layer.append(None) else: conv3x3s = [] for k in range(i - j): if k == i - j - 1: num_outchannels_conv3x3 = num_inchannels[i] conv3x3s.append(nn.Sequential( nn.Conv2d(num_inchannels[j], num_outchannels_conv3x3, 3, 2, 1, 
bias=False), nn.BatchNorm2d(num_outchannels_conv3x3, momentum=BN_MOMENTUM))) else: num_outchannels_conv3x3 = num_inchannels[j] conv3x3s.append(nn.Sequential( nn.Conv2d(num_inchannels[j], num_outchannels_conv3x3, 3, 2, 1, bias=False), nn.BatchNorm2d(num_outchannels_conv3x3, momentum=BN_MOMENTUM), nn.ReLU(False))) fuse_layer.append(nn.Sequential(*conv3x3s)) fuse_layers.append(nn.ModuleList(fuse_layer)) return nn.ModuleList(fuse_layers) def get_num_inchannels(self): return self.num_inchannels def forward(self, x): if self.num_branches == 1: return [self.branches[0](x[0])] for i in range(self.num_branches): x[i] = self.branches[i](x[i]) x_fuse = [] for i in range(len(self.fuse_layers)): y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) for j in range(1, self.num_branches): if i == j: y = y + x[j] else: y = y + self.fuse_layers[i][j](x[j]) x_fuse.append(self.relu(y)) return x_fuse blocks_dict = { 'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck } class ConvBlock(nn.Module): def __init__(self, in_c, out_c, k, s=1, p=0): super(ConvBlock, self).__init__() self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p) self.conv.apply(weights_init_kaiming) self.bn = nn.BatchNorm2d(out_c) def forward(self, x): return self.bn(self.conv(x)) def weights_init_kaiming(m): classname = m.__class__.__name__ if classname.find('Linear') != -1: nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out') nn.init.constant_(m.bias, 0.0) elif classname.find('Conv') != -1: nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in') if m.bias is not None: nn.init.constant_(m.bias, 0.0) elif classname.find('BatchNorm') != -1: if m.affine: nn.init.constant_(m.weight, 1.0) nn.init.constant_(m.bias, 0.0) class HighResolutionNet(nn.Module): def __init__(self, cfg, enable_dim_reduction, dim_reduction_channels, **kwargs): super(HighResolutionNet, self).__init__() self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) self.conv2 = 
nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        # Stage 1: 4 bottleneck blocks on the single high-res branch (out: 256ch).
        self.layer1 = self._make_layer(Bottleneck, 64, 64, 4)

        self.stage2_cfg = cfg['MODEL']['EXTRA']['STAGE2']
        num_channels = self.stage2_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage2_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition1 = self._make_transition_layer(
            [256], num_channels)
        self.stage2, pre_stage_channels = self._make_stage(
            self.stage2_cfg, num_channels)

        self.stage3_cfg = cfg['MODEL']['EXTRA']['STAGE3']
        num_channels = self.stage3_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage3_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition2 = self._make_transition_layer(
            pre_stage_channels, num_channels)
        self.stage3, pre_stage_channels = self._make_stage(
            self.stage3_cfg, num_channels)

        self.stage4_cfg = cfg['MODEL']['EXTRA']['STAGE4']
        num_channels = self.stage4_cfg['NUM_CHANNELS']
        block = blocks_dict[self.stage4_cfg['BLOCK']]
        num_channels = [
            num_channels[i] * block.expansion for i in range(len(num_channels))]
        self.transition3 = self._make_transition_layer(
            pre_stage_channels, num_channels)
        # Keep all resolutions at the output so they can be concatenated in forward().
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multi_scale_output=True)

        self.incre_modules, _, _ = self._make_head(pre_stage_channels)

        # 1920 = 128 + 256 + 512 + 1024: channels after the incre modules,
        # upsampled to the highest resolution and concatenated in forward().
        self.layers_out_channels = 1920
        self.dim_reduction_channels = dim_reduction_channels
        # Optional 1x1-conv dimensionality reduction head.
        self.cls_head = nn.Sequential(
            nn.Conv2d(
                in_channels=self.layers_out_channels,
                out_channels=self.dim_reduction_channels,
                kernel_size=1,
                stride=1,
                padding=0),
            nn.BatchNorm2d(self.dim_reduction_channels, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True)
        )
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.enable_dim_reduction = enable_dim_reduction
        if self.enable_dim_reduction:
            self.feature_dim = self.dim_reduction_channels
        else:
            self.feature_dim = self.layers_out_channels
        self.random_init()

    def _make_incre_channel_nin(self):
        """Alternative channel-increasing head made of 1x1 conv-BN-ReLU blocks.

        NOTE(review): not referenced by the visible code paths — appears unused.
        """
        head_channels = [128, 256, 512, 1024]
        incre_modules = []
        for i in range(3):
            incre_module = nn.Sequential(
                nn.Conv2d(
                    in_channels=head_channels[i],
                    out_channels=head_channels[i + 1],
                    kernel_size=1,
                    stride=1,
                    padding=0),
                nn.BatchNorm2d(head_channels[i + 1], momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            )
            incre_modules.append(incre_module)
        incre_modules = nn.ModuleList(incre_modules)
        return incre_modules

    def _make_head(self, pre_stage_channels):
        """Build the classification head pieces.

        Returns (incre_modules, downsamp_modules, final_layer); only
        incre_modules is kept by __init__ — the other two are discarded there.
        """
        head_block = Bottleneck
        head_channels = [32, 64, 128, 256]

        # Increasing the #channels on each resolution
        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
        incre_modules = []
        for i, channels in enumerate(pre_stage_channels):
            incre_module = self._make_layer(head_block,
                                            channels,
                                            head_channels[i],
                                            1,
                                            stride=1)
            incre_modules.append(incre_module)
        incre_modules = nn.ModuleList(incre_modules)

        # downsampling modules
        downsamp_modules = []
        for i in range(len(pre_stage_channels) - 1):
            in_channels = head_channels[i] * head_block.expansion
            out_channels = head_channels[i + 1] * head_block.expansion

            downsamp_module = nn.Sequential(
                nn.Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=3,
                          stride=2,
                          padding=1),
                nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM),
                nn.ReLU(inplace=True)
            )
            downsamp_modules.append(downsamp_module)
        downsamp_modules = nn.ModuleList(downsamp_modules)

        final_layer = nn.Sequential(
            nn.Conv2d(
                in_channels=head_channels[3] * head_block.expansion,
                out_channels=2048,
                kernel_size=1,
                stride=1,
                padding=0
            ),
            nn.BatchNorm2d(2048, momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True)
        )

        return incre_modules, downsamp_modules, final_layer

    def _make_transition_layer(
            self, num_channels_pre_layer, num_channels_cur_layer):
        """Adapt the previous stage's branches to the next stage's layout.

        Existing branches get a 3x3 conv only when channel counts differ
        (None means identity); each new, lower-resolution branch is created by
        a chain of stride-2 3x3 convs from the previous stage's last branch.
        """
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(nn.Sequential(
                        nn.Conv2d(num_channels_pre_layer[i],
                                  num_channels_cur_layer[i],
                                  3,
                                  1,
                                  1,
                                  bias=False),
                        nn.BatchNorm2d(
                            num_channels_cur_layer[i], momentum=BN_MOMENTUM),
                        nn.ReLU(inplace=True)))
                else:
                    transition_layers.append(None)
            else:
                conv3x3s = []
                for j in range(i + 1 - num_branches_pre):
                    inchannels = num_channels_pre_layer[-1]
                    outchannels = num_channels_cur_layer[i] \
                        if j == i - num_branches_pre else inchannels
                    conv3x3s.append(nn.Sequential(
                        nn.Conv2d(
                            inchannels, outchannels, 3, 2, 1, bias=False),
                        nn.BatchNorm2d(outchannels, momentum=BN_MOMENTUM),
                        nn.ReLU(inplace=True)))
                transition_layers.append(nn.Sequential(*conv3x3s))

        return nn.ModuleList(transition_layers)

    def _make_layer(self, block, inplanes, planes, blocks, stride=1):
        """Stack `blocks` residual blocks; first block may downsample/rechannel."""
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = []
        layers.append(block(inplanes, planes, stride, downsample))
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(inplanes, planes))

        return nn.Sequential(*layers)

    def _make_stage(self, layer_config, num_inchannels,
                    multi_scale_output=True):
        """Build one HRNet stage from its config dict.

        Returns the stage (a Sequential of HighResolutionModule) and the
        per-branch output channel counts for the following stage.
        """
        num_modules = layer_config['NUM_MODULES']
        num_branches = layer_config['NUM_BRANCHES']
        num_blocks = layer_config['NUM_BLOCKS']
        num_channels = layer_config['NUM_CHANNELS']
        block = blocks_dict[layer_config['BLOCK']]
        fuse_method = layer_config['FUSE_METHOD']

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only used last module
            if not multi_scale_output and i == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True

            modules.append(
                HighResolutionModule(num_branches,
                                     block,
                                     num_blocks,
                                     num_inchannels,
                                     num_channels,
                                     fuse_method,
                                     reset_multi_scale_output)
            )
            num_inchannels = modules[-1].get_num_inchannels()

        return nn.Sequential(*modules), num_inchannels

    def forward(self, x):
        """Run the stem and four stages, then concatenate all resolutions.

        All branch outputs are upsampled to the highest resolution and
        concatenated (1920 channels); the optional cls_head reduces this to
        dim_reduction_channels.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.layer1(x)

        x_list = []
        for i in range(self.stage2_cfg['NUM_BRANCHES']):
            if self.transition1[i] is not None:
                x_list.append(self.transition1[i](x))
            else:
                x_list.append(x)
        y_list = self.stage2(x_list)

        x_list = []
        for i in range(self.stage3_cfg['NUM_BRANCHES']):
            if self.transition2[i] is not None:
                # New branches are derived from the lowest-resolution output.
                x_list.append(self.transition2[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        y_list = self.stage3(x_list)

        x_list = []
        for i in range(self.stage4_cfg['NUM_BRANCHES']):
            if self.transition3[i] is not None:
                x_list.append(self.transition3[i](y_list[-1]))
            else:
                x_list.append(y_list[i])
        x = self.stage4(x_list)

        for i in range(len(self.incre_modules)):
            x[i] = self.incre_modules[i](x[i])

        x0_h, x0_w = x[0].size(2), x[0].size(3)
        x1 = F.interpolate(x[1], size=(x0_h, x0_w), mode='bilinear', align_corners=True)  # torch.Size([128, 256, 64, 32])
        x2 = F.interpolate(x[2], size=(x0_h, x0_w), mode='bilinear', align_corners=True)  # torch.Size([128, 512, 64, 32])
        x3 = F.interpolate(x[3], size=(x0_h, x0_w), mode='bilinear', align_corners=True)  # torch.Size([128, 1024, 64, 32])

        x = torch.cat([x[0], x1, x2, x3], 1)  # torch.Size([b, 1920, 64, 32])

        if self.enable_dim_reduction:
            x = self.cls_head(x)  # torch.Size([128, 256, 64, 32])

        return x

    def random_init(self):
        """Kaiming-init all convs and constant-init all batch norms."""
        print('=> init weights from normal distribution')
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def load_param(self, pretrained_path):
        """Load ImageNet-pretrained HRNet weights, keeping only matching keys."""
        if not Path(pretrained_path).exists():
            raise FileNotFoundError(f'HRNet-W32-C pretrained weights not found under "{pretrained_path}", please download it '
                                    f'first at https://github.com/HRNet/HRNet-Image-Classification or specify the correct '
                                    f'weights dir location with the 
cfg.model.bpbreid.hrnet_pretrained_path config.')
        pretrained_dict = torch.load(pretrained_path)
        print('=> loading pretrained model {}'.format(pretrained_path))
        model_dict = self.state_dict()
        # Keep only checkpoint entries whose names exist in this model.
        pretrained_dict = {k: v for k, v in pretrained_dict.items()
                           if k in model_dict.keys()}
        # for k, _ in pretrained_dict.items():
        #     print(
        #         '=> loading {} pretrained model {}'.format(k, pretrained_path))
        model_dict.update(pretrained_dict)
        self.load_state_dict(model_dict)


def init_pretrained_weights(model, pretrain_path, model_key):
    """Resolve the checkpoint path for `model_key` and load it into `model`."""
    path = os.path.join(pretrain_path, model_paths[model_key])
    print('Loading pretrained ImageNet HRNet32 model at {}'.format(path))
    model.load_param(path)


def hrnet32(num_classes, loss='part_based', pretrained=True,
            enable_dim_reduction=True, dim_reduction_channels=256,
            pretrained_path='', **kwargs):
    """Factory for the HRNet-W32 backbone, optionally ImageNet-pretrained."""
    cfg = get_hrnet_config()
    model = HighResolutionNet(
        cfg,
        enable_dim_reduction,
        dim_reduction_channels
    )
    if pretrained:
        init_pretrained_weights(model, pretrained_path, 'hrnet-w32')
    return model


================================================
FILE: torchreid/models/inceptionresnetv2.py
================================================
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['inceptionresnetv2']

# Checkpoint URLs and the preprocessing each checkpoint expects.
pretrained_settings = {
    'inceptionresnetv2': {
        'imagenet': {
            'url':
            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
            'input_space': 'RGB',
            'input_size': [3, 299, 299],
            'input_range': [0, 1],
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5],
            'num_classes': 1000
        },
        'imagenet+background': {
            'url':
            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',
            'input_space': 'RGB',
            'input_size': [3, 299, 299],
            'input_range': [0, 1],
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5],
            'num_classes': 1001
        }
    }
}


class BasicConv2d(nn.Module):
    """Conv2d (no bias) + BatchNorm + ReLU, the elementary Inception unit."""

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False
        )  # verify bias false
        self.bn = nn.BatchNorm2d(
            out_planes,
            eps=0.001,  # value found in tensorflow
            momentum=0.1,  # default pytorch value
            affine=True
        )
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Mixed_5b(nn.Module):
    """Inception stem mixing block: four parallel branches concatenated (out 320ch)."""

    def __init__(self):
        super(Mixed_5b, self).__init__()

        self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(192, 48, kernel_size=1, stride=1),
            BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(192, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
        )

        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(192, 64, kernel_size=1, stride=1)
        )

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class Block35(nn.Module):
    """Inception-ResNet-A block (35x35 grid): residual scaled by `scale`."""

    def __init__(self, scale=1.0):
        super(Block35, self).__init__()

        self.scale = scale

        self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(320, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(320, 32, kernel_size=1, stride=1),
            BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),
            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1)
        )

        self.conv2d = nn.Conv2d(128, 320, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
        out = self.conv2d(out)
        # Scaled residual connection.
        out = out * self.scale + x
        out = self.relu(out)
        return out


class Mixed_6a(nn.Module):
    """Reduction-A block: downsamples 35x35 -> 17x17 (out 1088ch)."""

    def __init__(self):
        super(Mixed_6a, self).__init__()

        self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2)

        self.branch1 = nn.Sequential(
            BasicConv2d(320, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2)
        )

        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
        return out


class Block17(nn.Module):
    """Inception-ResNet-B block (17x17 grid) with factorized 7x7 convs."""

    def __init__(self, scale=1.0):
        super(Block17, self).__init__()

        self.scale = scale

        self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(1088, 128, kernel_size=1, stride=1),
            BasicConv2d(
                128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)
            ),
            BasicConv2d(
                160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)
            )
        )

        self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        out = self.conv2d(out)
        out = out * self.scale + x
        out = self.relu(out)
        return out


class Mixed_7a(nn.Module):
    """Reduction-B block: downsamples 17x17 -> 8x8 (out 2080ch)."""

    def __init__(self):
        super(Mixed_7a, self).__init__()

        self.branch0 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 384, kernel_size=3, stride=2)
        )

        self.branch1 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=2)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(1088, 256, kernel_size=1, stride=1),
            BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),
            BasicConv2d(288, 320, kernel_size=3, stride=2)
        )

        self.branch3 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class Block8(nn.Module):
    """Inception-ResNet-C block (8x8 grid); noReLU=True for the final block."""

    def __init__(self, scale=1.0, noReLU=False):
        super(Block8, self).__init__()

        self.scale = scale
        self.noReLU = noReLU

        self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(2080, 192, kernel_size=1, stride=1),
            BasicConv2d(
                192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)
            ),
            BasicConv2d(
                224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
            )
        )

        self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)
        if not self.noReLU:
            self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        out = self.conv2d(out)
        out = out * self.scale + x
        if not self.noReLU:
            out = self.relu(out)
        return out


# ----------------
# Model Definition
# ----------------
class InceptionResNetV2(nn.Module):
    """Inception-ResNet-V2.

    Reference:
        Szegedy et al. Inception-v4, Inception-ResNet and the Impact
        of Residual Connections on Learning. AAAI 2017.

    Public keys:
        - ``inceptionresnetv2``: Inception-ResNet-V2.
    """

    def __init__(self, num_classes, loss='softmax', **kwargs):
        super(InceptionResNetV2, self).__init__()
        self.loss = loss

        # Modules
        self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
        self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
        self.conv2d_2b = BasicConv2d(
            32, 64, kernel_size=3, stride=1, padding=1
        )
        self.maxpool_3a = nn.MaxPool2d(3, stride=2)
        self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
        self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
        self.maxpool_5a = nn.MaxPool2d(3, stride=2)
        self.mixed_5b = Mixed_5b()
        # 10x Inception-ResNet-A blocks.
        self.repeat = nn.Sequential(
            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),
            Block35(scale=0.17)
        )
        self.mixed_6a = Mixed_6a()
        # 20x Inception-ResNet-B blocks.
        self.repeat_1 = nn.Sequential(
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),
            Block17(scale=0.10), Block17(scale=0.10)
        )
        self.mixed_7a = Mixed_7a()
        # 9x Inception-ResNet-C blocks, then a final residual block without ReLU.
        self.repeat_2 = nn.Sequential(
            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),
            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),
            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20)
        )
        self.block8 = Block8(noReLU=True)
        self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(1536, num_classes)

    def load_imagenet_weights(self):
        """Load ImageNet weights; skip entries with mismatched name or shape."""
        settings = pretrained_settings['inceptionresnetv2']['imagenet']
        pretrain_dict = model_zoo.load_url(settings['url'])
        model_dict = self.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        self.load_state_dict(model_dict)

    def featuremaps(self, x):
        """Run the convolutional trunk and return the 1536-channel feature map."""
        x = self.conv2d_1a(x)
        x = self.conv2d_2a(x)
        x = self.conv2d_2b(x)
        x = self.maxpool_3a(x)
        x = self.conv2d_3b(x)
        x = self.conv2d_4a(x)
        x = self.maxpool_5a(x)
        x = self.mixed_5b(x)
        x = self.repeat(x)
        x = self.mixed_6a(x)
        x = self.repeat_1(x)
        x = self.mixed_7a(x)
        x = self.repeat_2(x)
        x = self.block8(x)
        x = self.conv2d_7b(x)
        return x

    def forward(self, x):
        """Eval mode returns the pooled embedding; train mode returns logits
        (and the embedding too for triplet loss)."""
        f = self.featuremaps(x)
        v = self.global_avgpool(f)
        v = v.view(v.size(0), -1)

        if not self.training:
            return v

        y = self.classifier(v)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))


def inceptionresnetv2(num_classes, loss='softmax', pretrained=True, **kwargs):
    """Factory for Inception-ResNet-V2, optionally ImageNet-pretrained."""
    model = InceptionResNetV2(num_classes=num_classes, loss=loss, **kwargs)
    if pretrained:
        model.load_imagenet_weights()
    return model


================================================
FILE: torchreid/models/inceptionv4.py
================================================
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['inceptionv4']
"""
Code imported from https://github.com/Cadene/pretrained-models.pytorch
"""

# Checkpoint URLs and the preprocessing each checkpoint expects.
pretrained_settings = {
    'inceptionv4': {
        'imagenet': {
            'url':
            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
            'input_space': 'RGB',
            'input_size': [3, 299, 299],
            'input_range': [0, 1],
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5],
            'num_classes': 1000
        },
        'imagenet+background': {
            'url':
            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',
            'input_space': 'RGB',
            'input_size': [3, 299, 299],
            'input_range': [0, 1],
            'mean': [0.5, 0.5, 0.5],
            'std': [0.5, 0.5, 0.5],
            'num_classes': 1001
        }
    }
}


class BasicConv2d(nn.Module):
    """Conv2d (no bias) + BatchNorm + ReLU, the elementary Inception unit."""

    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False
        )  # verify bias false
        self.bn = nn.BatchNorm2d(
            out_planes,
            eps=0.001,  # value found in tensorflow
            momentum=0.1,  # default pytorch value
            affine=True
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class Mixed_3a(nn.Module):
    """Stem block: parallel max-pool and stride-2 conv, concatenated."""

    def __init__(self):
        super(Mixed_3a, self).__init__()
        self.maxpool = nn.MaxPool2d(3, stride=2)
        self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)

    def forward(self, x):
        x0 = self.maxpool(x)
        x1 = self.conv(x)
        out = torch.cat((x0, x1), 1)
        return out


class Mixed_4a(nn.Module):
    """Stem block with factorized 7x7 convolutions in the second branch."""

    def __init__(self):
        super(Mixed_4a, self).__init__()

        self.branch0 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1)
        )

        self.branch1 = nn.Sequential(
            BasicConv2d(160, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),
            BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),
            BasicConv2d(64, 96, kernel_size=(3, 3), stride=1)
        )

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        out = torch.cat((x0, x1), 1)
        return out


class Mixed_5a(nn.Module):
    """Stem block: parallel stride-2 conv and max-pool, concatenated."""

    def __init__(self):
        super(Mixed_5a, self).__init__()
        self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)
        self.maxpool = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.conv(x)
        x1 = self.maxpool(x)
        out = torch.cat((x0, x1), 1)
        return out


class Inception_A(nn.Module):
    """Inception-A block (35x35 grid): four parallel branches (out 384ch)."""

    def __init__(self):
        super(Inception_A, self).__init__()
        self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(384, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(384, 64, kernel_size=1, stride=1),
            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),
            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)
        )

        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(384, 96, kernel_size=1, stride=1)
        )

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class Reduction_A(nn.Module):
    """Reduction-A block: downsamples 35x35 -> 17x17 (out 1024ch)."""

    def __init__(self):
        super(Reduction_A, self).__init__()
        self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)

        self.branch1 = nn.Sequential(
            BasicConv2d(384, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),
            BasicConv2d(224, 256, kernel_size=3, stride=2)
        )

        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
        return out


class Inception_B(nn.Module):
    """Inception-B block (17x17 grid) with factorized 7x7 convs (out 1024ch)."""

    def __init__(self):
        super(Inception_B, self).__init__()
        self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)

        self.branch1 = nn.Sequential(
            BasicConv2d(1024, 192, kernel_size=1, stride=1),
            BasicConv2d(
                192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)
            ),
            BasicConv2d(
                224, 256, kernel_size=(7, 1), stride=1, padding=(3, 0)
            )
        )

        self.branch2 = nn.Sequential(
            BasicConv2d(1024, 192, kernel_size=1, stride=1),
            BasicConv2d(
                192, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)
            ),
            BasicConv2d(
                192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)
            ),
            BasicConv2d(
                224, 224, kernel_size=(7, 1), stride=1, padding=(3, 0)
            ),
            BasicConv2d(
                224, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)
            )
        )

        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(1024, 128, kernel_size=1, stride=1)
        )

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class Reduction_B(nn.Module):
    """Reduction-B block: downsamples 17x17 -> 8x8 (out 1536ch)."""

    def __init__(self):
        super(Reduction_B, self).__init__()

        self.branch0 = nn.Sequential(
            BasicConv2d(1024, 192, kernel_size=1, stride=1),
            BasicConv2d(192, 192, kernel_size=3, stride=2)
        )

        self.branch1 = nn.Sequential(
            BasicConv2d(1024, 256, kernel_size=1, stride=1),
            BasicConv2d(
                256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)
            ),
            BasicConv2d(
                256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)
            ),
            BasicConv2d(320, 320, kernel_size=3, stride=2)
        )

        self.branch2 = nn.MaxPool2d(3, stride=2)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        out = torch.cat((x0, x1, x2), 1)
        return out


class Inception_C(nn.Module):
    """Inception-C block (8x8 grid) with split 1x3/3x1 sub-branches (out 1536ch)."""

    def __init__(self):
        super(Inception_C, self).__init__()

        self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)

        self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
        self.branch1_1a = BasicConv2d(
            384, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
        )
        self.branch1_1b = BasicConv2d(
            384, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
        )

        self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)
        self.branch2_1 = BasicConv2d(
            384, 448, kernel_size=(3, 1), stride=1, padding=(1, 0)
        )
        self.branch2_2 = BasicConv2d(
            448, 512, kernel_size=(1, 3), stride=1, padding=(0, 1)
        )
        self.branch2_3a = BasicConv2d(
            512, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)
        )
        self.branch2_3b = BasicConv2d(
            512, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)
        )

        self.branch3 = nn.Sequential(
            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),
            BasicConv2d(1536, 256, kernel_size=1, stride=1)
        )

    def forward(self, x):
        x0 = self.branch0(x)

        x1_0 = self.branch1_0(x)
        x1_1a = self.branch1_1a(x1_0)
        x1_1b = self.branch1_1b(x1_0)
        x1 = torch.cat((x1_1a, x1_1b), 1)

        x2_0 = self.branch2_0(x)
        x2_1 = self.branch2_1(x2_0)
        x2_2 = self.branch2_2(x2_1)
        x2_3a = self.branch2_3a(x2_2)
        x2_3b = self.branch2_3b(x2_2)
        x2 = torch.cat((x2_3a, x2_3b), 1)

        x3 = self.branch3(x)

        out = torch.cat((x0, x1, x2, x3), 1)
        return out


class InceptionV4(nn.Module):
    """Inception-v4.

    Reference:
        Szegedy et al. Inception-v4, Inception-ResNet and the Impact
        of Residual Connections on Learning. AAAI 2017.

    Public keys:
        - ``inceptionv4``: InceptionV4.
    """

    def __init__(self, num_classes, loss, **kwargs):
        super(InceptionV4, self).__init__()
        self.loss = loss

        # Stem + 4x Inception-A + Reduction-A + 7x Inception-B + Reduction-B
        # + 3x Inception-C, as in the reference architecture.
        self.features = nn.Sequential(
            BasicConv2d(3, 32, kernel_size=3, stride=2),
            BasicConv2d(32, 32, kernel_size=3, stride=1),
            BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),
            Mixed_3a(),
            Mixed_4a(),
            Mixed_5a(),
            Inception_A(),
            Inception_A(),
            Inception_A(),
            Inception_A(),
            Reduction_A(),  # Mixed_6a
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Inception_B(),
            Reduction_B(),  # Mixed_7a
            Inception_C(),
            Inception_C(),
            Inception_C()
        )
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(1536, num_classes)

    def forward(self, x):
        """Eval mode returns the pooled embedding; train mode returns logits
        (and the embedding too for triplet loss)."""
        f = self.features(x)
        v = self.global_avgpool(f)
        v = v.view(v.size(0), -1)

        if not self.training:
            return v

        y = self.classifier(v)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))


def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def inceptionv4(num_classes, loss='softmax', pretrained=True, **kwargs): model = InceptionV4(num_classes, loss, **kwargs) if pretrained: model_url = pretrained_settings['inceptionv4']['imagenet']['url'] init_pretrained_weights(model, model_url) return model ================================================ FILE: torchreid/models/mlfn.py ================================================ from __future__ import division, absolute_import import torch import torch.utils.model_zoo as model_zoo from torch import nn from torch.nn import functional as F __all__ = ['mlfn'] model_urls = { # training epoch = 5, top1 = 51.6 'imagenet': 'https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk', } class MLFNBlock(nn.Module): def __init__( self, in_channels, out_channels, stride, fsm_channels, groups=32 ): super(MLFNBlock, self).__init__() self.groups = groups mid_channels = out_channels // 2 # Factor Modules self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False) self.fm_bn1 = nn.BatchNorm2d(mid_channels) self.fm_conv2 = nn.Conv2d( mid_channels, mid_channels, 3, stride=stride, padding=1, bias=False, groups=self.groups ) self.fm_bn2 = nn.BatchNorm2d(mid_channels) self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False) self.fm_bn3 = nn.BatchNorm2d(out_channels) # Factor Selection Module self.fsm = nn.Sequential( nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, fsm_channels[0], 1), nn.BatchNorm2d(fsm_channels[0]), nn.ReLU(inplace=True), nn.Conv2d(fsm_channels[0], fsm_channels[1], 1), nn.BatchNorm2d(fsm_channels[1]), nn.ReLU(inplace=True), nn.Conv2d(fsm_channels[1], self.groups, 1), nn.BatchNorm2d(self.groups), nn.Sigmoid(), ) self.downsample = None if in_channels != out_channels or 
stride > 1: self.downsample = nn.Sequential( nn.Conv2d( in_channels, out_channels, 1, stride=stride, bias=False ), nn.BatchNorm2d(out_channels), ) def forward(self, x): residual = x s = self.fsm(x) # reduce dimension x = self.fm_conv1(x) x = self.fm_bn1(x) x = F.relu(x, inplace=True) # group convolution x = self.fm_conv2(x) x = self.fm_bn2(x) x = F.relu(x, inplace=True) # factor selection b, c = x.size(0), x.size(1) n = c // self.groups ss = s.repeat(1, n, 1, 1) # from (b, g, 1, 1) to (b, g*n=c, 1, 1) ss = ss.view(b, n, self.groups, 1, 1) ss = ss.permute(0, 2, 1, 3, 4).contiguous() ss = ss.view(b, c, 1, 1) x = ss * x # recover dimension x = self.fm_conv3(x) x = self.fm_bn3(x) x = F.relu(x, inplace=True) if self.downsample is not None: residual = self.downsample(residual) return F.relu(residual + x, inplace=True), s class MLFN(nn.Module): """Multi-Level Factorisation Net. Reference: Chang et al. Multi-Level Factorisation Net for Person Re-Identification. CVPR 2018. Public keys: - ``mlfn``: MLFN (Multi-Level Factorisation Net). 
""" def __init__( self, num_classes, loss='softmax', groups=32, channels=[64, 256, 512, 1024, 2048], embed_dim=1024, **kwargs ): super(MLFN, self).__init__() self.loss = loss self.groups = groups # first convolutional layer self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3) self.bn1 = nn.BatchNorm2d(channels[0]) self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) # main body self.feature = nn.ModuleList( [ # layer 1-3 MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups), MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups), MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups), # layer 4-7 MLFNBlock( channels[1], channels[2], 2, [256, 128], self.groups ), MLFNBlock( channels[2], channels[2], 1, [256, 128], self.groups ), MLFNBlock( channels[2], channels[2], 1, [256, 128], self.groups ), MLFNBlock( channels[2], channels[2], 1, [256, 128], self.groups ), # layer 8-13 MLFNBlock( channels[2], channels[3], 2, [512, 128], self.groups ), MLFNBlock( channels[3], channels[3], 1, [512, 128], self.groups ), MLFNBlock( channels[3], channels[3], 1, [512, 128], self.groups ), MLFNBlock( channels[3], channels[3], 1, [512, 128], self.groups ), MLFNBlock( channels[3], channels[3], 1, [512, 128], self.groups ), MLFNBlock( channels[3], channels[3], 1, [512, 128], self.groups ), # layer 14-16 MLFNBlock( channels[3], channels[4], 2, [512, 128], self.groups ), MLFNBlock( channels[4], channels[4], 1, [512, 128], self.groups ), MLFNBlock( channels[4], channels[4], 1, [512, 128], self.groups ), ] ) self.global_avgpool = nn.AdaptiveAvgPool2d(1) # projection functions self.fc_x = nn.Sequential( nn.Conv2d(channels[4], embed_dim, 1, bias=False), nn.BatchNorm2d(embed_dim), nn.ReLU(inplace=True), ) self.fc_s = nn.Sequential( nn.Conv2d(self.groups * 16, embed_dim, 1, bias=False), nn.BatchNorm2d(embed_dim), nn.ReLU(inplace=True), ) self.classifier = nn.Linear(embed_dim, num_classes) self.init_params() def init_params(self): for m in self.modules(): if 
isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = F.relu(x, inplace=True) x = self.maxpool(x) s_hat = [] for block in self.feature: x, s = block(x) s_hat.append(s) s_hat = torch.cat(s_hat, 1) x = self.global_avgpool(x) x = self.fc_x(x) s_hat = self.fc_s(s_hat) v = (x+s_hat) * 0.5 v = v.view(v.size(0), -1) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError('Unsupported loss: {}'.format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. 
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def mlfn(num_classes, loss='softmax', pretrained=True, **kwargs): model = MLFN(num_classes, loss, **kwargs) if pretrained: # init_pretrained_weights(model, model_urls['imagenet']) import warnings warnings.warn( 'The imagenet pretrained weights need to be manually downloaded from {}' .format(model_urls['imagenet']) ) return model ================================================ FILE: torchreid/models/mobilenetv2.py ================================================ from __future__ import division, absolute_import import torch.utils.model_zoo as model_zoo from torch import nn from torch.nn import functional as F __all__ = ['mobilenetv2_x1_0', 'mobilenetv2_x1_4'] model_urls = { # 1.0: top-1 71.3 'mobilenetv2_x1_0': 'https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c', # 1.4: top-1 73.9 'mobilenetv2_x1_4': 'https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk', } class ConvBlock(nn.Module): """Basic convolutional block. convolution (bias discarded) + batch normalization + relu6. Args: in_c (int): number of input channels. out_c (int): number of output channels. k (int or tuple): kernel size. s (int or tuple): stride. p (int or tuple): padding. g (int): number of blocked connections from input channels to output channels (default: 1). 
""" def __init__(self, in_c, out_c, k, s=1, p=0, g=1): super(ConvBlock, self).__init__() self.conv = nn.Conv2d( in_c, out_c, k, stride=s, padding=p, bias=False, groups=g ) self.bn = nn.BatchNorm2d(out_c) def forward(self, x): return F.relu6(self.bn(self.conv(x))) class Bottleneck(nn.Module): def __init__(self, in_channels, out_channels, expansion_factor, stride=1): super(Bottleneck, self).__init__() mid_channels = in_channels * expansion_factor self.use_residual = stride == 1 and in_channels == out_channels self.conv1 = ConvBlock(in_channels, mid_channels, 1) self.dwconv2 = ConvBlock( mid_channels, mid_channels, 3, stride, 1, g=mid_channels ) self.conv3 = nn.Sequential( nn.Conv2d(mid_channels, out_channels, 1, bias=False), nn.BatchNorm2d(out_channels), ) def forward(self, x): m = self.conv1(x) m = self.dwconv2(m) m = self.conv3(m) if self.use_residual: return x + m else: return m class MobileNetV2(nn.Module): """MobileNetV2. Reference: Sandler et al. MobileNetV2: Inverted Residuals and Linear Bottlenecks. CVPR 2018. Public keys: - ``mobilenetv2_x1_0``: MobileNetV2 x1.0. - ``mobilenetv2_x1_4``: MobileNetV2 x1.4. 
""" def __init__( self, num_classes, width_mult=1, loss='softmax', fc_dims=None, dropout_p=None, **kwargs ): super(MobileNetV2, self).__init__() self.loss = loss self.in_channels = int(32 * width_mult) self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280 # construct layers self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1) self.conv2 = self._make_layer( Bottleneck, 1, int(16 * width_mult), 1, 1 ) self.conv3 = self._make_layer( Bottleneck, 6, int(24 * width_mult), 2, 2 ) self.conv4 = self._make_layer( Bottleneck, 6, int(32 * width_mult), 3, 2 ) self.conv5 = self._make_layer( Bottleneck, 6, int(64 * width_mult), 4, 2 ) self.conv6 = self._make_layer( Bottleneck, 6, int(96 * width_mult), 3, 1 ) self.conv7 = self._make_layer( Bottleneck, 6, int(160 * width_mult), 3, 2 ) self.conv8 = self._make_layer( Bottleneck, 6, int(320 * width_mult), 1, 1 ) self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1) self.global_avgpool = nn.AdaptiveAvgPool2d(1) self.fc = self._construct_fc_layer( fc_dims, self.feature_dim, dropout_p ) self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _make_layer(self, block, t, c, n, s): # t: expansion factor # c: output channels # n: number of blocks # s: stride for first layer layers = [] layers.append(block(self.in_channels, c, t, s)) self.in_channels = c for i in range(1, n): layers.append(block(self.in_channels, c, t)) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer. 
Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def featuremaps(self, x): x = self.conv1(x) x = self.conv2(x) x = self.conv3(x) x = self.conv4(x) x = self.conv5(x) x = self.conv6(x) x = self.conv7(x) x = self.conv8(x) x = self.conv9(x) return x def forward(self, x): f = self.featuremaps(x) v = self.global_avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported loss: {}".format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. 
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs): model = MobileNetV2( num_classes, loss=loss, width_mult=1, fc_dims=None, dropout_p=None, **kwargs ) if pretrained: # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0']) import warnings warnings.warn( 'The imagenet pretrained weights need to be manually downloaded from {}' .format(model_urls['mobilenetv2_x1_0']) ) return model def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs): model = MobileNetV2( num_classes, loss=loss, width_mult=1.4, fc_dims=None, dropout_p=None, **kwargs ) if pretrained: # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4']) import warnings warnings.warn( 'The imagenet pretrained weights need to be manually downloaded from {}' .format(model_urls['mobilenetv2_x1_4']) ) return model ================================================ FILE: torchreid/models/mudeep.py ================================================ from __future__ import division, absolute_import import torch from torch import nn from torch.nn import functional as F __all__ = ['MuDeep'] class ConvBlock(nn.Module): """Basic convolutional block. convolution + batch normalization + relu. Args: in_c (int): number of input channels. out_c (int): number of output channels. k (int or tuple): kernel size. s (int or tuple): stride. p (int or tuple): padding. 
""" def __init__(self, in_c, out_c, k, s, p): super(ConvBlock, self).__init__() self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p) self.bn = nn.BatchNorm2d(out_c) def forward(self, x): return F.relu(self.bn(self.conv(x))) class ConvLayers(nn.Module): """Preprocessing layers.""" def __init__(self): super(ConvLayers, self).__init__() self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1) self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) def forward(self, x): x = self.conv1(x) x = self.conv2(x) x = self.maxpool(x) return x class MultiScaleA(nn.Module): """Multi-scale stream layer A (Sec.3.1)""" def __init__(self): super(MultiScaleA, self).__init__() self.stream1 = nn.Sequential( ConvBlock(96, 96, k=1, s=1, p=0), ConvBlock(96, 24, k=3, s=1, p=1), ) self.stream2 = nn.Sequential( nn.AvgPool2d(kernel_size=3, stride=1, padding=1), ConvBlock(96, 24, k=1, s=1, p=0), ) self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0) self.stream4 = nn.Sequential( ConvBlock(96, 16, k=1, s=1, p=0), ConvBlock(16, 24, k=3, s=1, p=1), ConvBlock(24, 24, k=3, s=1, p=1), ) def forward(self, x): s1 = self.stream1(x) s2 = self.stream2(x) s3 = self.stream3(x) s4 = self.stream4(x) y = torch.cat([s1, s2, s3, s4], dim=1) return y class Reduction(nn.Module): """Reduction layer (Sec.3.1)""" def __init__(self): super(Reduction, self).__init__() self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1) self.stream3 = nn.Sequential( ConvBlock(96, 48, k=1, s=1, p=0), ConvBlock(48, 56, k=3, s=1, p=1), ConvBlock(56, 64, k=3, s=2, p=1), ) def forward(self, x): s1 = self.stream1(x) s2 = self.stream2(x) s3 = self.stream3(x) y = torch.cat([s1, s2, s3], dim=1) return y class MultiScaleB(nn.Module): """Multi-scale stream layer B (Sec.3.1)""" def __init__(self): super(MultiScaleB, self).__init__() self.stream1 = nn.Sequential( nn.AvgPool2d(kernel_size=3, stride=1, padding=1), ConvBlock(256, 256, k=1, s=1, p=0), 
) self.stream2 = nn.Sequential( ConvBlock(256, 64, k=1, s=1, p=0), ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)), ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), ) self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0) self.stream4 = nn.Sequential( ConvBlock(256, 64, k=1, s=1, p=0), ConvBlock(64, 64, k=(1, 3), s=1, p=(0, 1)), ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)), ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)), ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), ) def forward(self, x): s1 = self.stream1(x) s2 = self.stream2(x) s3 = self.stream3(x) s4 = self.stream4(x) return s1, s2, s3, s4 class Fusion(nn.Module): """Saliency-based learning fusion layer (Sec.3.2)""" def __init__(self): super(Fusion, self).__init__() self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1)) self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1)) self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1)) self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1)) # We add an average pooling layer to reduce the spatial dimension # of feature maps, which differs from the original paper. self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0) def forward(self, x1, x2, x3, x4): s1 = self.a1.expand_as(x1) * x1 s2 = self.a2.expand_as(x2) * x2 s3 = self.a3.expand_as(x3) * x3 s4 = self.a4.expand_as(x4) * x4 y = self.avgpool(s1 + s2 + s3 + s4) return y class MuDeep(nn.Module): """Multiscale deep neural network. Reference: Qian et al. Multi-scale Deep Learning Architectures for Person Re-identification. ICCV 2017. Public keys: - ``mudeep``: Multiscale deep neural network. """ def __init__(self, num_classes, loss='softmax', **kwargs): super(MuDeep, self).__init__() self.loss = loss self.block1 = ConvLayers() self.block2 = MultiScaleA() self.block3 = Reduction() self.block4 = MultiScaleB() self.block5 = Fusion() # Due to this fully connected layer, input image has to be fixed # in shape, i.e. (3, 256, 128), such that the last convolutional feature # maps are of shape (256, 16, 8). 
If input shape is changed, # the input dimension of this layer has to be changed accordingly. self.fc = nn.Sequential( nn.Linear(256 * 16 * 8, 4096), nn.BatchNorm1d(4096), nn.ReLU(), ) self.classifier = nn.Linear(4096, num_classes) self.feat_dim = 4096 def featuremaps(self, x): x = self.block1(x) x = self.block2(x) x = self.block3(x) x = self.block4(x) x = self.block5(*x) return x def forward(self, x): x = self.featuremaps(x) x = x.view(x.size(0), -1) x = self.fc(x) y = self.classifier(x) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, x else: raise KeyError('Unsupported loss: {}'.format(self.loss)) ================================================ FILE: torchreid/models/nasnet.py ================================================ from __future__ import division, absolute_import import torch import torch.nn as nn import torch.nn.functional as F import torch.utils.model_zoo as model_zoo __all__ = ['nasnetamobile'] """ NASNet Mobile Thanks to Anastasiia (https://github.com/DagnyT) for the great help, support and motivation! 
------------------------------------------------------------------------------------ Architecture | Top-1 Acc | Top-5 Acc | Multiply-Adds | Params (M) ------------------------------------------------------------------------------------ | NASNet-A (4 @ 1056) | 74.08% | 91.74% | 564 M | 5.3 | ------------------------------------------------------------------------------------ # References: - [Learning Transferable Architectures for Scalable Image Recognition] (https://arxiv.org/abs/1707.07012) """ """ Code imported from https://github.com/Cadene/pretrained-models.pytorch """ pretrained_settings = { 'nasnetamobile': { 'imagenet': { # 'url': 'https://github.com/veronikayurchuk/pretrained-models.pytorch/releases/download/v1.0/nasnetmobile-7e03cead.pth.tar', 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/nasnetamobile-7e03cead.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], # resize 256 'input_range': [0, 1], 'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5], 'num_classes': 1000 }, # 'imagenet+background': { # # 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/nasnetalarge-a1897284.pth', # 'input_space': 'RGB', # 'input_size': [3, 224, 224], # resize 256 # 'input_range': [0, 1], # 'mean': [0.5, 0.5, 0.5], # 'std': [0.5, 0.5, 0.5], # 'num_classes': 1001 # } } } class MaxPoolPad(nn.Module): def __init__(self): super(MaxPoolPad, self).__init__() self.pad = nn.ZeroPad2d((1, 0, 1, 0)) self.pool = nn.MaxPool2d(3, stride=2, padding=1) def forward(self, x): x = self.pad(x) x = self.pool(x) x = x[:, :, 1:, 1:].contiguous() return x class AvgPoolPad(nn.Module): def __init__(self, stride=2, padding=1): super(AvgPoolPad, self).__init__() self.pad = nn.ZeroPad2d((1, 0, 1, 0)) self.pool = nn.AvgPool2d( 3, stride=stride, padding=padding, count_include_pad=False ) def forward(self, x): x = self.pad(x) x = self.pool(x) x = x[:, :, 1:, 1:].contiguous() return x class SeparableConv2d(nn.Module): def __init__( self, in_channels, out_channels, dw_kernel, dw_stride, dw_padding, 
bias=False ): super(SeparableConv2d, self).__init__() self.depthwise_conv2d = nn.Conv2d( in_channels, in_channels, dw_kernel, stride=dw_stride, padding=dw_padding, bias=bias, groups=in_channels ) self.pointwise_conv2d = nn.Conv2d( in_channels, out_channels, 1, stride=1, bias=bias ) def forward(self, x): x = self.depthwise_conv2d(x) x = self.pointwise_conv2d(x) return x class BranchSeparables(nn.Module): def __init__( self, in_channels, out_channels, kernel_size, stride, padding, name=None, bias=False ): super(BranchSeparables, self).__init__() self.relu = nn.ReLU() self.separable_1 = SeparableConv2d( in_channels, in_channels, kernel_size, stride, padding, bias=bias ) self.bn_sep_1 = nn.BatchNorm2d( in_channels, eps=0.001, momentum=0.1, affine=True ) self.relu1 = nn.ReLU() self.separable_2 = SeparableConv2d( in_channels, out_channels, kernel_size, 1, padding, bias=bias ) self.bn_sep_2 = nn.BatchNorm2d( out_channels, eps=0.001, momentum=0.1, affine=True ) self.name = name def forward(self, x): x = self.relu(x) if self.name == 'specific': x = nn.ZeroPad2d((1, 0, 1, 0))(x) x = self.separable_1(x) if self.name == 'specific': x = x[:, :, 1:, 1:].contiguous() x = self.bn_sep_1(x) x = self.relu1(x) x = self.separable_2(x) x = self.bn_sep_2(x) return x class BranchSeparablesStem(nn.Module): def __init__( self, in_channels, out_channels, kernel_size, stride, padding, bias=False ): super(BranchSeparablesStem, self).__init__() self.relu = nn.ReLU() self.separable_1 = SeparableConv2d( in_channels, out_channels, kernel_size, stride, padding, bias=bias ) self.bn_sep_1 = nn.BatchNorm2d( out_channels, eps=0.001, momentum=0.1, affine=True ) self.relu1 = nn.ReLU() self.separable_2 = SeparableConv2d( out_channels, out_channels, kernel_size, 1, padding, bias=bias ) self.bn_sep_2 = nn.BatchNorm2d( out_channels, eps=0.001, momentum=0.1, affine=True ) def forward(self, x): x = self.relu(x) x = self.separable_1(x) x = self.bn_sep_1(x) x = self.relu1(x) x = self.separable_2(x) x = 
self.bn_sep_2(x) return x class BranchSeparablesReduction(BranchSeparables): def __init__( self, in_channels, out_channels, kernel_size, stride, padding, z_padding=1, bias=False ): BranchSeparables.__init__( self, in_channels, out_channels, kernel_size, stride, padding, bias ) self.padding = nn.ZeroPad2d((z_padding, 0, z_padding, 0)) def forward(self, x): x = self.relu(x) x = self.padding(x) x = self.separable_1(x) x = x[:, :, 1:, 1:].contiguous() x = self.bn_sep_1(x) x = self.relu1(x) x = self.separable_2(x) x = self.bn_sep_2(x) return x class CellStem0(nn.Module): def __init__(self, stem_filters, num_filters=42): super(CellStem0, self).__init__() self.num_filters = num_filters self.stem_filters = stem_filters self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) self.conv_1x1.add_module( 'conv', nn.Conv2d( self.stem_filters, self.num_filters, 1, stride=1, bias=False ) ) self.conv_1x1.add_module( 'bn', nn.BatchNorm2d( self.num_filters, eps=0.001, momentum=0.1, affine=True ) ) self.comb_iter_0_left = BranchSeparables( self.num_filters, self.num_filters, 5, 2, 2 ) self.comb_iter_0_right = BranchSeparablesStem( self.stem_filters, self.num_filters, 7, 2, 3, bias=False ) self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1) self.comb_iter_1_right = BranchSeparablesStem( self.stem_filters, self.num_filters, 7, 2, 3, bias=False ) self.comb_iter_2_left = nn.AvgPool2d( 3, stride=2, padding=1, count_include_pad=False ) self.comb_iter_2_right = BranchSeparablesStem( self.stem_filters, self.num_filters, 5, 2, 2, bias=False ) self.comb_iter_3_right = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_4_left = BranchSeparables( self.num_filters, self.num_filters, 3, 1, 1, bias=False ) self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1) def forward(self, x): x1 = self.conv_1x1(x) x_comb_iter_0_left = self.comb_iter_0_left(x1) x_comb_iter_0_right = self.comb_iter_0_right(x) x_comb_iter_0 = x_comb_iter_0_left + 
x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x1) x_comb_iter_1_right = self.comb_iter_1_right(x) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x1) x_comb_iter_2_right = self.comb_iter_2_right(x) x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0) x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1 x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0) x_comb_iter_4_right = self.comb_iter_4_right(x1) x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right x_out = torch.cat( [x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1 ) return x_out class CellStem1(nn.Module): def __init__(self, stem_filters, num_filters): super(CellStem1, self).__init__() self.num_filters = num_filters self.stem_filters = stem_filters self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) self.conv_1x1.add_module( 'conv', nn.Conv2d( 2 * self.num_filters, self.num_filters, 1, stride=1, bias=False ) ) self.conv_1x1.add_module( 'bn', nn.BatchNorm2d( self.num_filters, eps=0.001, momentum=0.1, affine=True ) ) self.relu = nn.ReLU() self.path_1 = nn.Sequential() self.path_1.add_module( 'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False) ) self.path_1.add_module( 'conv', nn.Conv2d( self.stem_filters, self.num_filters // 2, 1, stride=1, bias=False ) ) self.path_2 = nn.ModuleList() self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1))) self.path_2.add_module( 'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False) ) self.path_2.add_module( 'conv', nn.Conv2d( self.stem_filters, self.num_filters // 2, 1, stride=1, bias=False ) ) self.final_path_bn = nn.BatchNorm2d( self.num_filters, eps=0.001, momentum=0.1, affine=True ) self.comb_iter_0_left = BranchSeparables( self.num_filters, self.num_filters, 5, 2, 2, name='specific', bias=False ) self.comb_iter_0_right = BranchSeparables( self.num_filters, self.num_filters, 
7, 2, 3, name='specific', bias=False ) # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1) self.comb_iter_1_left = MaxPoolPad() self.comb_iter_1_right = BranchSeparables( self.num_filters, self.num_filters, 7, 2, 3, name='specific', bias=False ) # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False) self.comb_iter_2_left = AvgPoolPad() self.comb_iter_2_right = BranchSeparables( self.num_filters, self.num_filters, 5, 2, 2, name='specific', bias=False ) self.comb_iter_3_right = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_4_left = BranchSeparables( self.num_filters, self.num_filters, 3, 1, 1, name='specific', bias=False ) # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1) self.comb_iter_4_right = MaxPoolPad() def forward(self, x_conv0, x_stem_0): x_left = self.conv_1x1(x_stem_0) x_relu = self.relu(x_conv0) # path 1 x_path1 = self.path_1(x_relu) # path 2 x_path2 = self.path_2.pad(x_relu) x_path2 = x_path2[:, :, 1:, 1:] x_path2 = self.path_2.avgpool(x_path2) x_path2 = self.path_2.conv(x_path2) # final path x_right = self.final_path_bn(torch.cat([x_path1, x_path2], 1)) x_comb_iter_0_left = self.comb_iter_0_left(x_left) x_comb_iter_0_right = self.comb_iter_0_right(x_right) x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x_left) x_comb_iter_1_right = self.comb_iter_1_right(x_right) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x_left) x_comb_iter_2_right = self.comb_iter_2_right(x_right) x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0) x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1 x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0) x_comb_iter_4_right = self.comb_iter_4_right(x_left) x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right x_out = torch.cat( [x_comb_iter_1, x_comb_iter_2, 
x_comb_iter_3, x_comb_iter_4], 1 ) return x_out class FirstCell(nn.Module): def __init__( self, in_channels_left, out_channels_left, in_channels_right, out_channels_right ): super(FirstCell, self).__init__() self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) self.conv_1x1.add_module( 'conv', nn.Conv2d( in_channels_right, out_channels_right, 1, stride=1, bias=False ) ) self.conv_1x1.add_module( 'bn', nn.BatchNorm2d( out_channels_right, eps=0.001, momentum=0.1, affine=True ) ) self.relu = nn.ReLU() self.path_1 = nn.Sequential() self.path_1.add_module( 'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False) ) self.path_1.add_module( 'conv', nn.Conv2d( in_channels_left, out_channels_left, 1, stride=1, bias=False ) ) self.path_2 = nn.ModuleList() self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1))) self.path_2.add_module( 'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False) ) self.path_2.add_module( 'conv', nn.Conv2d( in_channels_left, out_channels_left, 1, stride=1, bias=False ) ) self.final_path_bn = nn.BatchNorm2d( out_channels_left * 2, eps=0.001, momentum=0.1, affine=True ) self.comb_iter_0_left = BranchSeparables( out_channels_right, out_channels_right, 5, 1, 2, bias=False ) self.comb_iter_0_right = BranchSeparables( out_channels_right, out_channels_right, 3, 1, 1, bias=False ) self.comb_iter_1_left = BranchSeparables( out_channels_right, out_channels_right, 5, 1, 2, bias=False ) self.comb_iter_1_right = BranchSeparables( out_channels_right, out_channels_right, 3, 1, 1, bias=False ) self.comb_iter_2_left = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_3_left = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_3_right = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_4_left = BranchSeparables( out_channels_right, out_channels_right, 3, 1, 1, bias=False ) def forward(self, x, x_prev): x_relu = self.relu(x_prev) # path 1 x_path1 = 
self.path_1(x_relu) # path 2 x_path2 = self.path_2.pad(x_relu) x_path2 = x_path2[:, :, 1:, 1:] x_path2 = self.path_2.avgpool(x_path2) x_path2 = self.path_2.conv(x_path2) # final path x_left = self.final_path_bn(torch.cat([x_path1, x_path2], 1)) x_right = self.conv_1x1(x) x_comb_iter_0_left = self.comb_iter_0_left(x_right) x_comb_iter_0_right = self.comb_iter_0_right(x_left) x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x_left) x_comb_iter_1_right = self.comb_iter_1_right(x_left) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x_right) x_comb_iter_2 = x_comb_iter_2_left + x_left x_comb_iter_3_left = self.comb_iter_3_left(x_left) x_comb_iter_3_right = self.comb_iter_3_right(x_left) x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right x_comb_iter_4_left = self.comb_iter_4_left(x_right) x_comb_iter_4 = x_comb_iter_4_left + x_right x_out = torch.cat( [ x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4 ], 1 ) return x_out class NormalCell(nn.Module): def __init__( self, in_channels_left, out_channels_left, in_channels_right, out_channels_right ): super(NormalCell, self).__init__() self.conv_prev_1x1 = nn.Sequential() self.conv_prev_1x1.add_module('relu', nn.ReLU()) self.conv_prev_1x1.add_module( 'conv', nn.Conv2d( in_channels_left, out_channels_left, 1, stride=1, bias=False ) ) self.conv_prev_1x1.add_module( 'bn', nn.BatchNorm2d( out_channels_left, eps=0.001, momentum=0.1, affine=True ) ) self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) self.conv_1x1.add_module( 'conv', nn.Conv2d( in_channels_right, out_channels_right, 1, stride=1, bias=False ) ) self.conv_1x1.add_module( 'bn', nn.BatchNorm2d( out_channels_right, eps=0.001, momentum=0.1, affine=True ) ) self.comb_iter_0_left = BranchSeparables( out_channels_right, out_channels_right, 5, 1, 2, bias=False ) self.comb_iter_0_right = BranchSeparables( 
out_channels_left, out_channels_left, 3, 1, 1, bias=False ) self.comb_iter_1_left = BranchSeparables( out_channels_left, out_channels_left, 5, 1, 2, bias=False ) self.comb_iter_1_right = BranchSeparables( out_channels_left, out_channels_left, 3, 1, 1, bias=False ) self.comb_iter_2_left = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_3_left = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_3_right = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_4_left = BranchSeparables( out_channels_right, out_channels_right, 3, 1, 1, bias=False ) def forward(self, x, x_prev): x_left = self.conv_prev_1x1(x_prev) x_right = self.conv_1x1(x) x_comb_iter_0_left = self.comb_iter_0_left(x_right) x_comb_iter_0_right = self.comb_iter_0_right(x_left) x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x_left) x_comb_iter_1_right = self.comb_iter_1_right(x_left) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x_right) x_comb_iter_2 = x_comb_iter_2_left + x_left x_comb_iter_3_left = self.comb_iter_3_left(x_left) x_comb_iter_3_right = self.comb_iter_3_right(x_left) x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right x_comb_iter_4_left = self.comb_iter_4_left(x_right) x_comb_iter_4 = x_comb_iter_4_left + x_right x_out = torch.cat( [ x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4 ], 1 ) return x_out class ReductionCell0(nn.Module): def __init__( self, in_channels_left, out_channels_left, in_channels_right, out_channels_right ): super(ReductionCell0, self).__init__() self.conv_prev_1x1 = nn.Sequential() self.conv_prev_1x1.add_module('relu', nn.ReLU()) self.conv_prev_1x1.add_module( 'conv', nn.Conv2d( in_channels_left, out_channels_left, 1, stride=1, bias=False ) ) self.conv_prev_1x1.add_module( 'bn', nn.BatchNorm2d( out_channels_left, eps=0.001, momentum=0.1, 
affine=True ) ) self.conv_1x1 = nn.Sequential() self.conv_1x1.add_module('relu', nn.ReLU()) self.conv_1x1.add_module( 'conv', nn.Conv2d( in_channels_right, out_channels_right, 1, stride=1, bias=False ) ) self.conv_1x1.add_module( 'bn', nn.BatchNorm2d( out_channels_right, eps=0.001, momentum=0.1, affine=True ) ) self.comb_iter_0_left = BranchSeparablesReduction( out_channels_right, out_channels_right, 5, 2, 2, bias=False ) self.comb_iter_0_right = BranchSeparablesReduction( out_channels_right, out_channels_right, 7, 2, 3, bias=False ) self.comb_iter_1_left = MaxPoolPad() self.comb_iter_1_right = BranchSeparablesReduction( out_channels_right, out_channels_right, 7, 2, 3, bias=False ) self.comb_iter_2_left = AvgPoolPad() self.comb_iter_2_right = BranchSeparablesReduction( out_channels_right, out_channels_right, 5, 2, 2, bias=False ) self.comb_iter_3_right = nn.AvgPool2d( 3, stride=1, padding=1, count_include_pad=False ) self.comb_iter_4_left = BranchSeparablesReduction( out_channels_right, out_channels_right, 3, 1, 1, bias=False ) self.comb_iter_4_right = MaxPoolPad() def forward(self, x, x_prev): x_left = self.conv_prev_1x1(x_prev) x_right = self.conv_1x1(x) x_comb_iter_0_left = self.comb_iter_0_left(x_right) x_comb_iter_0_right = self.comb_iter_0_right(x_left) x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right x_comb_iter_1_left = self.comb_iter_1_left(x_right) x_comb_iter_1_right = self.comb_iter_1_right(x_left) x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right x_comb_iter_2_left = self.comb_iter_2_left(x_right) x_comb_iter_2_right = self.comb_iter_2_right(x_left) x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0) x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1 x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0) x_comb_iter_4_right = self.comb_iter_4_right(x_right) x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right x_out = torch.cat( [x_comb_iter_1, x_comb_iter_2, 
class ReductionCell1(nn.Module):
    """NASNet reduction cell, second variant.

    Combines a projected previous-cell output (``x_prev``, left path) with a
    projected current input (``x``, right path) through five branch pairs and
    concatenates four of the intermediate sums along the channel axis.
    Attribute names must stay exactly as-is: pretrained checkpoints are
    matched by state_dict key.

    Unlike ``ReductionCell0``, the separable branches here are plain
    ``BranchSeparables`` modules (name='specific') instead of
    ``BranchSeparablesReduction``.
    """

    def __init__(
        self, in_channels_left, out_channels_left, in_channels_right,
        out_channels_right
    ):
        super(ReductionCell1, self).__init__()
        # 1x1 projection (relu -> conv -> bn) of the previous cell's output.
        self.conv_prev_1x1 = nn.Sequential()
        self.conv_prev_1x1.add_module('relu', nn.ReLU())
        self.conv_prev_1x1.add_module(
            'conv',
            nn.Conv2d(
                in_channels_left, out_channels_left, 1, stride=1, bias=False
            )
        )
        self.conv_prev_1x1.add_module(
            'bn',
            nn.BatchNorm2d(
                out_channels_left, eps=0.001, momentum=0.1, affine=True
            )
        )

        # 1x1 projection (relu -> conv -> bn) of the current input.
        self.conv_1x1 = nn.Sequential()
        self.conv_1x1.add_module('relu', nn.ReLU())
        self.conv_1x1.add_module(
            'conv',
            nn.Conv2d(
                in_channels_right, out_channels_right, 1, stride=1, bias=False
            )
        )
        self.conv_1x1.add_module(
            'bn',
            nn.BatchNorm2d(
                out_channels_right, eps=0.001, momentum=0.1, affine=True
            )
        )

        # Branch pairs; the `comb_iter_*` naming mirrors the original NASNet
        # implementation and the pretrained weight files.
        self.comb_iter_0_left = BranchSeparables(
            out_channels_right,
            out_channels_right,
            5,
            2,
            2,
            name='specific',
            bias=False
        )
        self.comb_iter_0_right = BranchSeparables(
            out_channels_right,
            out_channels_right,
            7,
            2,
            3,
            name='specific',
            bias=False
        )

        # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
        self.comb_iter_1_left = MaxPoolPad()
        self.comb_iter_1_right = BranchSeparables(
            out_channels_right,
            out_channels_right,
            7,
            2,
            3,
            name='specific',
            bias=False
        )

        # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)
        self.comb_iter_2_left = AvgPoolPad()
        self.comb_iter_2_right = BranchSeparables(
            out_channels_right,
            out_channels_right,
            5,
            2,
            2,
            name='specific',
            bias=False
        )

        self.comb_iter_3_right = nn.AvgPool2d(
            3, stride=1, padding=1, count_include_pad=False
        )

        self.comb_iter_4_left = BranchSeparables(
            out_channels_right,
            out_channels_right,
            3,
            1,
            1,
            name='specific',
            bias=False
        )
        # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
        self.comb_iter_4_right = MaxPoolPad()

    def forward(self, x, x_prev):
        # Left path comes from the previous cell, right path from the input.
        x_left = self.conv_prev_1x1(x_prev)
        x_right = self.conv_1x1(x)

        x_comb_iter_0_left = self.comb_iter_0_left(x_right)
        x_comb_iter_0_right = self.comb_iter_0_right(x_left)
        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right

        x_comb_iter_1_left = self.comb_iter_1_left(x_right)
        x_comb_iter_1_right = self.comb_iter_1_right(x_left)
        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right

        x_comb_iter_2_left = self.comb_iter_2_left(x_right)
        x_comb_iter_2_right = self.comb_iter_2_right(x_left)
        x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right

        # Branch 3 reuses the branch-0 sum as its input.
        x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)
        x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1

        x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)
        x_comb_iter_4_right = self.comb_iter_4_right(x_right)
        x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right

        # Branch 0's sum is consumed internally and is not concatenated.
        x_out = torch.cat(
            [x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1
        )
        return x_out
""" def __init__( self, num_classes, loss, stem_filters=32, penultimate_filters=1056, filters_multiplier=2, **kwargs ): super(NASNetAMobile, self).__init__() self.stem_filters = stem_filters self.penultimate_filters = penultimate_filters self.filters_multiplier = filters_multiplier self.loss = loss filters = self.penultimate_filters // 24 # 24 is default value for the architecture self.conv0 = nn.Sequential() self.conv0.add_module( 'conv', nn.Conv2d( in_channels=3, out_channels=self.stem_filters, kernel_size=3, padding=0, stride=2, bias=False ) ) self.conv0.add_module( 'bn', nn.BatchNorm2d( self.stem_filters, eps=0.001, momentum=0.1, affine=True ) ) self.cell_stem_0 = CellStem0( self.stem_filters, num_filters=filters // (filters_multiplier**2) ) self.cell_stem_1 = CellStem1( self.stem_filters, num_filters=filters // filters_multiplier ) self.cell_0 = FirstCell( in_channels_left=filters, out_channels_left=filters // 2, # 1, 0.5 in_channels_right=2 * filters, out_channels_right=filters ) # 2, 1 self.cell_1 = NormalCell( in_channels_left=2 * filters, out_channels_left=filters, # 2, 1 in_channels_right=6 * filters, out_channels_right=filters ) # 6, 1 self.cell_2 = NormalCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=6 * filters, out_channels_right=filters ) # 6, 1 self.cell_3 = NormalCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=6 * filters, out_channels_right=filters ) # 6, 1 self.reduction_cell_0 = ReductionCell0( in_channels_left=6 * filters, out_channels_left=2 * filters, # 6, 2 in_channels_right=6 * filters, out_channels_right=2 * filters ) # 6, 2 self.cell_6 = FirstCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=8 * filters, out_channels_right=2 * filters ) # 8, 2 self.cell_7 = NormalCell( in_channels_left=8 * filters, out_channels_left=2 * filters, # 8, 2 in_channels_right=12 * filters, out_channels_right=2 * filters ) # 12, 2 self.cell_8 = 
NormalCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=12 * filters, out_channels_right=2 * filters ) # 12, 2 self.cell_9 = NormalCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=12 * filters, out_channels_right=2 * filters ) # 12, 2 self.reduction_cell_1 = ReductionCell1( in_channels_left=12 * filters, out_channels_left=4 * filters, # 12, 4 in_channels_right=12 * filters, out_channels_right=4 * filters ) # 12, 4 self.cell_12 = FirstCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=16 * filters, out_channels_right=4 * filters ) # 16, 4 self.cell_13 = NormalCell( in_channels_left=16 * filters, out_channels_left=4 * filters, # 16, 4 in_channels_right=24 * filters, out_channels_right=4 * filters ) # 24, 4 self.cell_14 = NormalCell( in_channels_left=24 * filters, out_channels_left=4 * filters, # 24, 4 in_channels_right=24 * filters, out_channels_right=4 * filters ) # 24, 4 self.cell_15 = NormalCell( in_channels_left=24 * filters, out_channels_left=4 * filters, # 24, 4 in_channels_right=24 * filters, out_channels_right=4 * filters ) # 24, 4 self.relu = nn.ReLU() self.dropout = nn.Dropout() self.classifier = nn.Linear(24 * filters, num_classes) self._init_params() def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def features(self, input): x_conv0 = self.conv0(input) x_stem_0 = self.cell_stem_0(x_conv0) x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0) x_cell_0 = self.cell_0(x_stem_1, 
def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    # Fetch the reference checkpoint (cached locally by model_zoo).
    remote_state = model_zoo.load_url(model_url)
    current_state = model.state_dict()
    # Keep only entries whose key exists in this model AND whose tensor
    # shape matches; everything else stays at its current value.
    compatible = {}
    for name, tensor in remote_state.items():
        if name in current_state and current_state[name].size() == tensor.size():
            compatible[name] = tensor
    current_state.update(compatible)
    model.load_state_dict(current_state)
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def nasnetamobile(num_classes, loss='softmax', pretrained=True, **kwargs): model = NASNetAMobile(num_classes, loss, **kwargs) if pretrained: model_url = pretrained_settings['nasnetamobile']['imagenet']['url'] init_pretrained_weights(model, model_url) return model ================================================ FILE: torchreid/models/osnet.py ================================================ from __future__ import division, absolute_import import warnings import torch from torch import nn from torch.nn import functional as F __all__ = [ 'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0' ] pretrained_urls = { 'osnet_x1_0': 'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY', 'osnet_x0_75': 'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq', 'osnet_x0_5': 'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i', 'osnet_x0_25': 'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs', 'osnet_ibn_x1_0': 'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l' } ########## # Basic layers ########## class ConvLayer(nn.Module): """Convolution layer (conv + bn + relu).""" def __init__( self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False ): super(ConvLayer, self).__init__() self.conv = nn.Conv2d( in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups ) if IN: self.bn = nn.InstanceNorm2d(out_channels, affine=True) else: self.bn = nn.BatchNorm2d(out_channels) self.relu = nn.ReLU(inplace=True) def forward(self, x): x = self.conv(x) x = self.bn(x) x = self.relu(x) return x class Conv1x1(nn.Module): """1x1 convolution + bn + relu.""" def __init__(self, 
class Conv1x1Linear(nn.Module):
    """1x1 convolution + bn (w/o non-linearity)."""

    def __init__(self, in_channels, out_channels, stride=1):
        super(Conv1x1Linear, self).__init__()
        # Pointwise projection; bias is omitted because bn follows.
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        # No activation: used where the caller applies its own non-linearity.
        return self.bn(self.conv(x))
""" def __init__(self, in_channels, out_channels): super(LightConv3x3, self).__init__() self.conv1 = nn.Conv2d( in_channels, out_channels, 1, stride=1, padding=0, bias=False ) self.conv2 = nn.Conv2d( out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels ) self.bn = nn.BatchNorm2d(out_channels) self.relu = nn.ReLU(inplace=True) def forward(self, x): x = self.conv1(x) x = self.conv2(x) x = self.bn(x) x = self.relu(x) return x ########## # Building blocks for omni-scale feature learning ########## class ChannelGate(nn.Module): """A mini-network that generates channel-wise gates conditioned on input tensor.""" def __init__( self, in_channels, num_gates=None, return_gates=False, gate_activation='sigmoid', reduction=16, layer_norm=False ): super(ChannelGate, self).__init__() if num_gates is None: num_gates = in_channels self.return_gates = return_gates self.global_avgpool = nn.AdaptiveAvgPool2d(1) self.fc1 = nn.Conv2d( in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0 ) self.norm1 = None if layer_norm: self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1)) self.relu = nn.ReLU(inplace=True) self.fc2 = nn.Conv2d( in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0 ) if gate_activation == 'sigmoid': self.gate_activation = nn.Sigmoid() elif gate_activation == 'relu': self.gate_activation = nn.ReLU(inplace=True) elif gate_activation == 'linear': self.gate_activation = None else: raise RuntimeError( "Unknown gate activation: {}".format(gate_activation) ) def forward(self, x): input = x x = self.global_avgpool(x) x = self.fc1(x) if self.norm1 is not None: x = self.norm1(x) x = self.relu(x) x = self.fc2(x) if self.gate_activation is not None: x = self.gate_activation(x) if self.return_gates: return x return input * x class OSBlock(nn.Module): """Omni-scale feature learning block.""" def __init__( self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs ): super(OSBlock, 
class OSBlock(nn.Module):
    """Omni-scale feature learning block."""

    def __init__(
        self,
        in_channels,
        out_channels,
        IN=False,
        bottleneck_reduction=4,
        **kwargs
    ):
        super(OSBlock, self).__init__()
        mid_channels = out_channels // bottleneck_reduction
        self.conv1 = Conv1x1(in_channels, mid_channels)
        # Four parallel streams of stacked LightConv3x3 of depth 1..4.
        self.conv2a = LightConv3x3(mid_channels, mid_channels)
        self.conv2b = nn.Sequential(
            *[LightConv3x3(mid_channels, mid_channels) for _ in range(2)]
        )
        self.conv2c = nn.Sequential(
            *[LightConv3x3(mid_channels, mid_channels) for _ in range(3)]
        )
        self.conv2d = nn.Sequential(
            *[LightConv3x3(mid_channels, mid_channels) for _ in range(4)]
        )
        # One shared gate weighs every stream (unified aggregation).
        self.gate = ChannelGate(mid_channels)
        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
        # Project the identity path only when channel counts differ.
        self.downsample = (
            Conv1x1Linear(in_channels, out_channels)
            if in_channels != out_channels else None
        )
        self.IN = (
            nn.InstanceNorm2d(out_channels, affine=True) if IN else None
        )

    def forward(self, x):
        identity = x
        bottleneck = self.conv1(x)
        streams = [
            self.conv2a(bottleneck),
            self.conv2b(bottleneck),
            self.conv2c(bottleneck),
            self.conv2d(bottleneck),
        ]
        aggregated = sum(self.gate(s) for s in streams)
        out = self.conv3(aggregated)
        if self.downsample is not None:
            identity = self.downsample(identity)
        out = out + identity
        if self.IN is not None:
            out = self.IN(out)
        return F.relu(out)
""" def __init__( self, num_classes, blocks, layers, channels, feature_dim=512, loss='softmax', IN=False, **kwargs ): super(OSNet, self).__init__() num_blocks = len(blocks) assert num_blocks == len(layers) assert num_blocks == len(channels) - 1 self.loss = loss # convolutional backbone self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN) self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) self.conv2 = self._make_layer( blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN ) self.conv3 = self._make_layer( blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True ) self.conv4 = self._make_layer( blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False ) self.conv5 = Conv1x1(channels[3], channels[3]) self.global_avgpool = nn.AdaptiveAvgPool2d(1) # fully connected layer self.fc = self._construct_fc_layer( feature_dim, channels[3], dropout_p=None ) # identity classification layer self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _make_layer( self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False ): layers = [] layers.append(block(in_channels, out_channels, IN=IN)) for i in range(1, layer): layers.append(block(out_channels, out_channels, IN=IN)) if reduce_spatial_size: layers.append( nn.Sequential( Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2) ) ) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): if fc_dims is None or fc_dims < 0: self.feature_dim = input_dim return None if isinstance(fc_dims, int): fc_dims = [fc_dims] layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): 
nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def featuremaps(self, x): x = self.conv1(x) x = self.maxpool(x) x = self.conv2(x) x = self.conv3(x) x = self.conv4(x) x = self.conv5(x) return x def forward(self, x, return_featuremaps=False): x = self.featuremaps(x) if return_featuremaps: return x v = self.global_avgpool(x) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported loss: {}".format(self.loss)) def init_pretrained_weights(model, key=''): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. """ import os import errno import gdown from collections import OrderedDict def _get_torch_home(): ENV_TORCH_HOME = 'TORCH_HOME' ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' DEFAULT_CACHE_DIR = '~/.cache' torch_home = os.path.expanduser( os.getenv( ENV_TORCH_HOME, os.path.join( os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch' ) ) ) return torch_home torch_home = _get_torch_home() model_dir = os.path.join(torch_home, 'checkpoints') try: os.makedirs(model_dir) except OSError as e: if e.errno == errno.EEXIST: # Directory already exists, ignore. pass else: # Unexpected OSError, re-raise. 
def init_pretrained_weights(model, key=''):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import gdown
    from collections import OrderedDict

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        # $TORCH_HOME wins, then $XDG_CACHE_HOME/torch, then ~/.cache/torch.
        return os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(
                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
                )
            )
        )

    model_dir = os.path.join(_get_torch_home(), 'checkpoints')
    # Tolerate an existing cache directory; any other OSError propagates.
    os.makedirs(model_dir, exist_ok=True)

    filename = key + '_imagenet.pth'
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        gdown.download(pretrained_urls[key], cached_file, quiet=False)

    state_dict = torch.load(cached_file)
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []

    for name, param in state_dict.items():
        if name.startswith('module.'):
            name = name[7:]  # discard module.
        if name in model_dict and model_dict[name].size() == param.size():
            new_state_dict[name] = param
            matched_layers.append(name)
        else:
            discarded_layers.append(name)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights from "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(cached_file)
        )
    else:
        print(
            'Successfully loaded imagenet pretrained weights from "{}"'.
            format(cached_file)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )
def osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):
    """OSNet at standard width (x1.0)."""
    model = OSNet(
        num_classes,
        blocks=[OSBlock] * 3,
        layers=[2, 2, 2],
        channels=[64, 256, 384, 512],
        loss=loss,
        **kwargs
    )
    if pretrained:
        # Downloads (or reuses) the cached imagenet checkpoint for this key.
        init_pretrained_weights(model, key='osnet_x1_0')
    return model
class ConvLayer(nn.Module):
    """Convolution layer (conv + bn + relu)."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        groups=1,
        IN=False
    ):
        super(ConvLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            groups=groups
        )
        if IN:
            # Instance norm for the stem of the AIN variant.
            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
        else:
            self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        feats = self.conv(x)
        feats = self.bn(feats)
        return self.relu(feats)
class LightConv3x3(nn.Module):
    """Lightweight 3x3 convolution.

    1x1 (linear) + dw 3x3 (nonlinear).
    """

    def __init__(self, in_channels, out_channels):
        super(LightConv3x3, self).__init__()
        # Pointwise projection, no bias and no activation.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, 1, stride=1, padding=0, bias=False
        )
        # Depthwise 3x3: one filter per channel (groups == channels).
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            bias=False,
            groups=out_channels
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.bn(self.conv2(self.conv1(x))))
class OSBlock(nn.Module):
    """Omni-scale feature learning block."""

    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
        super(OSBlock, self).__init__()
        assert T >= 1
        assert out_channels >= reduction and out_channels % reduction == 0
        mid_channels = out_channels // reduction
        self.conv1 = Conv1x1(in_channels, mid_channels)
        # T parallel streams of growing depth (1..T stacked light convs).
        self.conv2 = nn.ModuleList(
            [
                LightConvStream(mid_channels, mid_channels, depth)
                for depth in range(1, T + 1)
            ]
        )
        # One shared gate weighs every stream (unified aggregation).
        self.gate = ChannelGate(mid_channels)
        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
        # Project the identity path only when channel counts differ.
        self.downsample = (
            None if in_channels == out_channels
            else Conv1x1Linear(in_channels, out_channels)
        )

    def forward(self, x):
        identity = x if self.downsample is None else self.downsample(x)
        squeezed = self.conv1(x)
        fused = 0
        for stream in self.conv2:
            fused = fused + self.gate(stream(squeezed))
        out = self.conv3(fused) + identity
        return F.relu(out)
class OSBlockINin(nn.Module):
    """Omni-scale feature learning block with instance normalization."""

    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
        super(OSBlockINin, self).__init__()
        assert T >= 1
        assert out_channels >= reduction and out_channels % reduction == 0
        mid_channels = out_channels // reduction
        self.conv1 = Conv1x1(in_channels, mid_channels)
        # T parallel streams of growing depth (1..T stacked light convs).
        self.conv2 = nn.ModuleList(
            [
                LightConvStream(mid_channels, mid_channels, depth)
                for depth in range(1, T + 1)
            ]
        )
        self.gate = ChannelGate(mid_channels)
        # No batch norm on conv3: the instance norm below takes its place.
        self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
        self.downsample = (
            None if in_channels == out_channels
            else Conv1x1Linear(in_channels, out_channels)
        )
        self.IN = nn.InstanceNorm2d(out_channels, affine=True)

    def forward(self, x):
        identity = x if self.downsample is None else self.downsample(x)
        squeezed = self.conv1(x)
        fused = 0
        for stream in self.conv2:
            fused = fused + self.gate(stream(squeezed))
        out = self.conv3(fused)
        out = self.IN(out)  # IN inside residual, before the skip addition
        return F.relu(out + identity)
""" def __init__( self, num_classes, blocks, layers, channels, feature_dim=512, loss='softmax', conv1_IN=False, **kwargs ): super(OSNet, self).__init__() num_blocks = len(blocks) assert num_blocks == len(layers) assert num_blocks == len(channels) - 1 self.loss = loss self.feature_dim = feature_dim # convolutional backbone self.conv1 = ConvLayer( 3, channels[0], 7, stride=2, padding=3, IN=conv1_IN ) self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) self.conv2 = self._make_layer( blocks[0], layers[0], channels[0], channels[1] ) self.pool2 = nn.Sequential( Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2) ) self.conv3 = self._make_layer( blocks[1], layers[1], channels[1], channels[2] ) self.pool3 = nn.Sequential( Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2) ) self.conv4 = self._make_layer( blocks[2], layers[2], channels[2], channels[3] ) self.conv5 = Conv1x1(channels[3], channels[3]) self.global_avgpool = nn.AdaptiveAvgPool2d(1) # fully connected layer self.fc = self._construct_fc_layer( self.feature_dim, channels[3], dropout_p=None ) # identity classification layer self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _make_layer(self, blocks, layer, in_channels, out_channels): layers = [] layers += [blocks[0](in_channels, out_channels)] for i in range(1, len(blocks)): layers += [blocks[i](out_channels, out_channels)] return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): if fc_dims is None or fc_dims < 0: self.feature_dim = input_dim return None if isinstance(fc_dims, int): fc_dims = [fc_dims] layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU()) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( 
def init_pretrained_weights(model, key=''):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown
    from collections import OrderedDict

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(
                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
                )
            )
        )
        return torch_home

    model_dir = os.path.join(_get_torch_home(), 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        # A pre-existing cache directory is fine; anything else is fatal.
        if e.errno != errno.EEXIST:
            raise

    cached_file = os.path.join(model_dir, key + '_imagenet.pth')
    if not os.path.exists(cached_file):
        gdown.download(pretrained_urls[key], cached_file, quiet=False)

    checkpoint = torch.load(cached_file)
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers = []
    discarded_layers = []

    for raw_name, weight in checkpoint.items():
        # Checkpoints saved from nn.DataParallel prefix every key.
        name = raw_name[7:] if raw_name.startswith('module.') else raw_name
        if name in model_dict and model_dict[name].size() == weight.size():
            new_state_dict[name] = weight
            matched_layers.append(name)
        else:
            discarded_layers.append(name)

    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)

    if not matched_layers:
        warnings.warn(
            'The pretrained weights from "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(cached_file)
        )
    else:
        print(
            'Successfully loaded imagenet pretrained weights from "{}"'.
            format(cached_file)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )
class BasicBlock(nn.Module):
    """ResNet basic residual block: two 3x3 convs plus an identity skip."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # 3x3 conv with padding (the conv3x3 helper, inlined here).
        self.conv1 = nn.Conv2d(
            inplanes, planes, kernel_size=3, stride=stride, padding=1,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut path: project only when a downsample module is supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += shortcut
        return self.relu(out)
                 planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(
            planes, planes * self.expansion, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class DimReduceLayer(nn.Module):
    # 1x1 conv + BN + optional ReLU/LeakyReLU; reduces the 2048-dim
    # backbone features to `reduced_dim` before the per-part classifiers.

    def __init__(self, in_channels, out_channels, nonlinear):
        super(DimReduceLayer, self).__init__()
        layers = []
        layers.append(
            nn.Conv2d(
                in_channels, out_channels, 1, stride=1, padding=0, bias=False
            )
        )
        layers.append(nn.BatchNorm2d(out_channels))

        if nonlinear == 'relu':
            layers.append(nn.ReLU(inplace=True))
        elif nonlinear == 'leakyrelu':
            layers.append(nn.LeakyReLU(0.1))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class PCB(nn.Module):
    """Part-based Convolutional Baseline.

    Reference:
        Sun et al. Beyond Part Models: Person Retrieval with Refined
        Part Pooling (and A Strong Convolutional Baseline). ECCV 2018.

    Public keys:
        - ``pcb_p4``: PCB with 4-part strips.
        - ``pcb_p6``: PCB with 6-part strips.
    """

    def __init__(
        self,
        num_classes,
        loss,
        block,
        layers,
        parts=6,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    ):
        # NOTE(review): extra factory kwargs (e.g. ``last_stride``) are
        # silently swallowed by **kwargs and never used here.
        self.inplanes = 64
        super(PCB, self).__init__()
        self.loss = loss
        self.parts = parts
        self.feature_dim = 512 * block.expansion

        # backbone network (ResNet trunk; layer4 keeps stride 1 so the
        # feature map stays tall enough for horizontal part pooling)
        self.conv1 = nn.Conv2d(
            3, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1)

        # pcb layers
        self.parts_avgpool = nn.AdaptiveAvgPool2d((self.parts, 1))
        self.dropout = nn.Dropout(p=0.5)
        self.conv5 = DimReduceLayer(
            512 * block.expansion, reduced_dim, nonlinear=nonlinear
        )
        self.feature_dim = reduced_dim
        # One independent classifier per horizontal stripe.
        self.classifier = nn.ModuleList(
            [
                nn.Linear(self.feature_dim, num_classes)
                for _ in range(self.parts)
            ]
        )

        self._init_params()

    def _make_layer(self, block, planes, blocks, stride=1):
        # Builds one ResNet stage of `blocks` residual blocks; adds a 1x1
        # projection shortcut when the shape changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.inplanes,
                    planes * block.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _init_params(self):
        # Kaiming init for convs, unit weight / zero bias for norm layers,
        # small normal init for the linear classifiers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def featuremaps(self, x):
        # Plain ResNet trunk up to layer4; no pooling / classifier.
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward(self, x):
        f = self.featuremaps(x)
        # Pool into `parts` horizontal stripes: shape (N, C, parts, 1).
        v_g = self.parts_avgpool(f)

        if not self.training:
            # Eval: return L2-normalised concatenated stripe features.
            v_g = F.normalize(v_g, p=2, dim=1)
            return v_g.view(v_g.size(0), -1)

        v_g = self.dropout(v_g)
        v_h = self.conv5(v_g)

        y = []
        for i in range(self.parts):
            v_h_i = v_h[:, :, i, :]
            v_h_i = v_h_i.view(v_h_i.size(0), -1)
            y_i = self.classifier[i](v_h_i)
            y.append(y_i)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            v_g = F.normalize(v_g, p=2, dim=1)
            return y, v_g.view(v_g.size(0), -1)
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))


def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    pretrain_dict = model_zoo.load_url(model_url)
    model_dict = model.state_dict()
    # Keep only checkpoint entries whose name and shape match the model.
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


def pcb_p6(num_classes, loss='softmax', pretrained=True, **kwargs):
    # PCB on ResNet-50 with 6 horizontal stripes.
    # NOTE(review): ``last_stride=1`` is absorbed by PCB's **kwargs and has
    # no effect (PCB hard-codes stride 1 in layer4) — confirm intent.
    model = PCB(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=1,
        parts=6,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def pcb_p4(num_classes, loss='softmax', pretrained=True, **kwargs):
    # PCB on ResNet-50 with 4 horizontal stripes.
    model = PCB(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=1,
        parts=4,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


================================================ FILE: torchreid/models/pvpm.py ================================================
from __future__ import absolute_import
from __future__ import division
# Source: https://github.com/hh23333/PVPM
__all__ = ['pcb_p6', 'pcb_p4', 'pose_resnet50_256_p4', 'pose_resnet50_256_p6',
           'pose_resnet50_256_p6_pscore_reg', 'pose_resnet50_256_p4_pscore_reg']

import torch
from torch import nn
from torch.nn import functional as F
import torchvision
import torch.utils.model_zoo as model_zoo

from .osnet import ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, OSBlock

# torchvision ImageNet-pretrained ResNet checkpoints.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(nn.Module):
    # ResNet basic residual block (duplicate of torchreid/models/pcb.py).
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    # ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand (x4).
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class DimReduceLayer(nn.Module):
    # 1x1 conv + BN + optional non-linearity (kept for parity with pcb.py;
    # the PVPM PCB below uses per-part `em` layers instead).

    def __init__(self, in_channels, out_channels, nonlinear):
        super(DimReduceLayer, self).__init__()
        layers = []
        layers.append(nn.Conv2d(in_channels, out_channels, 1, stride=1,
                                padding=0, bias=False))
        layers.append(nn.BatchNorm2d(out_channels))

        if nonlinear == 'relu':
            layers.append(nn.ReLU(inplace=True))
        elif nonlinear == 'leakyrelu':
            layers.append(nn.LeakyReLU(0.1))

        self.layers =
nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class PCB(nn.Module):
    """Part-based Convolutional Baseline.

    Reference:
        Sun et al. Beyond Part Models: Person Retrieval with Refined
        Part Pooling (and A Strong Convolutional Baseline). ECCV 2018.

    Public keys:
        - ``pcb_p4``: PCB with 4-part strips.
        - ``pcb_p6``: PCB with 6-part strips.
    """

    def __init__(self, num_classes, loss, block, layers, parts=6,
                 reduced_dim=256, nonlinear='relu', **kwargs):
        self.inplanes = 64
        super(PCB, self).__init__()
        self.loss = loss
        self.parts = parts
        self.feature_dim = 512 * block.expansion

        # backbone network
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1)

        # pcb layers
        self.parts_avgpool = nn.AdaptiveAvgPool2d((self.parts, 1))
        self.dropout = nn.Dropout(p=0.5)
        # Per-part embedding heads (one Conv1x1-BN-ReLU per stripe).
        self.em = nn.ModuleList(  # TODO: before = DimReduceLayer
            [self._construct_em_layer(reduced_dim, 512 * block.expansion)
             for _ in range(self.parts)])
        self.feature_dim = reduced_dim
        self.classifier = nn.ModuleList(
            [nn.Linear(self.feature_dim, num_classes, bias=False)
             for _ in range(self.parts)])

        self._init_params()

    def _construct_em_layer(self, fc_dims, input_dim, dropout_p=0.5):  # TODO new
        """
        Construct fully connected layer
        - fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
        - input_dim (int): input dimension
        - dropout_p (float): dropout probability, if None, dropout is unused
        """
        # NOTE(review): despite the docstring, ``fc_dims`` is used here as a
        # single int channel count for a 1x1 conv, not a list/tuple.
        if fc_dims is None:
            self.feature_dim = input_dim
            return None

        layers = []
        # layers.append(nn.Linear(input_dim, fc_dims))
        layers.append(nn.Conv2d(input_dim, fc_dims, 1, stride=1, padding=0))
        layers.append(nn.BatchNorm2d(fc_dims))
        layers.append(nn.ReLU(inplace=True))
        # layers.append(nn.Dropout(p=dropout_p))
        # self.feature_dim = fc_dims[-1]
        return nn.Sequential(*layers)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.001)  # TODO Learning rate of pre-trained layers: 0.1 x base learning rate ?
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def featuremaps(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward(self, x):
        f = self.featuremaps(x)
        # vis_featmat_Kmeans(f) # TODO
        # vis_featmat_DBSCAN(f) # TODO
        v_g = self.parts_avgpool(f)  # nn.AdaptiveAvgPool2d((self.parts, 1))

        if not self.training:
            v_g = F.normalize(v_g, p=2, dim=1)
            return v_g.view(v_g.size(0), -1)

        # v_g = self.dropout(v_g)
        # v_h = self.conv5(v_g)

        y = []
        v = []
        # v_g.shape = [n, 2048, 6, 1] ?
        for i in range(self.parts):  # TODO new
            # Embed each pooled stripe with its own head.
            v_g_i = v_g[:, :, i, :].view(v_g.size(0), -1, 1, 1)
            v_g_i = self.em[i](v_g_i)  # fully connected layer, Conv2d-BatchNorm2d-ReLU
            v_h_i = v_g_i.view(v_g_i.size(0), -1)
            y_i = self.classifier[i](v_h_i)
            y.append(y_i)
            v.append(v_g_i)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            # NOTE(review): the triplet branch returns the raw pooled
            # stripes (normalised), not the embedded ``v`` list — confirm.
            v_g = F.normalize(v_g, p=2, dim=1)  # TODO
            return y, v_g.view(v_g.size(0), -1)
        else:
            raise KeyError('Unsupported loss: {}'.format(self.loss))


def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    pretrain_dict = model_zoo.load_url(model_url)
    model_dict = model.state_dict()
    # Keep only checkpoint entries whose name and shape match the model.
    pretrain_dict = {k: v for k, v in pretrain_dict.items()
                     if k in model_dict and model_dict[k].size() == v.size()}
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


def pcb_p6(num_classes, loss='softmax', pretrained=True, **kwargs):
    # PVPM copy of the PCB-p6 builder (shadows pcb.py's builder of the
    # same name when both modules are registered).
    model = PCB(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=1,
        parts=6,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def pcb_p4(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = PCB(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=1,
        parts=4,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


class Conv1x1_att(nn.Module):
    """1x1 convolution + bn + relu."""
    # NOTE(review): despite the docstring, the activation is Sigmoid
    # (attention map in [0, 1]), not ReLU.

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super(Conv1x1_att, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride,
                              padding=0, bias=False, groups=groups)
        self.bn = nn.BatchNorm2d(out_channels)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.activation(x)
        return x


class score_embedding(nn.Module):
    """1x1
    convolution + bn + relu."""
    # NOTE(review): actually AdaptiveAvgPool2d -> Linear -> BN1d -> Sigmoid;
    # regresses a per-part score in [0, 1].

    def __init__(self, in_channels, out_channels):
        super(score_embedding, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.reg = nn.Linear(in_channels, out_channels, bias=False)
        self.bn = nn.BatchNorm1d(out_channels)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.reg(x)
        x = self.bn(x)
        x = self.activation(x)
        return x


class Pose_Subnet(nn.Module):  # TODO
    '''
    PVP and PGA
    '''
    # Small OSNet-style subnet over pose heatmaps producing per-part
    # attention maps (and optionally per-part matching scores).

    def __init__(self, blocks, in_channels, channels, att_num=1, IN=False,
                 matching_score_reg=False):
        super(Pose_Subnet, self).__init__()
        num_blocks = len(blocks)  # NOTE(review): unused
        self.conv1 = ConvLayer(in_channels, channels[0], 7, stride=1,
                               padding=3, IN=IN)
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.conv2 = self._make_layer(blocks[0], 1, channels[0], channels[1],
                                      reduce_spatial_size=True)
        self.conv3 = self._make_layer(blocks[1], 1, channels[1], channels[2],
                                      reduce_spatial_size=False)
        self.conv4 = Conv3x3(channels[2], channels[2])  # PGA
        self.conv_out = Conv1x1_att(channels[2], att_num)  # PVP
        self.matching_score_reg = matching_score_reg
        if self.matching_score_reg:
            self.conv_score = score_embedding(channels[2], att_num)
        self._init_params()

    def _make_layer(self, block, layer, in_channels, out_channels,
                    reduce_spatial_size, IN=False):
        layers = []
        layers.append(block(in_channels, out_channels, IN=IN,
                            gate_reduction=4))
        for i in range(1, layer):
            layers.append(block(out_channels, out_channels, IN=IN,
                                gate_reduction=4))

        if reduce_spatial_size:
            layers.append(
                nn.Sequential(
                    Conv1x1(out_channels, out_channels),
                    nn.AvgPool2d(2, stride=2)
                )
            )

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x_ = self.conv4(x)
        x = self.conv_out(x_)
        # One-hot over parts: each spatial location is assigned to the
        # attention channel with the highest response.
        _, max_index = x.max(dim=1, keepdim=True)
        onehot_index = torch.zeros_like(x).scatter_(1, max_index, 1)
        if self.matching_score_reg:
            score = self.conv_score(x_)
            return x, score, onehot_index
        else:
            return x, onehot_index
    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                        nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


class pose_guide_att_Resnet(PCB):
    # PVPM model: PCB backbone whose uniform stripe pooling is replaced by
    # pose-guided attention (Pose_Subnet) computed from pose heatmaps.

    def __init__(self, num_classes, loss, block, layers, last_stride=2,
                 parts=4, reduced_dim=None, nonlinear='relu',
                 pose_inchannel=56, part_score_reg=False, **kwargs):
        super(pose_guide_att_Resnet, self).__init__(
            num_classes, loss, block, layers, last_stride=last_stride,
            parts=parts, reduced_dim=reduced_dim, nonlinear=nonlinear,
            **kwargs)
        self.part_score_reg = part_score_reg
        self.pose_subnet = Pose_Subnet(blocks=[OSBlock, OSBlock],
                                       in_channels=pose_inchannel,
                                       channels=[32, 32, 32], att_num=parts,
                                       matching_score_reg=part_score_reg)
        self.pose_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.parts_avgpool = nn.ModuleList(
            [nn.AdaptiveAvgPool2d((1, 1))
             for _ in range(self.parts)])  # TODO why not use the same?????

    def forward(self, x, pose_map):
        f = self.featuremaps(x)
        if self.part_score_reg:
            pose_att, part_score, onehot_index = self.pose_subnet(pose_map)  # TODO
        else:
            pose_att, onehot_index = self.pose_subnet(pose_map)
        # Keep, per location, only the winning part's attention value.
        pose_att = pose_att * onehot_index
        pose_att_pool = self.pose_pool(pose_att)  # AdaptiveAvgPool2d -> (1,1)
        v_g = []
        for i in range(self.parts):
            # Attention-weighted average of the feature map for part i.
            v_g_i = f * pose_att[:, i, :, :].unsqueeze(1) / (
                pose_att_pool[:, i, :, :].unsqueeze(1) + 1e-6)  # TODO why divide by average?
            v_g_i = self.parts_avgpool[i](v_g_i)
            v_g.append(v_g_i)
        if not self.training:
            v_g = torch.cat(v_g, dim=2)
            v_g = F.normalize(v_g, p=2, dim=1)  # TODO apply normalize myself?
            if self.part_score_reg:
                return v_g.squeeze(), part_score
            else:
                return v_g.view(v_g.size(0), -1)
        y = []
        v = []
        for i in range(self.parts):
            # add final fc layer
            v_g_i = self.em[i](v_g[i])
            v_h_i = v_g_i.view(v_g_i.size(0), -1)
            y_i = self.classifier[i](v_h_i)
            y.append(y_i)
            v.append(v_g_i)
        if self.loss == 'softmax':
            if self.training:
                if self.part_score_reg:
                    return y, pose_att, part_score, v_g
                else:
                    return y, pose_att
            else:
                return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


def pose_resnet50_256_p4(num_classes, loss='softmax', pretrained=True, **kwargs):
    # PVPM on ResNet-50 with 4 parts.
    model = pose_guide_att_Resnet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        parts=4,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def pose_resnet50_256_p6(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = pose_guide_att_Resnet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        parts=6,
        reduced_dim=256,
        nonlinear='relu',
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def pose_resnet50_256_p6_pscore_reg(num_classes, loss='softmax', pretrained=True, **kwargs):
    # Same as pose_resnet50_256_p6 plus the part-score regression head.
    model = pose_guide_att_Resnet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        parts=6,
        reduced_dim=256,
        nonlinear='relu',
        part_score_reg=True,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def pose_resnet50_256_p4_pscore_reg(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = pose_guide_att_Resnet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        parts=4,
        reduced_dim=256,
        nonlinear='relu',
        part_score_reg=True,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


================================================ FILE: torchreid/models/resnet.py
================================================
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import

import torch.utils.model_zoo as model_zoo
from torch import nn

__all__ = [
    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
    'resnext50_32x4d', 'resnext101_32x8d', 'resnet50_fc512'
]

# torchvision ImageNet-pretrained ResNet / ResNeXt checkpoints.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
}


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation
    )


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(
        in_planes, out_planes, kernel_size=1, stride=stride, bias=False
    )


class BasicBlock(nn.Module):
    # torchvision-style basic block with a pluggable norm layer.
    expansion = 1

    def __init__(
        self,
        inplanes,
        planes,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        norm_layer=None
    ):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError(
                'BasicBlock only supports groups=1 and base_width=64'
            )
        if dilation > 1:
            raise NotImplementedError(
                "Dilation > 1 not supported in BasicBlock"
            )
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 =
norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    # torchvision-style bottleneck; `width` implements the ResNeXt
    # (grouped) and wide variants.
    expansion = 4

    def __init__(
        self,
        inplanes,
        planes,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        norm_layer=None
    ):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width/64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    """Residual network.

    Reference:
        - He et al. Deep Residual Learning for Image Recognition. CVPR 2016.
        - Xie et al. Aggregated Residual Transformations for Deep Neural Networks. CVPR 2017.

    Public keys:
        - ``resnet18``: ResNet18.
        - ``resnet34``: ResNet34.
        - ``resnet50``: ResNet50.
        - ``resnet101``: ResNet101.
        - ``resnet152``: ResNet152.
        - ``resnext50_32x4d``: ResNeXt50.
        - ``resnext101_32x8d``: ResNeXt101.
        - ``resnet50_fc512``: ResNet50 + FC.
    """

    def __init__(
        self,
        num_classes,
        loss,
        block,
        layers,
        zero_init_residual=False,
        groups=1,
        width_per_group=64,
        replace_stride_with_dilation=None,
        norm_layer=None,
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    ):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.loss = loss
        self.feature_dim = 512 * block.expansion
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                "or a 3-element tuple, got {}".
                format(replace_stride_with_dilation)
            )
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(
            3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(
            block,
            128,
            layers[1],
            stride=2,
            dilate=replace_stride_with_dilation[0]
        )
        self.layer3 = self._make_layer(
            block,
            256,
            layers[2],
            stride=2,
            dilate=replace_stride_with_dilation[1]
        )
        # `last_stride=1` keeps a larger final feature map, a common
        # re-id modification of the torchvision trunk.
        self.layer4 = self._make_layer(
            block,
            512,
            layers[3],
            stride=last_stride,
            dilate=replace_stride_with_dilation[2]
        )
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = self._construct_fc_layer(
            fc_dims, 512 * block.expansion, dropout_p
        )
        self.classifier = nn.Linear(self.feature_dim, num_classes)

        self._init_params()

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        # Builds one stage; `dilate` trades stride for dilation (keeps
        # spatial size while enlarging the receptive field).
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(
            block(
                self.inplanes, planes, stride, downsample, self.groups,
                self.base_width, previous_dilation, norm_layer
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer
                )
            )

        return nn.Sequential(*layers)

    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
        """Constructs fully connected layer

        Args:
            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
            input_dim (int): input dimension
            dropout_p (float): dropout probability, if None, dropout is unused
        """
        if fc_dims is None:
            self.feature_dim = input_dim
            return None

        assert isinstance(
            fc_dims, (list, tuple)
        ), 'fc_dims must be either list or tuple, but got {}'.format(
            type(fc_dims)
        )

        layers = []
        for dim in fc_dims:
            layers.append(nn.Linear(input_dim, dim))
            layers.append(nn.BatchNorm1d(dim))
            layers.append(nn.ReLU(inplace=True))
            if dropout_p is not None:
                layers.append(nn.Dropout(p=dropout_p))
            input_dim = dim

        self.feature_dim = fc_dims[-1]

        return nn.Sequential(*layers)

    def _init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_out', nonlinearity='relu'
                )
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def featuremaps(self, x):
        # Shapes below assume a 256x128 input (standard re-id crop).
        # x torch.Size([1, 3, 256, 128])
        x = self.conv1(x)  # torch.Size([1, 64, 128, 64])
        x = self.bn1(x)  # torch.Size([1, 64, 128, 64])
        x = self.relu(x)  # torch.Size([1, 64, 128, 64])
        x = self.maxpool(x)  # torch.Size([1, 64, 64, 32])
        x = self.layer1(x)  # torch.Size([1, 256, 64, 32]) - 3 blocks
        x = self.layer2(x)  # torch.Size([1, 512, 32, 16]) - 4 blocks
        x = self.layer3(x)  # torch.Size([1, 1024, 16, 8]) - 6 blocks
        x = self.layer4(x)  # torch.Size([1, 2048, 8, 4]) - 3 blocks
        return x

    def forward(self, x):
        f = self.featuremaps(x)  # torch.Size([1, 2048, 8, 4])
        if self.loss == 'part_based':
            # Part-based engines (e.g. BPBreID) consume the raw feature map.
            return f
        v = self.global_avgpool(f)  # torch.Size([1, 2048, 1, 1])
        v = v.view(v.size(0), -1)  # torch.Size([1, 2048])
        if self.fc is not None:
            v = self.fc(v)  # torch.Size([1, 512])

        if not self.training:
            return v

        y = self.classifier(v)

        if self.loss == 'softmax':
            return y
        elif self.loss == 'triplet':
            return y, v
        else:
            raise KeyError("Unsupported loss: {}".format(self.loss))


def init_pretrained_weights(model, model_url):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    pretrain_dict = model_zoo.load_url(model_url)
    model_dict = model.state_dict()
    # Keep only checkpoint entries whose name and shape match the model.
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


"""ResNet"""


def resnet18(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=BasicBlock,
        layers=[2, 2, 2, 2],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet18'])
    return model


def resnet34(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=BasicBlock,
        layers=[3, 4, 6, 3],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet34'])
    return model


def resnet50(num_classes, loss='softmax', pretrained=True, **kwargs):
    # NOTE(review): unlike the other builders here, no explicit
    # ``last_stride`` — relies on ResNet's default (2) unless overridden
    # via kwargs; confirm this asymmetry is intentional.
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


def resnet101(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 23, 3],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet101'])
    return model


def resnet152(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 8, 36, 3],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet152'])
    return model


"""ResNeXt"""


def resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        groups=32,
        width_per_group=4,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnext50_32x4d'])
    return model


def resnext101_32x8d(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 23, 3],
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        groups=32,
        width_per_group=8,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnext101_32x8d'])
    return model


"""
ResNet + FC
"""


def resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):
    # ResNet-50 with last_stride=1 and a 512-dim embedding FC head.
    model = ResNet(
        num_classes=num_classes,
        loss=loss,
        block=Bottleneck,
        layers=[3, 4, 6, 3],
        last_stride=1,
        fc_dims=[512],
        dropout_p=None,
        **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['resnet50'])
    return model


================================================ FILE: torchreid/models/resnet_fastreid.py ================================================
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""

import math
import torch
from torch import nn

# from fastreid.layers import (
#     IBN,
#     SELayer,
#     Non_local,
#     get_norm,
# )
# from fastreid.utils.checkpoint import get_missing_parameters_message, get_unexpected_parameters_message
# from .build import BACKBONE_REGISTRY
# from fastreid.utils import comm

# ImageNet / IBN-Net pretrained checkpoints keyed by fastreid depth names.
model_urls = {
    '18x': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    '34x': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    '50x': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    '101x': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'ibn_18x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet18_ibn_a-2f571257.pth',
    'ibn_34x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet34_ibn_a-94bc1577.pth',
    'ibn_50x': 'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet50_ibn_a-d9d0bb7b.pth',
    'ibn_101x':
    'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/resnet101_ibn_a-59ea0ac6.pth',
    'se_ibn_101x':
    'https://github.com/XingangPan/IBN-Net/releases/download/v1.0/se_resnet101_ibn_a-fabed4e2.pth',
}


def get_norm(norm, out_channels, **kwargs):
    """
    Args:
        norm (str or callable): either one of BN, GhostBN, FrozenBN, GN or SyncBN;
            or a callable that takes a channel number and returns
            the normalization layer as a nn.Module
        out_channels: number of channels for normalization layer

    Returns:
        nn.Module or None: the normalization layer
    """
    if isinstance(norm, str):
        # Empty string means "no normalization layer".
        if len(norm) == 0:
            return None
        norm = {
            "BN": BatchNorm,
            # "syncBN": SyncBatchNorm,
            # "GhostBN": GhostBatchNorm,
            # "FrozenBN": FrozenBatchNorm,
            "GN": lambda channels, **args: nn.GroupNorm(32, channels),
        }[norm]
    return norm(out_channels, **kwargs)


class Non_local(nn.Module):
    # Non-local attention block (Wang et al., embedded-Gaussian form).

    def __init__(self, in_channels, bn_norm, reduc_ratio=2):
        super(Non_local, self).__init__()

        self.in_channels = in_channels
        # NOTE(review): reduc_ratio // reduc_ratio is always 1, so the
        # internal embedding has a single channel. Upstream fastreid contains
        # the same expression; the likely intent was
        # in_channels // reduc_ratio, but changing it here would invalidate
        # any pretrained weights — left as-is, flagged for confirmation.
        self.inter_channels = reduc_ratio // reduc_ratio

        # g: value projection.
        self.g = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                           kernel_size=1, stride=1, padding=0)

        # W: output projection, followed by a norm layer whose weight/bias are
        # zero-initialized so the block starts as an identity mapping.
        self.W = nn.Sequential(
            nn.Conv2d(in_channels=self.inter_channels, out_channels=self.in_channels,
                      kernel_size=1, stride=1, padding=0),
            get_norm(bn_norm, self.in_channels),
        )
        nn.init.constant_(self.W[1].weight, 0.0)
        nn.init.constant_(self.W[1].bias, 0.0)

        # theta / phi: query and key projections.
        self.theta = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                               kernel_size=1, stride=1, padding=0)

        self.phi = nn.Conv2d(in_channels=self.in_channels, out_channels=self.inter_channels,
                             kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """
        :param x: (b, t, h, w)
        :return x: (b, t, h, w)
        """
        batch_size = x.size(0)
        g_x = self.g(x).view(batch_size, self.inter_channels, -1)
        g_x = g_x.permute(0, 2, 1)

        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
        theta_x = theta_x.permute(0, 2, 1)
        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
        # Pairwise affinity between all spatial positions, normalized by the
        # number of positions (mean instead of softmax).
        f = torch.matmul(theta_x, phi_x)
        N = f.size(-1)
        f_div_C = f / N

        y = torch.matmul(f_div_C, g_x)
        y = y.permute(0, 2, 1).contiguous()
        y = y.view(batch_size, self.inter_channels, *x.size()[2:])
        W_y = self.W(y)
        # Residual connection: starts as identity because W's norm is
        # zero-initialized.
        z = W_y + x
        return z


class IBN(nn.Module):
    # Instance-Batch Normalization: InstanceNorm on the first half of the
    # channels, BatchNorm on the second half (IBN-Net, ECCV 2018).

    def __init__(self, planes, bn_norm, **kwargs):
        super(IBN, self).__init__()
        half1 = int(planes / 2)
        self.half = half1
        half2 = planes - half1
        self.IN = nn.InstanceNorm2d(half1, affine=True)
        self.BN = get_norm(bn_norm, half2, **kwargs)

    def forward(self, x):
        # Split along channels, normalize each half separately, re-concatenate.
        split = torch.split(x, self.half, 1)
        out1 = self.IN(split[0].contiguous())
        out2 = self.BN(split[1].contiguous())
        out = torch.cat((out1, out2), 1)
        return out


class BatchNorm(nn.BatchNorm2d):
    # BatchNorm2d variant allowing constant (re)initialization and optional
    # freezing of the affine parameters.

    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight_freeze=False,
                 bias_freeze=False, weight_init=1.0, bias_init=0.0, **kwargs):
        super().__init__(num_features, eps=eps, momentum=momentum)
        if weight_init is not None: nn.init.constant_(self.weight, weight_init)
        if bias_init is not None: nn.init.constant_(self.bias, bias_init)
        self.weight.requires_grad_(not weight_freeze)
        self.bias.requires_grad_(not bias_freeze)


class SELayer(nn.Module):
    # Squeeze-and-Excitation channel attention (Hu et al., CVPR 2018).

    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, int(channel / reduction), bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(int(channel / reduction), channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        # Squeeze to (b, c), excite, then rescale the input channel-wise.
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class BasicBlock(nn.Module):
    # Two-conv residual block with optional IBN and SE.
    expansion = 1

    def __init__(self, inplanes, planes, bn_norm, with_ibn=False, with_se=False,
                 stride=1, downsample=None, reduction=16):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        if with_ibn:
            self.bn1 = IBN(planes, bn_norm)
        else:
            self.bn1 = get_norm(bn_norm, planes)
        self.conv2 = nn.Conv2d(planes, planes,
                               kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = get_norm(bn_norm, planes)
        self.relu = nn.ReLU(inplace=True)
        # nn.Identity() keeps forward() branch-free when SE is disabled.
        if with_se:
            self.se = SELayer(planes, reduction)
        else:
            self.se = nn.Identity()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.se(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    # 1x1 -> 3x3 -> 1x1 residual bottleneck with optional IBN and SE.
    expansion = 4

    def __init__(self, inplanes, planes, bn_norm, with_ibn=False, with_se=False,
                 stride=1, downsample=None, reduction=16):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        if with_ibn:
            self.bn1 = IBN(planes, bn_norm)
        else:
            self.bn1 = get_norm(bn_norm, planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = get_norm(bn_norm, planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = get_norm(bn_norm, planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        if with_se:
            self.se = SELayer(planes * self.expansion, reduction)
        else:
            self.se = nn.Identity()
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    # fastreid-style ResNet backbone with optional IBN, SE and Non-local blocks.

    def __init__(self, last_stride, bn_norm, with_ibn, with_se, with_nl, block,
                 layers, non_layers):
        self.inplanes = 64
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = get_norm(bn_norm, 64)
        self.relu = nn.ReLU(inplace=True)
        # self.maxpool = nn.MaxPool2d(kernel_size=3,
        # stride=2, padding=1)
        # ceil_mode=True instead of padding, matching fastreid's variant.
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.layer1 = self._make_layer(block, 64, layers[0], 1, bn_norm, with_ibn, with_se)
        self.layer2 = self._make_layer(block, 128, layers[1], 2, bn_norm, with_ibn, with_se)
        self.layer3 = self._make_layer(block, 256, layers[2], 2, bn_norm, with_ibn, with_se)
        # Last stage: with_ibn deliberately not forwarded (IBN-Net keeps the
        # final stage on plain BN); stride is configurable via last_stride.
        self.layer4 = self._make_layer(block, 512, layers[3], last_stride, bn_norm,
                                       with_se=with_se)

        self.random_init()

        # fmt: off
        if with_nl: self._build_nonlocal(layers, non_layers, bn_norm)
        else:       self.NL_1_idx = self.NL_2_idx = self.NL_3_idx = self.NL_4_idx = []
        # fmt: on

        self.feature_dim = 2048

    def _make_layer(self, block, planes, blocks, stride=1, bn_norm="BN",
                    with_ibn=False, with_se=False):
        # Build one residual stage; a 1x1-conv downsample is used when the
        # spatial stride or channel count changes.
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                get_norm(bn_norm, planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, bn_norm, with_ibn, with_se,
                            stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, bn_norm, with_ibn, with_se))

        return nn.Sequential(*layers)

    def _build_nonlocal(self, layers, non_layers, bn_norm):
        # Insert non_layers[k] Non-local blocks at the END of stage k; the idx
        # lists record after which residual block each Non-local fires.
        self.NL_1 = nn.ModuleList(
            [Non_local(256, bn_norm) for _ in range(non_layers[0])])
        self.NL_1_idx = sorted([layers[0] - (i + 1) for i in range(non_layers[0])])
        self.NL_2 = nn.ModuleList(
            [Non_local(512, bn_norm) for _ in range(non_layers[1])])
        self.NL_2_idx = sorted([layers[1] - (i + 1) for i in range(non_layers[1])])
        self.NL_3 = nn.ModuleList(
            [Non_local(1024, bn_norm) for _ in range(non_layers[2])])
        self.NL_3_idx = sorted([layers[2] - (i + 1) for i in range(non_layers[2])])
        self.NL_4 = nn.ModuleList(
            [Non_local(2048, bn_norm) for _ in range(non_layers[3])])
        self.NL_4_idx = sorted([layers[3] - (i + 1) for i in range(non_layers[3])])

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # NOTE(review): an empty idx list is replaced by [-1] IN-PLACE on the
        # module the first time forward() runs — a sentinel that never matches
        # a block index, so the Non-local branch is skipped.
        # layer 1
        NL1_counter = 0
        if len(self.NL_1_idx) == 0:
            self.NL_1_idx = [-1]
        for i in range(len(self.layer1)):
            x = self.layer1[i](x)
            if i == self.NL_1_idx[NL1_counter]:
                _, C, H, W = x.shape
                x = self.NL_1[NL1_counter](x)
                NL1_counter += 1
        # layer 2
        NL2_counter = 0
        if len(self.NL_2_idx) == 0:
            self.NL_2_idx = [-1]
        for i in range(len(self.layer2)):
            x = self.layer2[i](x)
            if i == self.NL_2_idx[NL2_counter]:
                _, C, H, W = x.shape
                x = self.NL_2[NL2_counter](x)
                NL2_counter += 1
        # layer 3
        NL3_counter = 0
        if len(self.NL_3_idx) == 0:
            self.NL_3_idx = [-1]
        for i in range(len(self.layer3)):
            x = self.layer3[i](x)
            if i == self.NL_3_idx[NL3_counter]:
                _, C, H, W = x.shape
                x = self.NL_3[NL3_counter](x)
                NL3_counter += 1
        # layer 4
        NL4_counter = 0
        if len(self.NL_4_idx) == 0:
            self.NL_4_idx = [-1]
        for i in range(len(self.layer4)):
            x = self.layer4[i](x)
            if i == self.NL_4_idx[NL4_counter]:
                _, C, H, W = x.shape
                x = self.NL_4[NL4_counter](x)
                NL4_counter += 1

        return x

    def random_init(self):
        # He (fan-out) init for convs, constant init for BatchNorm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, 0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


def init_pretrained_weights(key):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown

    def _get_torch_home():
        # Resolve the torch cache directory, honoring TORCH_HOME and
        # XDG_CACHE_HOME environment overrides.
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(
                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'
                )
            )
        )
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
raise filename = model_urls[key].split('/')[-1] cached_file = os.path.join(model_dir, filename) if not os.path.exists(cached_file): print(f"Pretrain model don't exist, downloading from {model_urls[key]}") gdown.download(model_urls[key], cached_file, quiet=False) print(f"Loading pretrained model from {cached_file}") state_dict = torch.load(cached_file, map_location=torch.device('cpu')) return state_dict def fastreid_resnet(pretrained=True, **kwargs): return build_resnet_backbone(pretrained) def fastreid_resnet_ibn(pretrained=True, **kwargs): return build_resnet_backbone(pretrained, with_ibn=True) def fastreid_resnet_nl(pretrained=True, **kwargs): return build_resnet_backbone(pretrained, with_nl=True) def fastreid_resnet_ibn_nl(pretrained=True, **kwargs): return build_resnet_backbone(pretrained, with_ibn=True, with_nl=True) def build_resnet_backbone(pretrained=True, with_ibn=False, with_nl=False, last_stride=1, **kwargs): """ Create a ResNet instance from config. Returns: ResNet: a :class:`ResNet` instance. """ # fmt: off pretrain = pretrained pretrain_path = False bn_norm = "BN" # with_ibn = cfg.MODEL.BACKBONE.WITH_IBN with_se = False # with_nl = cfg.MODEL.BACKBONE.WITH_NL depth = "50x" # fmt: on num_blocks_per_stage = { '18x': [2, 2, 2, 2], '34x': [3, 4, 6, 3], '50x': [3, 4, 6, 3], '101x': [3, 4, 23, 3], }[depth] nl_layers_per_stage = { '18x': [0, 0, 0, 0], '34x': [0, 0, 0, 0], '50x': [0, 2, 3, 0], '101x': [0, 2, 9, 0] }[depth] block = { '18x': BasicBlock, '34x': BasicBlock, '50x': Bottleneck, '101x': Bottleneck }[depth] model = ResNet(last_stride, bn_norm, with_ibn, with_se, with_nl, block, num_blocks_per_stage, nl_layers_per_stage) if pretrain: # Load pretrain path if specifically if pretrain_path: try: state_dict = torch.load(pretrain_path, map_location=torch.device('cpu')) print(f"Loading pretrained model from {pretrain_path}") except FileNotFoundError as e: print(f'{pretrain_path} is not found! 
Please check this path.') raise e except KeyError as e: print("State dict keys error! Please check the state dict.") raise e else: key = depth if with_ibn: key = 'ibn_' + key if with_se: key = 'se_' + key state_dict = init_pretrained_weights(key) incompatible = model.load_state_dict(state_dict, strict=False) if incompatible.missing_keys: print( "incompatible.missing_keys".format(incompatible.missing_keys) ) if incompatible.unexpected_keys: print( "incompatible.unexpected_keys".format(incompatible.unexpected_keys) ) return model ================================================ FILE: torchreid/models/resnet_ibn_a.py ================================================ """ Credit to https://github.com/XingangPan/IBN-Net. """ from __future__ import division, absolute_import import math import torch import torch.nn as nn import torch.utils.model_zoo as model_zoo __all__ = ['resnet50_ibn_a'] model_urls = { 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', } def conv3x3(in_planes, out_planes, stride=1): "3x3 convolution with padding" return nn.Conv2d( in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False ) class BasicBlock(nn.Module): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = nn.BatchNorm2d(planes) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(planes, planes) self.bn2 = nn.BatchNorm2d(planes) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class IBN(nn.Module): def __init__(self, planes): super(IBN, 
self).__init__() half1 = int(planes / 2) self.half = half1 half2 = planes - half1 self.IN = nn.InstanceNorm2d(half1, affine=True) self.BN = nn.BatchNorm2d(half2) def forward(self, x): split = torch.split(x, self.half, 1) out1 = self.IN(split[0].contiguous()) out2 = self.BN(split[1].contiguous()) out = torch.cat((out1, out2), 1) return out class Bottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, ibn=False, stride=1, downsample=None): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) if ibn: self.bn1 = IBN(planes) else: self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d( planes, planes, kernel_size=3, stride=stride, padding=1, bias=False ) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d( planes, planes * self.expansion, kernel_size=1, bias=False ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class ResNet(nn.Module): """Residual network + IBN layer. Reference: - He et al. Deep Residual Learning for Image Recognition. CVPR 2016. - Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV 2018. 
""" def __init__( self, block, layers, num_classes=1000, loss='softmax', fc_dims=None, dropout_p=None, **kwargs ): scale = 64 self.inplanes = scale super(ResNet, self).__init__() self.loss = loss self.feature_dim = scale * 8 * block.expansion self.conv1 = nn.Conv2d( 3, scale, kernel_size=7, stride=2, padding=3, bias=False ) self.bn1 = nn.BatchNorm2d(scale) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, scale, layers[0]) self.layer2 = self._make_layer(block, scale * 2, layers[1], stride=2) self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2) self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = self._construct_fc_layer( fc_dims, scale * 8 * block.expansion, dropout_p ) self.classifier = nn.Linear(self.feature_dim, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. 
/ n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.InstanceNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d( self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False ), nn.BatchNorm2d(planes * block.expansion), ) layers = [] ibn = True if planes == 512: ibn = False layers.append(block(self.inplanes, planes, ibn, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes, ibn)) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def featuremaps(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) return x def forward(self, x): f = self.featuremaps(x) v = self.avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported loss: 
{}".format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. """ pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def resnet50_ibn_a(num_classes, loss='softmax', pretrained=False, **kwargs): model = ResNet( Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['resnet50']) return model ================================================ FILE: torchreid/models/resnet_ibn_b.py ================================================ """ Credit to https://github.com/XingangPan/IBN-Net. """ from __future__ import division, absolute_import import math import torch.nn as nn import torch.utils.model_zoo as model_zoo __all__ = ['resnet50_ibn_b'] model_urls = { 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', } def conv3x3(in_planes, out_planes, stride=1): "3x3 convolution with padding" return nn.Conv2d( in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False ) class BasicBlock(nn.Module): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = nn.BatchNorm2d(planes) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(planes, planes) self.bn2 = nn.BatchNorm2d(planes) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) if 
self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class Bottleneck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None, IN=False): super(Bottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d( planes, planes, kernel_size=3, stride=stride, padding=1, bias=False ) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d( planes, planes * self.expansion, kernel_size=1, bias=False ) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.IN = None if IN: self.IN = nn.InstanceNorm2d(planes * 4, affine=True) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual if self.IN is not None: out = self.IN(out) out = self.relu(out) return out class ResNet(nn.Module): """Residual network + IBN layer. Reference: - He et al. Deep Residual Learning for Image Recognition. CVPR 2016. - Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV 2018. 
""" def __init__( self, block, layers, num_classes=1000, loss='softmax', fc_dims=None, dropout_p=None, **kwargs ): scale = 64 self.inplanes = scale super(ResNet, self).__init__() self.loss = loss self.feature_dim = scale * 8 * block.expansion self.conv1 = nn.Conv2d( 3, scale, kernel_size=7, stride=2, padding=3, bias=False ) self.bn1 = nn.InstanceNorm2d(scale, affine=True) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer( block, scale, layers[0], stride=1, IN=True ) self.layer2 = self._make_layer( block, scale * 2, layers[1], stride=2, IN=True ) self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2) self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = self._construct_fc_layer( fc_dims, scale * 8 * block.expansion, dropout_p ) self.classifier = nn.Linear(self.feature_dim, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. 
/ n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.InstanceNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() def _make_layer(self, block, planes, blocks, stride=1, IN=False): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d( self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False ), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks - 1): layers.append(block(self.inplanes, planes)) layers.append(block(self.inplanes, planes, IN=IN)) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def featuremaps(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) return x def forward(self, x): f = self.featuremaps(x) v = self.avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported 
loss: {}".format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. """ pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def resnet50_ibn_b(num_classes, loss='softmax', pretrained=False, **kwargs): model = ResNet( Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['resnet50']) return model ================================================ FILE: torchreid/models/resnetmid.py ================================================ from __future__ import division, absolute_import import torch import torch.utils.model_zoo as model_zoo from torch import nn __all__ = ['resnet50mid'] model_urls = { 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', } def conv3x3(in_planes, out_planes, stride=1): """3x3 convolution with padding""" return nn.Conv2d( in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False ) class BasicBlock(nn.Module): expansion = 1 def __init__(self, inplanes, planes, stride=1, downsample=None): super(BasicBlock, self).__init__() self.conv1 = conv3x3(inplanes, planes, stride) self.bn1 = nn.BatchNorm2d(planes) self.relu = nn.ReLU(inplace=True) self.conv2 = conv3x3(planes, planes) self.bn2 = nn.BatchNorm2d(planes) self.downsample = downsample self.stride = stride def forward(self, x): residual = x out = self.conv1(x) 
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    # 1x1 -> 3x3 -> 1x1 residual bottleneck.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(
            planes, planes * self.expansion, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNetMid(nn.Module):
    """Residual network + mid-level features.

    Reference:
        Yu et al. The Devil is in the Middle: Exploiting Mid-level Representations for
        Cross-Domain Instance Matching. arXiv:1711.08106.

    Public keys:
        - ``resnet50mid``: ResNet50 + mid-level feature fusion.
""" def __init__( self, num_classes, loss, block, layers, last_stride=2, fc_dims=None, **kwargs ): self.inplanes = 64 super(ResNetMid, self).__init__() self.loss = loss self.feature_dim = 512 * block.expansion # backbone network self.conv1 = nn.Conv2d( 3, 64, kernel_size=7, stride=2, padding=3, bias=False ) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer( block, 512, layers[3], stride=last_stride ) self.global_avgpool = nn.AdaptiveAvgPool2d(1) assert fc_dims is not None self.fc_fusion = self._construct_fc_layer( fc_dims, 512 * block.expansion * 2 ) self.feature_dim += 512 * block.expansion self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d( self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False ), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes)) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in 
fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def featuremaps(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x4a = self.layer4[0](x) x4b = self.layer4[1](x4a) x4c = self.layer4[2](x4b) return x4a, x4b, x4c def forward(self, x): x4a, x4b, x4c = self.featuremaps(x) v4a = self.global_avgpool(x4a) v4b = self.global_avgpool(x4b) v4c = self.global_avgpool(x4c) v4ab = torch.cat([v4a, v4b], 1) v4ab = v4ab.view(v4ab.size(0), -1) v4ab = self.fc_fusion(v4ab) v4c = v4c.view(v4c.size(0), -1) v = torch.cat([v4ab, v4c], 1) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError('Unsupported loss: {}'.format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. 
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) """ Residual network configurations: -- resnet18: block=BasicBlock, layers=[2, 2, 2, 2] resnet34: block=BasicBlock, layers=[3, 4, 6, 3] resnet50: block=Bottleneck, layers=[3, 4, 6, 3] resnet101: block=Bottleneck, layers=[3, 4, 23, 3] resnet152: block=Bottleneck, layers=[3, 8, 36, 3] """ def resnet50mid(num_classes, loss='softmax', pretrained=True, **kwargs): model = ResNetMid( num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=[1024], **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['resnet50']) return model ================================================ FILE: torchreid/models/senet.py ================================================ from __future__ import division, absolute_import import math from collections import OrderedDict import torch.nn as nn from torch.utils import model_zoo __all__ = [ 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50_fc512' ] """ Code imported from https://github.com/Cadene/pretrained-models.pytorch """ pretrained_settings = { 'senet154': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, 'se_resnet50': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, 'se_resnet101': { 'imagenet': { 'url': 
'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, 'se_resnet152': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, 'se_resnext50_32x4d': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, 'se_resnext101_32x4d': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', 'input_space': 'RGB', 'input_size': [3, 224, 224], 'input_range': [0, 1], 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'num_classes': 1000 } }, } class SEModule(nn.Module): def __init__(self, channels, reduction): super(SEModule, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) self.fc1 = nn.Conv2d( channels, channels // reduction, kernel_size=1, padding=0 ) self.relu = nn.ReLU(inplace=True) self.fc2 = nn.Conv2d( channels // reduction, channels, kernel_size=1, padding=0 ) self.sigmoid = nn.Sigmoid() def forward(self, x): module_input = x x = self.avg_pool(x) x = self.fc1(x) x = self.relu(x) x = self.fc2(x) x = self.sigmoid(x) return module_input * x class Bottleneck(nn.Module): """ Base class for bottlenecks that implements `forward()` method. 
""" def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) out = self.relu(out) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out = self.se_module(out) + residual out = self.relu(out) return out class SEBottleneck(Bottleneck): """ Bottleneck for SENet154. """ expansion = 4 def __init__( self, inplanes, planes, groups, reduction, stride=1, downsample=None ): super(SEBottleneck, self).__init__() self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes * 2) self.conv2 = nn.Conv2d( planes * 2, planes * 4, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False ) self.bn2 = nn.BatchNorm2d(planes * 4) self.conv3 = nn.Conv2d( planes * 4, planes * 4, kernel_size=1, bias=False ) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.se_module = SEModule(planes * 4, reduction=reduction) self.downsample = downsample self.stride = stride class SEResNetBottleneck(Bottleneck): """ ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe implementation and uses `stride=stride` in `conv1` and not in `conv2` (the latter is used in the torchvision implementation of ResNet). 
""" expansion = 4 def __init__( self, inplanes, planes, groups, reduction, stride=1, downsample=None ): super(SEResNetBottleneck, self).__init__() self.conv1 = nn.Conv2d( inplanes, planes, kernel_size=1, bias=False, stride=stride ) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d( planes, planes, kernel_size=3, padding=1, groups=groups, bias=False ) self.bn2 = nn.BatchNorm2d(planes) self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.se_module = SEModule(planes * 4, reduction=reduction) self.downsample = downsample self.stride = stride class SEResNeXtBottleneck(Bottleneck): """ResNeXt bottleneck type C with a Squeeze-and-Excitation module""" expansion = 4 def __init__( self, inplanes, planes, groups, reduction, stride=1, downsample=None, base_width=4 ): super(SEResNeXtBottleneck, self).__init__() width = int(math.floor(planes * (base_width/64.)) * groups) self.conv1 = nn.Conv2d( inplanes, width, kernel_size=1, bias=False, stride=1 ) self.bn1 = nn.BatchNorm2d(width) self.conv2 = nn.Conv2d( width, width, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False ) self.bn2 = nn.BatchNorm2d(width) self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * 4) self.relu = nn.ReLU(inplace=True) self.se_module = SEModule(planes * 4, reduction=reduction) self.downsample = downsample self.stride = stride class SENet(nn.Module): """Squeeze-and-excitation network. Reference: Hu et al. Squeeze-and-Excitation Networks. CVPR 2018. Public keys: - ``senet154``: SENet154. - ``se_resnet50``: ResNet50 + SE. - ``se_resnet101``: ResNet101 + SE. - ``se_resnet152``: ResNet152 + SE. - ``se_resnext50_32x4d``: ResNeXt50 (groups=32, width=4) + SE. - ``se_resnext101_32x4d``: ResNeXt101 (groups=32, width=4) + SE. - ``se_resnet50_fc512``: (ResNet50 + SE) + FC. 
""" def __init__( self, num_classes, loss, block, layers, groups, reduction, dropout_p=0.2, inplanes=128, input_3x3=True, downsample_kernel_size=3, downsample_padding=1, last_stride=2, fc_dims=None, **kwargs ): """ Parameters ---------- block (nn.Module): Bottleneck class. - For SENet154: SEBottleneck - For SE-ResNet models: SEResNetBottleneck - For SE-ResNeXt models: SEResNeXtBottleneck layers (list of ints): Number of residual blocks for 4 layers of the network (layer1...layer4). groups (int): Number of groups for the 3x3 convolution in each bottleneck block. - For SENet154: 64 - For SE-ResNet models: 1 - For SE-ResNeXt models: 32 reduction (int): Reduction ratio for Squeeze-and-Excitation modules. - For all models: 16 dropout_p (float or None): Drop probability for the Dropout layer. If `None` the Dropout layer is not used. - For SENet154: 0.2 - For SE-ResNet models: None - For SE-ResNeXt models: None inplanes (int): Number of input channels for layer1. - For SENet154: 128 - For SE-ResNet models: 64 - For SE-ResNeXt models: 64 input_3x3 (bool): If `True`, use three 3x3 convolutions instead of a single 7x7 convolution in layer0. - For SENet154: True - For SE-ResNet models: False - For SE-ResNeXt models: False downsample_kernel_size (int): Kernel size for downsampling convolutions in layer2, layer3 and layer4. - For SENet154: 3 - For SE-ResNet models: 1 - For SE-ResNeXt models: 1 downsample_padding (int): Padding for downsampling convolutions in layer2, layer3 and layer4. - For SENet154: 1 - For SE-ResNet models: 0 - For SE-ResNeXt models: 0 num_classes (int): Number of outputs in `classifier` layer. 
""" super(SENet, self).__init__() self.inplanes = inplanes self.loss = loss if input_3x3: layer0_modules = [ ( 'conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False) ), ('bn1', nn.BatchNorm2d(64)), ('relu1', nn.ReLU(inplace=True)), ( 'conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False) ), ('bn2', nn.BatchNorm2d(64)), ('relu2', nn.ReLU(inplace=True)), ( 'conv3', nn.Conv2d( 64, inplanes, 3, stride=1, padding=1, bias=False ) ), ('bn3', nn.BatchNorm2d(inplanes)), ('relu3', nn.ReLU(inplace=True)), ] else: layer0_modules = [ ( 'conv1', nn.Conv2d( 3, inplanes, kernel_size=7, stride=2, padding=3, bias=False ) ), ('bn1', nn.BatchNorm2d(inplanes)), ('relu1', nn.ReLU(inplace=True)), ] # To preserve compatibility with Caffe weights `ceil_mode=True` # is used instead of `padding=1`. layer0_modules.append( ('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)) ) self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) self.layer1 = self._make_layer( block, planes=64, blocks=layers[0], groups=groups, reduction=reduction, downsample_kernel_size=1, downsample_padding=0 ) self.layer2 = self._make_layer( block, planes=128, blocks=layers[1], stride=2, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding ) self.layer3 = self._make_layer( block, planes=256, blocks=layers[2], stride=2, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding ) self.layer4 = self._make_layer( block, planes=512, blocks=layers[3], stride=last_stride, groups=groups, reduction=reduction, downsample_kernel_size=downsample_kernel_size, downsample_padding=downsample_padding ) self.global_avgpool = nn.AdaptiveAvgPool2d(1) self.fc = self._construct_fc_layer( fc_dims, 512 * block.expansion, dropout_p ) self.classifier = nn.Linear(self.feature_dim, num_classes) def _make_layer( self, block, planes, blocks, groups, reduction, stride=1, downsample_kernel_size=1, 
downsample_padding=0 ): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d( self.inplanes, planes * block.expansion, kernel_size=downsample_kernel_size, stride=stride, padding=downsample_padding, bias=False ), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append( block( self.inplanes, planes, groups, reduction, stride, downsample ) ) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes, groups, reduction)) return nn.Sequential(*layers) def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """ Construct fully connected layer - fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed - input_dim (int): input dimension - dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def featuremaps(self, x): x = self.layer0(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) return x def forward(self, x): f = self.featuremaps(x) v = self.global_avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported loss: {}".format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. 
""" pretrain_dict = model_zoo.load_url(model_url) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def senet154(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEBottleneck, layers=[3, 8, 36, 3], groups=64, reduction=16, dropout_p=0.2, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['senet154']['imagenet']['url'] init_pretrained_weights(model, model_url) return model def se_resnet50(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEResNetBottleneck, layers=[3, 4, 6, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['se_resnet50']['imagenet']['url'] init_pretrained_weights(model, model_url) return model def se_resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEResNetBottleneck, layers=[3, 4, 6, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=1, fc_dims=[512], **kwargs ) if pretrained: model_url = pretrained_settings['se_resnet50']['imagenet']['url'] init_pretrained_weights(model, model_url) return model def se_resnet101(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEResNetBottleneck, layers=[3, 4, 23, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['se_resnet101']['imagenet']['url'] 
init_pretrained_weights(model, model_url) return model def se_resnet152(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEResNetBottleneck, layers=[3, 8, 36, 3], groups=1, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['se_resnet152']['imagenet']['url'] init_pretrained_weights(model, model_url) return model def se_resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs): model = SENet( num_classes=num_classes, loss=loss, block=SEResNeXtBottleneck, layers=[3, 4, 6, 3], groups=32, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['se_resnext50_32x4d']['imagenet']['url' ] init_pretrained_weights(model, model_url) return model def se_resnext101_32x4d( num_classes, loss='softmax', pretrained=True, **kwargs ): model = SENet( num_classes=num_classes, loss=loss, block=SEResNeXtBottleneck, layers=[3, 4, 23, 3], groups=32, reduction=16, dropout_p=None, inplanes=64, input_3x3=False, downsample_kernel_size=1, downsample_padding=0, last_stride=2, fc_dims=None, **kwargs ) if pretrained: model_url = pretrained_settings['se_resnext101_32x4d']['imagenet'][ 'url'] init_pretrained_weights(model, model_url) return model ================================================ FILE: torchreid/models/shufflenet.py ================================================ from __future__ import division, absolute_import import torch import torch.utils.model_zoo as model_zoo from torch import nn from torch.nn import functional as F __all__ = ['shufflenet'] model_urls = { # training epoch = 90, top1 = 61.8 'imagenet': 'https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM', } class ChannelShuffle(nn.Module): def 
__init__(self, num_groups): super(ChannelShuffle, self).__init__() self.g = num_groups def forward(self, x): b, c, h, w = x.size() n = c // self.g # reshape x = x.view(b, self.g, n, h, w) # transpose x = x.permute(0, 2, 1, 3, 4).contiguous() # flatten x = x.view(b, c, h, w) return x class Bottleneck(nn.Module): def __init__( self, in_channels, out_channels, stride, num_groups, group_conv1x1=True ): super(Bottleneck, self).__init__() assert stride in [1, 2], 'Warning: stride must be either 1 or 2' self.stride = stride mid_channels = out_channels // 4 if stride == 2: out_channels -= in_channels # group conv is not applied to first conv1x1 at stage 2 num_groups_conv1x1 = num_groups if group_conv1x1 else 1 self.conv1 = nn.Conv2d( in_channels, mid_channels, 1, groups=num_groups_conv1x1, bias=False ) self.bn1 = nn.BatchNorm2d(mid_channels) self.shuffle1 = ChannelShuffle(num_groups) self.conv2 = nn.Conv2d( mid_channels, mid_channels, 3, stride=stride, padding=1, groups=mid_channels, bias=False ) self.bn2 = nn.BatchNorm2d(mid_channels) self.conv3 = nn.Conv2d( mid_channels, out_channels, 1, groups=num_groups, bias=False ) self.bn3 = nn.BatchNorm2d(out_channels) if stride == 2: self.shortcut = nn.AvgPool2d(3, stride=2, padding=1) def forward(self, x): out = F.relu(self.bn1(self.conv1(x))) out = self.shuffle1(out) out = self.bn2(self.conv2(out)) out = self.bn3(self.conv3(out)) if self.stride == 2: res = self.shortcut(x) out = F.relu(torch.cat([res, out], 1)) else: out = F.relu(x + out) return out # configuration of (num_groups: #out_channels) based on Table 1 in the paper cfg = { 1: [144, 288, 576], 2: [200, 400, 800], 3: [240, 480, 960], 4: [272, 544, 1088], 8: [384, 768, 1536], } class ShuffleNet(nn.Module): """ShuffleNet. Reference: Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices. CVPR 2018. Public keys: - ``shufflenet``: ShuffleNet (groups=3). 
""" def __init__(self, num_classes, loss='softmax', num_groups=3, **kwargs): super(ShuffleNet, self).__init__() self.loss = loss self.conv1 = nn.Sequential( nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False), nn.BatchNorm2d(24), nn.ReLU(), nn.MaxPool2d(3, stride=2, padding=1), ) self.stage2 = nn.Sequential( Bottleneck( 24, cfg[num_groups][0], 2, num_groups, group_conv1x1=False ), Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), ) self.stage3 = nn.Sequential( Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), ) self.stage4 = nn.Sequential( Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups), Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), ) self.classifier = nn.Linear(cfg[num_groups][2], num_classes) self.feat_dim = cfg[num_groups][2] def forward(self, x): x = self.conv1(x) x = self.stage2(x) x = self.stage3(x) x = self.stage4(x) x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1) if not self.training: return x y = self.classifier(x) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, x else: raise KeyError('Unsupported loss: {}'.format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. 
    Layers that don't match with pretrained layers in name or size are kept
    unchanged.
    """
    pretrain_dict = model_zoo.load_url(model_url)
    model_dict = model.state_dict()
    # keep only pretrained weights whose name AND shape match this model
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


def shufflenet(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ShuffleNet(num_classes, loss, **kwargs)
    if pretrained:
        # NOTE: the checkpoint is hosted on mega.nz, which model_zoo cannot
        # fetch, so the call is disabled and a manual-download warning is
        # emitted instead.
        # init_pretrained_weights(model, model_urls['imagenet'])
        import warnings
        warnings.warn(
            'The imagenet pretrained weights need to be manually downloaded from {}'
            .format(model_urls['imagenet'])
        )
    return model


================================================
FILE: torchreid/models/shufflenetv2.py
================================================
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import torch
import torch.utils.model_zoo as model_zoo
from torch import nn

__all__ = [
    'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5',
    'shufflenet_v2_x2_0'
]

model_urls = {
    'shufflenetv2_x0.5':
    'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth',
    'shufflenetv2_x1.0':
    'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth',
    # no official ImageNet checkpoints exist for the x1.5/x2.0 variants
    'shufflenetv2_x1.5': None,
    'shufflenetv2_x2.0': None,
}


def channel_shuffle(x, groups):
    # Interleave channels across `groups` so information flows between
    # the two branches after concatenation.
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = num_channels // groups

    # reshape
    x = x.view(batchsize, groups, channels_per_group, height, width)

    x = torch.transpose(x, 1, 2).contiguous()

    # flatten
    x = x.view(batchsize, -1, height, width)

    return x


class InvertedResidual(nn.Module):
    # ShuffleNetV2 unit: at stride 1 the input is split in half and only
    # one half is transformed; at stride >1 both branches downsample and
    # their outputs are concatenated. A channel shuffle follows either way.

    def __init__(self, inp, oup, stride):
        super(InvertedResidual, self).__init__()

        if not (1 <= stride <= 3):
            raise ValueError('illegal stride value')
        self.stride = stride

        branch_features = oup // 2
        assert (self.stride != 1) or (inp == branch_features << 1)

        if self.stride > 1:
            self.branch1 = nn.Sequential(
                self.depthwise_conv(
                    inp, inp, kernel_size=3, stride=self.stride, padding=1
                ),
                nn.BatchNorm2d(inp),
                nn.Conv2d(
                    inp,
                    branch_features,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    bias=False
                ),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True),
            )

        self.branch2 = nn.Sequential(
            nn.Conv2d(
                inp if (self.stride > 1) else branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False
            ),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(
                branch_features,
                branch_features,
                kernel_size=3,
                stride=self.stride,
                padding=1
            ),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(
                branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False
            ),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        return nn.Conv2d(
            i, o, kernel_size, stride, padding, bias=bias, groups=i
        )

    def forward(self, x):
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)

        out = channel_shuffle(out, 2)

        return out


class ShuffleNetV2(nn.Module):
    """ShuffleNetV2.

    Reference:
        Ma et al. ShuffleNet V2: Practical Guidelines for Efficient CNN
        Architecture Design. ECCV 2018.

    Public keys:
        - ``shufflenet_v2_x0_5``: ShuffleNetV2 x0.5.
        - ``shufflenet_v2_x1_0``: ShuffleNetV2 x1.0.
        - ``shufflenet_v2_x1_5``: ShuffleNetV2 x1.5.
        - ``shufflenet_v2_x2_0``: ShuffleNetV2 x2.0.
""" def __init__( self, num_classes, loss, stages_repeats, stages_out_channels, **kwargs ): super(ShuffleNetV2, self).__init__() self.loss = loss if len(stages_repeats) != 3: raise ValueError( 'expected stages_repeats as list of 3 positive ints' ) if len(stages_out_channels) != 5: raise ValueError( 'expected stages_out_channels as list of 5 positive ints' ) self._stage_out_channels = stages_out_channels input_channels = 3 output_channels = self._stage_out_channels[0] self.conv1 = nn.Sequential( nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), nn.BatchNorm2d(output_channels), nn.ReLU(inplace=True), ) input_channels = output_channels self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] for name, repeats, output_channels in zip( stage_names, stages_repeats, self._stage_out_channels[1:] ): seq = [InvertedResidual(input_channels, output_channels, 2)] for i in range(repeats - 1): seq.append( InvertedResidual(output_channels, output_channels, 1) ) setattr(self, name, nn.Sequential(*seq)) input_channels = output_channels output_channels = self._stage_out_channels[-1] self.conv5 = nn.Sequential( nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), nn.BatchNorm2d(output_channels), nn.ReLU(inplace=True), ) self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Linear(output_channels, num_classes) def featuremaps(self, x): x = self.conv1(x) x = self.maxpool(x) x = self.stage2(x) x = self.stage3(x) x = self.stage4(x) x = self.conv5(x) return x def forward(self, x): f = self.featuremaps(x) v = self.global_avgpool(f) v = v.view(v.size(0), -1) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError("Unsupported loss: {}".format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. 
    Layers that don't match with pretrained layers in name or size are kept
    unchanged.
    """
    # x1.5/x2.0 have no published checkpoint (url is None) — warn and skip
    if model_url is None:
        import warnings
        warnings.warn(
            'ImageNet pretrained weights are unavailable for this model'
        )
        return
    pretrain_dict = model_zoo.load_url(model_url)
    model_dict = model.state_dict()
    # keep only pretrained weights whose name AND shape match this model
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)


def shufflenet_v2_x0_5(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ShuffleNetV2(
        num_classes, loss, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['shufflenetv2_x0.5'])
    return model


def shufflenet_v2_x1_0(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ShuffleNetV2(
        num_classes, loss, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['shufflenetv2_x1.0'])
    return model


def shufflenet_v2_x1_5(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ShuffleNetV2(
        num_classes, loss, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['shufflenetv2_x1.5'])
    return model


def shufflenet_v2_x2_0(num_classes, loss='softmax', pretrained=True, **kwargs):
    model = ShuffleNetV2(
        num_classes, loss, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs
    )
    if pretrained:
        init_pretrained_weights(model, model_urls['shufflenetv2_x2.0'])
    return model


================================================
FILE: torchreid/models/squeezenet.py
================================================
"""
Code source: https://github.com/pytorch/vision
"""
from __future__ import division, absolute_import
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['squeezenet1_0', 'squeezenet1_1', 'squeezenet1_0_fc512']

model_urls = {
    'squeezenet1_0':
    'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1':
    'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}


class Fire(nn.Module):
    # SqueezeNet "fire" module: a 1x1 squeeze conv followed by parallel
    # 1x1 and 3x3 expand convs whose outputs are concatenated.

    def __init__(
        self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes
    ):
        super(Fire, self).__init__()
        self.inplanes = inplanes
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)
        self.expand1x1 = nn.Conv2d(
            squeeze_planes, expand1x1_planes, kernel_size=1
        )
        self.expand1x1_activation = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(
            squeeze_planes, expand3x3_planes, kernel_size=3, padding=1
        )
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.squeeze_activation(self.squeeze(x))
        return torch.cat(
            [
                self.expand1x1_activation(self.expand1x1(x)),
                self.expand3x3_activation(self.expand3x3(x))
            ], 1
        )


class SqueezeNet(nn.Module):
    """SqueezeNet.

    Reference:
        Iandola et al. SqueezeNet: AlexNet-level accuracy with 50x fewer
        parameters and < 0.5 MB model size. arXiv:1602.07360.

    Public keys:
        - ``squeezenet1_0``: SqueezeNet (version=1.0).
        - ``squeezenet1_1``: SqueezeNet (version=1.1).
        - ``squeezenet1_0_fc512``: SqueezeNet (version=1.0) + FC.
""" def __init__( self, num_classes, loss, version=1.0, fc_dims=None, dropout_p=None, **kwargs ): super(SqueezeNet, self).__init__() self.loss = loss self.feature_dim = 512 if version not in [1.0, 1.1]: raise ValueError( 'Unsupported SqueezeNet version {version}:' '1.0 or 1.1 expected'.format(version=version) ) if version == 1.0: self.features = nn.Sequential( nn.Conv2d(3, 96, kernel_size=7, stride=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(96, 16, 64, 64), Fire(128, 16, 64, 64), Fire(128, 32, 128, 128), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(256, 32, 128, 128), Fire(256, 48, 192, 192), Fire(384, 48, 192, 192), Fire(384, 64, 256, 256), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(512, 64, 256, 256), ) else: self.features = nn.Sequential( nn.Conv2d(3, 64, kernel_size=3, stride=2), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(64, 16, 64, 64), Fire(128, 16, 64, 64), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(128, 32, 128, 128), Fire(256, 32, 128, 128), nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True), Fire(256, 48, 192, 192), Fire(384, 48, 192, 192), Fire(384, 64, 256, 256), Fire(512, 64, 256, 256), ) self.global_avgpool = nn.AdaptiveAvgPool2d(1) self.fc = self._construct_fc_layer(fc_dims, 512, dropout_p) self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) 
layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def forward(self, x): f = self.features(x) v = self.global_avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise KeyError('Unsupported loss: {}'.format(self.loss)) def init_pretrained_weights(model, model_url): """Initializes model with pretrained weights. Layers that don't match with pretrained layers in name or size are kept unchanged. 
""" pretrain_dict = model_zoo.load_url(model_url, map_location=None) model_dict = model.state_dict() pretrain_dict = { k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size() } model_dict.update(pretrain_dict) model.load_state_dict(model_dict) def squeezenet1_0(num_classes, loss='softmax', pretrained=True, **kwargs): model = SqueezeNet( num_classes, loss, version=1.0, fc_dims=None, dropout_p=None, **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['squeezenet1_0']) return model def squeezenet1_0_fc512( num_classes, loss='softmax', pretrained=True, **kwargs ): model = SqueezeNet( num_classes, loss, version=1.0, fc_dims=[512], dropout_p=None, **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['squeezenet1_0']) return model def squeezenet1_1(num_classes, loss='softmax', pretrained=True, **kwargs): model = SqueezeNet( num_classes, loss, version=1.1, fc_dims=None, dropout_p=None, **kwargs ) if pretrained: init_pretrained_weights(model, model_urls['squeezenet1_1']) return model ================================================ FILE: torchreid/models/xception.py ================================================ from __future__ import division, absolute_import import torch.nn as nn import torch.nn.functional as F import torch.utils.model_zoo as model_zoo __all__ = ['xception'] pretrained_settings = { 'xception': { 'imagenet': { 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth', 'input_space': 'RGB', 'input_size': [3, 299, 299], 'input_range': [0, 1], 'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5], 'num_classes': 1000, 'scale': 0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299 } } } class SeparableConv2d(nn.Module): def __init__( self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=False ): super(SeparableConv2d, self).__init__() self.conv1 = nn.Conv2d( in_channels, in_channels, 
class SeparableConv2d(nn.Module):
    """Depthwise separable convolution: per-channel conv then 1x1 pointwise."""

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=1,
        bias=False
    ):
        super(SeparableConv2d, self).__init__()
        # depthwise: groups == in_channels gives one filter per input channel
        self.conv1 = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups=in_channels,
            bias=bias
        )
        # pointwise 1x1 convolution mixes channels
        self.pointwise = nn.Conv2d(
            in_channels, out_channels, 1, 1, 0, 1, 1, bias=bias
        )

    def forward(self, x):
        return self.pointwise(self.conv1(x))


class Block(nn.Module):
    """Xception residual block of `reps` separable convs with optional skip."""

    def __init__(
        self,
        in_filters,
        out_filters,
        reps,
        strides=1,
        start_with_relu=True,
        grow_first=True
    ):
        super(Block, self).__init__()

        # projection shortcut whenever shape changes, identity otherwise
        if out_filters != in_filters or strides != 1:
            self.skip = nn.Conv2d(
                in_filters, out_filters, 1, stride=strides, bias=False
            )
            self.skipbn = nn.BatchNorm2d(out_filters)
        else:
            self.skip = None

        self.relu = nn.ReLU(inplace=True)

        def unit(cin, cout):
            # relu -> separable conv -> bn (the shared relu module is stateless)
            return [
                self.relu,
                SeparableConv2d(
                    cin, cout, 3, stride=1, padding=1, bias=False
                ),
                nn.BatchNorm2d(cout),
            ]

        layers = []
        filters = in_filters
        if grow_first:
            layers += unit(in_filters, out_filters)
            filters = out_filters
        for _ in range(reps - 1):
            layers += unit(filters, filters)
        if not grow_first:
            layers += unit(in_filters, out_filters)

        if not start_with_relu:
            layers = layers[1:]
        else:
            # first relu must not be inplace: `inp` is still needed by the skip
            layers[0] = nn.ReLU(inplace=False)

        if strides != 1:
            layers.append(nn.MaxPool2d(3, strides, 1))
        self.rep = nn.Sequential(*layers)

    def forward(self, inp):
        out = self.rep(inp)
        if self.skip is not None:
            residual = self.skipbn(self.skip(inp))
        else:
            residual = inp
        out += residual
        return out
""" def __init__( self, num_classes, loss, fc_dims=None, dropout_p=None, **kwargs ): super(Xception, self).__init__() self.loss = loss self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False) self.bn1 = nn.BatchNorm2d(32) self.conv2 = nn.Conv2d(32, 64, 3, bias=False) self.bn2 = nn.BatchNorm2d(64) self.block1 = Block( 64, 128, 2, 2, start_with_relu=False, grow_first=True ) self.block2 = Block( 128, 256, 2, 2, start_with_relu=True, grow_first=True ) self.block3 = Block( 256, 728, 2, 2, start_with_relu=True, grow_first=True ) self.block4 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block5 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block6 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block7 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block8 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block9 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block10 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block11 = Block( 728, 728, 3, 1, start_with_relu=True, grow_first=True ) self.block12 = Block( 728, 1024, 2, 2, start_with_relu=True, grow_first=False ) self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1) self.bn3 = nn.BatchNorm2d(1536) self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1) self.bn4 = nn.BatchNorm2d(2048) self.global_avgpool = nn.AdaptiveAvgPool2d(1) self.feature_dim = 2048 self.fc = self._construct_fc_layer(fc_dims, 2048, dropout_p) self.classifier = nn.Linear(self.feature_dim, num_classes) self._init_params() def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): """Constructs fully connected layer. 
Args: fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed input_dim (int): input dimension dropout_p (float): dropout probability, if None, dropout is unused """ if fc_dims is None: self.feature_dim = input_dim return None assert isinstance( fc_dims, (list, tuple) ), 'fc_dims must be either list or tuple, but got {}'.format( type(fc_dims) ) layers = [] for dim in fc_dims: layers.append(nn.Linear(input_dim, dim)) layers.append(nn.BatchNorm1d(dim)) layers.append(nn.ReLU(inplace=True)) if dropout_p is not None: layers.append(nn.Dropout(p=dropout_p)) input_dim = dim self.feature_dim = fc_dims[-1] return nn.Sequential(*layers) def _init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_( m.weight, mode='fan_out', nonlinearity='relu' ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm1d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) elif isinstance(m, nn.Linear): nn.init.normal_(m.weight, 0, 0.01) if m.bias is not None: nn.init.constant_(m.bias, 0) def featuremaps(self, input): x = self.conv1(input) x = self.bn1(x) x = F.relu(x, inplace=True) x = self.conv2(x) x = self.bn2(x) x = F.relu(x, inplace=True) x = self.block1(x) x = self.block2(x) x = self.block3(x) x = self.block4(x) x = self.block5(x) x = self.block6(x) x = self.block7(x) x = self.block8(x) x = self.block9(x) x = self.block10(x) x = self.block11(x) x = self.block12(x) x = self.conv3(x) x = self.bn3(x) x = F.relu(x, inplace=True) x = self.conv4(x) x = self.bn4(x) x = F.relu(x, inplace=True) return x def forward(self, x): f = self.featuremaps(x) v = self.global_avgpool(f) v = v.view(v.size(0), -1) if self.fc is not None: v = self.fc(v) if not self.training: return v y = self.classifier(v) if self.loss == 'softmax': return y elif self.loss == 'triplet': return y, v else: raise 
def init_pretrained_weights(model, model_url):
    """Initialize models with pretrained weights.

    Layers that don't match with pretrained layers in name or size
    are kept unchanged.
    """
    pretrained = model_zoo.load_url(model_url)
    current = model.state_dict()
    # keep only entries whose name AND tensor shape match the target model
    matched = {
        name: weight
        for name, weight in pretrained.items()
        if name in current and current[name].size() == weight.size()
    }
    current.update(matched)
    model.load_state_dict(current)


def xception(num_classes, loss='softmax', pretrained=True, **kwargs):
    """Factory for the Xception re-id backbone (ImageNet weights optional)."""
    model = Xception(num_classes, loss, fc_dims=None, dropout_p=None, **kwargs)
    if pretrained:
        init_pretrained_weights(
            model, pretrained_settings['xception']['imagenet']['url']
        )
    return model
AVAI_SCH = ['single_step', 'multi_step', 'warmup_multi_step', 'cosine']


def build_lr_scheduler(
    optimizer, lr_scheduler='single_step', stepsize=1, gamma=0.1, max_epoch=1
):
    """A function wrapper for building a learning rate scheduler.

    Args:
        optimizer (Optimizer): an Optimizer.
        lr_scheduler (str, optional): learning rate scheduler method. Default
            is single_step.
        stepsize (int or list, optional): step size to decay learning rate.
            When ``lr_scheduler`` is "single_step", ``stepsize`` should be an
            integer. When ``lr_scheduler`` is "multi_step", ``stepsize`` is a
            list. Default is 1.
        gamma (float, optional): decay rate. Default is 0.1.
        max_epoch (int, optional): maximum epoch (for cosine annealing).
            Default is 1.

    Returns:
        a torch ``lr_scheduler`` instance (or ``WarmupMultiStepLR``).

    Raises:
        ValueError: if ``lr_scheduler`` is not in ``AVAI_SCH``.
        TypeError: if ``stepsize`` has the wrong type for the chosen scheduler.

    Examples::
        >>> # Decay learning rate by every 20 epochs.
        >>> scheduler = torchreid.optim.build_lr_scheduler(
        >>>     optimizer, lr_scheduler='single_step', stepsize=20
        >>> )
        >>> # Decay learning rate at 30, 50 and 55 epochs.
        >>> scheduler = torchreid.optim.build_lr_scheduler(
        >>>     optimizer, lr_scheduler='multi_step', stepsize=[30, 50, 55]
        >>> )
    """
    if lr_scheduler not in AVAI_SCH:
        raise ValueError(
            'Unsupported scheduler: {}. Must be one of {}'.format(
                lr_scheduler, AVAI_SCH
            )
        )

    if lr_scheduler == 'single_step':
        # a milestone list degrades gracefully to its last entry
        if isinstance(stepsize, list):
            stepsize = stepsize[-1]
        if not isinstance(stepsize, int):
            raise TypeError(
                'For single_step lr_scheduler, stepsize must '
                'be an integer, but got {}'.format(type(stepsize))
            )
        return torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=stepsize, gamma=gamma
        )

    if lr_scheduler == 'multi_step':
        if not isinstance(stepsize, list):
            raise TypeError(
                'For multi_step lr_scheduler, stepsize must '
                'be a list, but got {}'.format(type(stepsize))
            )
        return torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=stepsize, gamma=gamma
        )

    if lr_scheduler == 'warmup_multi_step':
        if not isinstance(stepsize, list):
            raise TypeError(
                'For warmup_multi_step lr_scheduler, stepsize must '
                'be a list, but got {}'.format(type(stepsize))
            )
        return WarmupMultiStepLR(
            optimizer,
            milestones=stepsize,
            gamma=gamma,
            warmup_factor=0.01,
            warmup_iters=10,
            warmup_method="linear"
        )

    # remaining option: lr_scheduler == 'cosine'
    return torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(max_epoch)
    )
Got {}", milestones, ) if warmup_method not in ("constant", "linear"): raise ValueError( "Only 'constant' or 'linear' warmup_method accepted" "got {}".format(warmup_method) ) self.milestones = milestones self.gamma = gamma self.warmup_factor = warmup_factor self.warmup_iters = warmup_iters self.warmup_method = warmup_method super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) def get_lr(self): warmup_factor = 1 if self.last_epoch < self.warmup_iters: if self.warmup_method == "constant": warmup_factor = self.warmup_factor elif self.warmup_method == "linear": alpha = self.last_epoch / self.warmup_iters warmup_factor = self.warmup_factor * (1 - alpha) + alpha return [ base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) for base_lr in self.base_lrs ] ================================================ FILE: torchreid/optim/optimizer.py ================================================ from __future__ import print_function, absolute_import import warnings import torch import torch.nn as nn from .radam import RAdam AVAI_OPTIMS = ['adam', 'amsgrad', 'sgd', 'rmsprop', 'radam'] def build_optimizer( model, optim='adam', lr=0.0003, weight_decay=5e-04, momentum=0.9, sgd_dampening=0, sgd_nesterov=False, rmsprop_alpha=0.99, adam_beta1=0.9, adam_beta2=0.99, staged_lr=False, new_layers='', base_lr_mult=0.1 ): """A function wrapper for building an optimizer. Args: model (nn.Module): model. optim (str, optional): optimizer. Default is "adam". lr (float, optional): learning rate. Default is 0.0003. weight_decay (float, optional): weight decay (L2 penalty). Default is 5e-04. momentum (float, optional): momentum factor in sgd. Default is 0.9, sgd_dampening (float, optional): dampening for momentum. Default is 0. sgd_nesterov (bool, optional): enables Nesterov momentum. Default is False. rmsprop_alpha (float, optional): smoothing constant for rmsprop. Default is 0.99. adam_beta1 (float, optional): beta-1 value in adam. Default is 0.9. 
AVAI_OPTIMS = ['adam', 'amsgrad', 'sgd', 'rmsprop', 'radam']


def build_optimizer(
    model,
    optim='adam',
    lr=0.0003,
    weight_decay=5e-04,
    momentum=0.9,
    sgd_dampening=0,
    sgd_nesterov=False,
    rmsprop_alpha=0.99,
    adam_beta1=0.9,
    adam_beta2=0.99,
    staged_lr=False,
    new_layers='',
    base_lr_mult=0.1
):
    """A function wrapper for building an optimizer.

    Args:
        model (nn.Module): model.
        optim (str, optional): optimizer. Default is "adam".
        lr (float, optional): learning rate. Default is 0.0003.
        weight_decay (float, optional): weight decay (L2 penalty). Default is
            5e-04.
        momentum (float, optional): momentum factor in sgd. Default is 0.9.
        sgd_dampening (float, optional): dampening for momentum. Default is 0.
        sgd_nesterov (bool, optional): enables Nesterov momentum. Default is
            False.
        rmsprop_alpha (float, optional): smoothing constant for rmsprop.
            Default is 0.99.
        adam_beta1 (float, optional): beta-1 value in adam. Default is 0.9.
        adam_beta2 (float, optional): beta-2 value in adam. Default is 0.99.
        staged_lr (bool, optional): uses different learning rates for base and
            new layers. Base layers are pretrained layers while new layers are
            randomly initialized, e.g. the identity classification layer.
            Enabling ``staged_lr`` can allow the base layers to be trained
            with a smaller learning rate determined by ``base_lr_mult``, while
            the new layers will take the ``lr``. Default is False.
        new_layers (str or list): attribute names in ``model``. Default is
            empty.
        base_lr_mult (float, optional): learning rate multiplier for base
            layers. Default is 0.1.

    Raises:
        ValueError: if ``optim`` is not in ``AVAI_OPTIMS``.
        TypeError: if ``model`` is not an ``nn.Module``.

    Examples::
        >>> # A normal optimizer can be built by
        >>> optimizer = torchreid.optim.build_optimizer(model, optim='sgd', lr=0.01)
        >>> # Use a smaller lr for pretrained layers and full lr for new ones:
        >>> optimizer = torchreid.optim.build_optimizer(
        >>>     model, optim='sgd', lr=0.01, staged_lr=True,
        >>>     new_layers=['fc', 'classifier'], base_lr_mult=0.1
        >>> )
    """
    if optim not in AVAI_OPTIMS:
        raise ValueError(
            'Unsupported optim: {}. Must be one of {}'.format(
                optim, AVAI_OPTIMS
            )
        )

    if not isinstance(model, nn.Module):
        raise TypeError(
            'model given to build_optimizer must be an instance of nn.Module'
        )

    if staged_lr:
        # BUGFIX: the original tested `new_layers is None` INSIDE the
        # `isinstance(new_layers, str)` branch, which can never be true, so
        # the warning never fired and a None value later crashed in
        # `name in new_layers`.
        if not new_layers:
            warnings.warn(
                'new_layers is empty, therefore, staged_lr is useless'
            )
            new_layers = []
        elif isinstance(new_layers, str):
            new_layers = [new_layers]

        if isinstance(model, nn.DataParallel):
            model = model.module

        base_params = []
        base_layers = []
        new_params = []
        for name, module in model.named_children():
            if name in new_layers:
                new_params += [p for p in module.parameters()]
            else:
                base_params += [p for p in module.parameters()]
                base_layers.append(name)
        param_groups = [
            {
                'params': base_params,
                'lr': lr * base_lr_mult
            },
            {
                'params': new_params
            },
        ]
    else:
        param_groups = model.parameters()

    if optim == 'adam':
        optimizer = torch.optim.Adam(
            param_groups,
            lr=lr,
            weight_decay=weight_decay,
            betas=(adam_beta1, adam_beta2),
        )
    elif optim == 'amsgrad':
        optimizer = torch.optim.Adam(
            param_groups,
            lr=lr,
            weight_decay=weight_decay,
            betas=(adam_beta1, adam_beta2),
            amsgrad=True,
        )
    elif optim == 'sgd':
        optimizer = torch.optim.SGD(
            param_groups,
            lr=lr,
            momentum=momentum,
            weight_decay=weight_decay,
            dampening=sgd_dampening,
            nesterov=sgd_nesterov,
        )
    elif optim == 'rmsprop':
        optimizer = torch.optim.RMSprop(
            param_groups,
            lr=lr,
            momentum=momentum,
            weight_decay=weight_decay,
            alpha=rmsprop_alpha,
        )
    else:  # optim == 'radam', guaranteed by the AVAI_OPTIMS check above
        optimizer = RAdam(
            param_groups,
            lr=lr,
            weight_decay=weight_decay,
            betas=(adam_beta1, adam_beta2)
        )

    return optimizer
class RAdam(Optimizer):
    """Rectified Adam (https://arxiv.org/abs/1908.03265).

    Behaves like Adam once the variance of the adaptive learning rate is
    tractable (N_sma >= 5); before that it falls back to SGD-with-momentum
    when ``degenerated_to_sgd`` is True, otherwise it skips the update.
    """

    def __init__(
        self,
        params,
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        degenerated_to_sgd=True
    ):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 0: {}".format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 1: {}".format(betas[1])
            )
        self.degenerated_to_sgd = degenerated_to_sgd
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        # cache of (step, N_sma, step_size) tuples, keyed by step % 10, so the
        # rectification term is computed once per step rather than per param
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (callable, optional): re-evaluates the model and returns
                the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        'RAdam does not support sparse gradients'
                    )

                p_data_fp32 = p.data.float()
                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32
                    )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # MODERNIZED: the positional `addcmul_(scalar, t1, t2)` and
                # `add_(scalar, t)` overloads are deprecated in PyTorch; the
                # keyword alpha/value forms below are numerically identical.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2**state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (
                        1 - beta2_t
                    )
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = math.sqrt(
                            (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) *
                            (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)
                        ) / (1 - beta1**state['step'])
                    elif self.degenerated_to_sgd:
                        step_size = 1.0 / (1 - beta1**state['step'])
                    else:
                        step_size = -1  # sentinel: skip the update entirely
                    buffered[2] = step_size

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(
                            p_data_fp32,
                            alpha=-group['weight_decay'] * group['lr']
                        )
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(
                        exp_avg, denom, value=-step_size * group['lr']
                    )
                    p.data.copy_(p_data_fp32)
                elif step_size > 0:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(
                            p_data_fp32,
                            alpha=-group['weight_decay'] * group['lr']
                        )
                    p_data_fp32.add_(exp_avg, alpha=-step_size * group['lr'])
                    p.data.copy_(p_data_fp32)

        return loss
class PlainRAdam(Optimizer):
    """RAdam variant that recomputes the rectification term per parameter
    instead of caching it per step (cf. ``RAdam.buffer``)."""

    def __init__(
        self,
        params,
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        degenerated_to_sgd=True
    ):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 0: {}".format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 1: {}".format(betas[1])
            )
        self.degenerated_to_sgd = degenerated_to_sgd
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super(PlainRAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(PlainRAdam, self).__setstate__(state)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (callable, optional): re-evaluates the model and returns
                the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        'RAdam does not support sparse gradients'
                    )

                p_data_fp32 = p.data.float()
                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32
                    )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # MODERNIZED: keyword alpha/value forms replace the deprecated
                # positional scalar overloads; numerically identical.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                beta2_t = beta2**state['step']
                N_sma_max = 2 / (1 - beta2) - 1
                N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(
                            p_data_fp32,
                            alpha=-group['weight_decay'] * group['lr']
                        )
                    step_size = group['lr'] * math.sqrt(
                        (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) *
                        (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)
                    ) / (1 - beta1**state['step'])
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                    p.data.copy_(p_data_fp32)
                elif self.degenerated_to_sgd:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(
                            p_data_fp32,
                            alpha=-group['weight_decay'] * group['lr']
                        )
                    step_size = group['lr'] / (1 - beta1**state['step'])
                    p_data_fp32.add_(exp_avg, alpha=-step_size)
                    p.data.copy_(p_data_fp32)

        return loss
class AdamW(Optimizer):
    """Adam with an optional linear lr warmup and decoupled-style decay
    applied directly to the parameters."""

    def __init__(
        self,
        params,
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0,
        warmup=0
    ):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 0: {}".format(betas[0])
            )
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 1: {}".format(betas[1])
            )
        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            warmup=warmup
        )
        super(AdamW, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AdamW, self).__setstate__(state)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (callable, optional): re-evaluates the model and returns
                the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, please consider SparseAdam instead'
                    )

                p_data_fp32 = p.data.float()
                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32
                    )

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # MODERNIZED: keyword alpha/value forms replace the deprecated
                # positional scalar overloads; numerically identical.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1**state['step']
                bias_correction2 = 1 - beta2**state['step']

                # linear lr warmup for the first `warmup` steps
                if group['warmup'] > state['step']:
                    scheduled_lr = 1e-8 + state['step'] * group['lr'] / group[
                        'warmup']
                else:
                    scheduled_lr = group['lr']

                step_size = scheduled_lr * math.sqrt(
                    bias_correction2
                ) / bias_correction1

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(
                        p_data_fp32,
                        alpha=-group['weight_decay'] * scheduled_lr
                    )

                p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                p.data.copy_(p_data_fp32)

        return loss
cfg.project.diff_config = "" cfg.project.notes = "" cfg.project.tags = [] cfg.project.config_file = "" cfg.project.debug_mode = False cfg.project.logger = CN() # Choose experiment manager client to use or simply use disk dump / matplotlib cfg.project.logger.use_clearml = False cfg.project.logger.use_neptune = False cfg.project.logger.use_tensorboard = False cfg.project.logger.use_wandb = False cfg.project.logger.matplotlib_show = False cfg.project.logger.save_disk = True # save images to disk cfg.project.job_id = random.randint(0, 1_000_000_000) cfg.project.experiment_id = str(uuid.uuid4()) cfg.project.start_time = datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%MS") # model cfg.model = CN() cfg.model.name = 'bpbreid' cfg.model.pretrained = True # automatically load pretrained model weights if available (For example HRNet # pretrained weights on ImageNet) cfg.model.load_weights = '' # path to model weights, for doing inference with a model that was saved on disk with 'save_model_flag' cfg.model.load_config = False # load config saved with model weights and overwrite current config cfg.model.resume = '' # path to checkpoint for resume training cfg.model.save_model_flag = False # path to checkpoint for resume training # configs for our part-based model BPBreID cfg.model.bpbreid = CN() cfg.model.bpbreid.pooling = 'gwap' # ['gap', 'gmp', 'gwap', 'gwap2'] cfg.model.bpbreid.normalization = 'identity' # ['identity', 'batch_norm_2d'] - obsolete, always use identity cfg.model.bpbreid.mask_filtering_training = False # use visibility scores at training - do not have an influence on testing performance yet, to be improved cfg.model.bpbreid.mask_filtering_testing = True # use visibility scores at testing - do have a big influence on testing performance when activated cfg.model.bpbreid.last_stride = 1 # last stride of the resnet backbone - 1 for better performance cfg.model.bpbreid.dim_reduce = 'after_pooling' # where to apply feature dimensionality reduction (before or after global 
pooling) ['none', 'before_pooling', 'after_pooling', 'before_and_after_pooling', 'after_pooling_with_dropout'] cfg.model.bpbreid.dim_reduce_output = 512 # reduce feature dimension to this value when above config is not 'none' cfg.model.bpbreid.backbone = 'resnet50' # ['resnet50', 'hrnet32', 'fastreid_resnet_ibn_nl'] cfg.model.bpbreid.learnable_attention_enabled = True # use learnable attention mechanism to pool part features, otherwise, use fixed attention weights from external (pifpaf) heatmaps/masks cfg.model.bpbreid.test_embeddings = ['bn_foreg', 'parts'] # embeddings to use at inference among ['globl', 'foreg', 'backg', 'conct', 'parts']: append 'bn_' suffix to use batch normed embeddings cfg.model.bpbreid.test_use_target_segmentation = 'none' # ['soft', 'hard', 'none'] - use external part mask to further refine the attention weights at inference cfg.model.bpbreid.training_binary_visibility_score = True # use binary visibility score (0 or 1) instead of continuous visibility score (0 to 1) at training cfg.model.bpbreid.testing_binary_visibility_score = True # use binary visibility score (0 or 1) instead of continuous visibility score (0 to 1) at testing cfg.model.bpbreid.shared_parts_id_classifier = False # if each part branch uses share weights for the identity classifier. Used only when the identity loss is used on part-based embeddings. cfg.model.bpbreid.hrnet_pretrained_path = "pretrained_models/" # path to pretrained weights for HRNet backbone, download on our Google Drive or on https://github.com/HRNet/HRNet-Image-Classification # number of horizontal stripes desired. When BPBreID is used, this variable will be automatically filled depending # on "data.masks.preprocess" cfg.model.bpbreid.masks = CN() cfg.model.bpbreid.masks.type = 'disk' # when 'disk' is used, load part masks from storage in 'cfg.model.bpbreid.masks.dir' folder # when 'stripes' is used, divide the image in 'cfg.model.bpbreid.masks.parts_num' horizontal stripes in a PCB style. 
# 'stripes' with parts_num=1 can be used to emulate the global method Bag of Tricks (BoT) cfg.model.bpbreid.masks.parts_num = 1 # number of part-based embedding to extract. When PCB is used, change this parameter to the number of stripes required cfg.model.bpbreid.masks.dir = 'pifpaf_maskrcnn_filtering' # masks will be loaded from 'dataset_path/masks/' directory cfg.model.bpbreid.masks.preprocess = 'eight' # how to group the 36 pifpaf parts into smaller human semantic groups ['eight', 'five', 'four', 'two', ...], more combination available inside 'torchreid/data/masks_transforms/__init__.masks_preprocess_pifpaf' cfg.model.bpbreid.masks.softmax_weight = 15 cfg.model.bpbreid.masks.background_computation_strategy = 'threshold' # threshold, diff_from_max cfg.model.bpbreid.masks.mask_filtering_threshold = 0.5 # data cfg.data = CN() cfg.data.type = 'image' cfg.data.root = 'reid-data' cfg.data.sources = ['market1501'] cfg.data.targets = ['market1501'] cfg.data.workers = 4 # number of data loading workers, set to 0 to enable breakpoint debugging in dataloader code cfg.data.split_id = 0 # split index cfg.data.height = 256 # image height cfg.data.width = 128 # image width cfg.data.combineall = False # combine train, query and gallery for training cfg.data.transforms = ['rc', 're'] # data augmentation from ['rf', 'rc', 're', 'cj'] = ['random flip', 'random crop', 'random erasing', 'color jitter'] cfg.data.ro = CN() # parameters for random occlusion data augmentation with Pascal VOC, to be improved, not maintained cfg.data.ro.path = "" cfg.data.ro.p = 0.5 cfg.data.ro.n = 1 cfg.data.ro.min_overlap = 0.5 cfg.data.ro.max_overlap = 0.8 cfg.data.cj = CN() # parameters for color jitter data augmentation cfg.data.cj.brightness = 0.2 cfg.data.cj.contrast = 0.15 cfg.data.cj.saturation = 0. cfg.data.cj.hue = 0. 
    # --- data augmentation / normalization (continuation of get_default_config) ---
    cfg.data.cj.always_apply = False
    cfg.data.cj.p = 0.5
    cfg.data.norm_mean = [0.485, 0.456, 0.406]  # default is imagenet mean
    cfg.data.norm_std = [0.229, 0.224, 0.225]  # default is imagenet std
    cfg.data.save_dir = 'logs'  # save figures, images, logs, etc. in this folder
    cfg.data.load_train_targets = False

    # specific datasets
    cfg.market1501 = CN()
    cfg.market1501.use_500k_distractors = False  # add 500k distractors to the gallery set for market1501
    cfg.cuhk03 = CN()
    cfg.cuhk03.labeled_images = False  # use labeled images, if False, use detected images
    cfg.cuhk03.classic_split = False  # use classic split by Li et al. CVPR14
    cfg.cuhk03.use_metric_cuhk03 = False  # use cuhk03's metric for evaluation

    # sampler
    cfg.sampler = CN()
    cfg.sampler.train_sampler = 'RandomIdentitySampler'  # sampler for source train loader
    cfg.sampler.train_sampler_t = 'RandomIdentitySampler'  # sampler for target train loader
    cfg.sampler.num_instances = 4  # number of instances per identity for RandomIdentitySampler

    # video reid setting
    cfg.video = CN()
    cfg.video.seq_len = 15  # number of images to sample in a tracklet
    cfg.video.sample_method = 'evenly'  # how to sample images from a tracklet 'random'/'evenly'/'all'
    cfg.video.pooling_method = 'avg'  # how to pool features over a tracklet

    # train
    cfg.train = CN()
    cfg.train.optim = 'adam'
    cfg.train.lr = 0.00035
    cfg.train.weight_decay = 5e-4
    cfg.train.max_epoch = 120
    cfg.train.start_epoch = 0
    cfg.train.batch_size = 64
    cfg.train.fixbase_epoch = 0  # number of epochs to fix base layers
    cfg.train.open_layers = [
        'classifier'
    ]  # layers for training while keeping others frozen
    cfg.train.staged_lr = False  # set different lr to different layers
    cfg.train.new_layers = ['classifier']  # newly added layers with default lr
    cfg.train.base_lr_mult = 0.1  # learning rate multiplier for base layers
    cfg.train.lr_scheduler = 'warmup_multi_step'
    cfg.train.stepsize = [40, 70]  # stepsize to decay learning rate
    cfg.train.gamma = 0.1  # learning rate decay multiplier
    cfg.train.seed = 1  # random seed
    cfg.train.eval_freq = -1  # evaluation frequency (-1 means to only test after training)
    cfg.train.batch_debug_freq = 0
    cfg.train.batch_log_freq = 0

    # optimizer
    cfg.sgd = CN()
    cfg.sgd.momentum = 0.9  # momentum factor for sgd and rmsprop
    cfg.sgd.dampening = 0.  # dampening for momentum
    cfg.sgd.nesterov = False  # Nesterov momentum
    cfg.rmsprop = CN()
    cfg.rmsprop.alpha = 0.99  # smoothing constant
    cfg.adam = CN()
    cfg.adam.beta1 = 0.9  # exponential decay rate for first moment
    cfg.adam.beta2 = 0.999  # exponential decay rate for second moment

    # loss
    cfg.loss = CN()
    cfg.loss.name = 'part_based'  # use part based engine to train bpbreid with GiLt loss
    cfg.loss.part_based = CN()
    cfg.loss.part_based.name = 'part_averaged_triplet_loss'  # ['inter_parts_triplet_loss', 'intra_parts_triplet_loss', 'part_max_triplet_loss', 'part_averaged_triplet_loss', 'part_min_triplet_loss', 'part_max_min_triplet_loss', 'part_random_max_min_triplet_loss']
    cfg.loss.part_based.ppl = "cl"  # body part prediction loss: ['cl', 'fl', 'dl'] = [cross entropy loss with label smoothing, focal loss, dice loss]
    cfg.loss.part_based.weights = CN()  # weights to apply for the different losses and different types of embeddings, for more details, have a look at 'torchreid/losses/GiLt_loss.py'
    # For each embedding type below, 'id' is the identity (classification) loss weight
    # and 'tr' is the triplet loss weight used by the GiLt loss.
    cfg.loss.part_based.weights[GLOBAL] = CN()
    cfg.loss.part_based.weights[GLOBAL].id = 1.
    cfg.loss.part_based.weights[GLOBAL].tr = 0.
    cfg.loss.part_based.weights[FOREGROUND] = CN()
    cfg.loss.part_based.weights[FOREGROUND].id = 1.
    cfg.loss.part_based.weights[FOREGROUND].tr = 0.
    cfg.loss.part_based.weights[CONCAT_PARTS] = CN()
    cfg.loss.part_based.weights[CONCAT_PARTS].id = 1.
    cfg.loss.part_based.weights[CONCAT_PARTS].tr = 0.
    cfg.loss.part_based.weights[PARTS] = CN()
    cfg.loss.part_based.weights[PARTS].id = 0.
    cfg.loss.part_based.weights[PARTS].tr = 1.
    cfg.loss.part_based.weights[PIXELS] = CN()
    # 'ce' is the pixel-level cross-entropy weight for the body part attention loss.
    cfg.loss.part_based.weights[PIXELS].ce = 0.35
    cfg.loss.softmax = CN()
    cfg.loss.softmax.label_smooth = True  # use label smoothing regularizer
    cfg.loss.triplet = CN()
    cfg.loss.triplet.margin = 0.3  # distance margin
    cfg.loss.triplet.weight_t = 1.  # weight to balance hard triplet loss
    cfg.loss.triplet.weight_x = 0.  # weight to balance cross entropy loss

    # test
    cfg.test = CN()
    cfg.test.batch_size = 128
    cfg.test.batch_size_pairwise_dist_matrix = 500  # query to gallery distance matrix is computed on the GPU by batch of gallery samples with this size.
    # To avoid out of memory issue, we don't compute it for all gallery samples at the same time, but we compute it
    # in batches of 'batch_size_pairwise_dist_matrix' gallery samples.
    cfg.test.dist_metric = 'euclidean'  # distance metric, ['euclidean', 'cosine']
    cfg.test.normalize_feature = True  # normalize feature vectors before computing distance
    cfg.test.ranks = [1, 5, 10, 20]  # cmc ranks
    cfg.test.evaluate = False  # test only
    cfg.test.start_eval = 0  # start to evaluate after a specific epoch
    cfg.test.rerank = False  # use person re-ranking
    cfg.test.visrank = False  # visualize ranked results (only available when cfg.test.evaluate=True)
    cfg.test.visrank_topk = 10  # top-k ranks to visualize
    cfg.test.visrank_count = 10  # number of top-k ranks to plot
    cfg.test.visrank_q_idx_list = [0, 1, 2, 3, 4, 5]  # list of ids of queries for which we want to plot topk rank.
    # If len(visrank_q_idx_list) < visrank_count, remaining ids will be random
    cfg.test.vis_feature_maps = False
    cfg.test.visrank_per_body_part = False
    cfg.test.vis_embedding_projection = False
    cfg.test.save_features = False  # save test set extracted features to disk
    cfg.test.detailed_ranking = True  # display ranking performance for each part individually
    cfg.test.part_based = CN()
    cfg.test.part_based.dist_combine_strat = "mean"  # ['mean', 'max'] local part based distances are combined into a global distance using this strategy

    # inference
    cfg.inference = CN()
    cfg.inference.enabled = False
    cfg.inference.input_folder = ""

    return cfg


# Config keys whose changes (vs the default config) should NOT be reported by
# display_config_diff(): these are environment/bookkeeping options that do not
# affect the experiment's scientific outcome.
keys_to_ignore_in_diff = {
    "cfg.project",
    "cfg.model.save_model_flag",
    "cfg.model.bpbreid.backbone",
    "cfg.model.bpbreid.learnable_attention_enabled",
    "cfg.model.bpbreid.masks.parts_num",
    "cfg.model.bpbreid.masks.dir",
    "cfg.data.type",
    "cfg.data.root",
    "cfg.data.sources",
    "cfg.data.targets",
    "cfg.data.workers",
    "cfg.data.split_id",
    "cfg.data.combineall",
    "cfg.data.save_dir",
    "cfg.train.eval_freq",
    "cfg.train.batch_debug_freq",
    "cfg.train.batch_log_freq",
    "cfg.test.batch_size",
    "cfg.test.batch_size_pairwise_dist_matrix",
    "cfg.test.dist_metric",
    "cfg.test.ranks",
    "cfg.test.evaluate",
    "cfg.test.start_eval",
    "cfg.test.rerank",
    "cfg.test.visrank",
    "cfg.test.visrank_topk",
    "cfg.test.visrank_count",
    "cfg.test.visrank_q_idx_list",
    "cfg.test.vis_feature_maps",
    "cfg.test.visrank_per_body_part",
    "cfg.test.vis_embedding_projection",
    "cfg.test.save_features",
    "cfg.test.detailed_ranking",
    "cfg.train.open_layers",
    "cfg.model.load_weights",
}


def imagedata_kwargs(cfg):
    """Map config entries to the keyword arguments of ImageDataManager."""
    return {
        'config': cfg,
        'root': cfg.data.root,
        'sources': cfg.data.sources,
        'targets': cfg.data.targets,
        'height': cfg.data.height,
        'width': cfg.data.width,
        'transforms': cfg.data.transforms,
        'norm_mean': cfg.data.norm_mean,
        'norm_std': cfg.data.norm_std,
        'use_gpu': cfg.use_gpu,
        'split_id': cfg.data.split_id,
        'combineall': cfg.data.combineall,
        'load_train_targets': cfg.data.load_train_targets,
        'batch_size_train': cfg.train.batch_size,
        'batch_size_test': cfg.test.batch_size,
        'workers': cfg.data.workers,
        'num_instances': cfg.sampler.num_instances,
        'train_sampler': cfg.sampler.train_sampler,
        'train_sampler_t': cfg.sampler.train_sampler_t,
        # image
        'cuhk03_labeled': cfg.cuhk03.labeled_images,
        'cuhk03_classic_split': cfg.cuhk03.classic_split,
        'market1501_500k': cfg.market1501.use_500k_distractors,
        # body part masks are only needed by the part-based engine
        'use_masks': cfg.loss.name == 'part_based',
        'masks_dir': cfg.model.bpbreid.masks.dir,
    }


def videodata_kwargs(cfg):
    """Map config entries to the keyword arguments of VideoDataManager."""
    return {
        'root': cfg.data.root,
        'sources': cfg.data.sources,
        'targets': cfg.data.targets,
        'height': cfg.data.height,
        'width': cfg.data.width,
        'transforms': cfg.data.transforms,
        'norm_mean': cfg.data.norm_mean,
        'norm_std': cfg.data.norm_std,
        'use_gpu': cfg.use_gpu,
        'split_id': cfg.data.split_id,
        'combineall': cfg.data.combineall,
        'batch_size_train': cfg.train.batch_size,
        'batch_size_test': cfg.test.batch_size,
        'workers': cfg.data.workers,
        'num_instances': cfg.sampler.num_instances,
        'train_sampler': cfg.sampler.train_sampler,
        # video
        'seq_len': cfg.video.seq_len,
        'sample_method': cfg.video.sample_method
    }


def optimizer_kwargs(cfg):
    """Map config entries to the keyword arguments of torchreid.optim.build_optimizer."""
    return {
        'optim': cfg.train.optim,
        'lr': cfg.train.lr,
        'weight_decay': cfg.train.weight_decay,
        'momentum': cfg.sgd.momentum,
        'sgd_dampening': cfg.sgd.dampening,
        'sgd_nesterov': cfg.sgd.nesterov,
        'rmsprop_alpha': cfg.rmsprop.alpha,
        'adam_beta1': cfg.adam.beta1,
        'adam_beta2': cfg.adam.beta2,
        'staged_lr': cfg.train.staged_lr,
        'new_layers': cfg.train.new_layers,
        'base_lr_mult': cfg.train.base_lr_mult
    }


def lr_scheduler_kwargs(cfg):
    """Map config entries to the keyword arguments of torchreid.optim.build_lr_scheduler."""
    return {
        'lr_scheduler': cfg.train.lr_scheduler,
        'stepsize': cfg.train.stepsize,
        'gamma': cfg.train.gamma,
        'max_epoch': cfg.train.max_epoch
    }


def engine_run_kwargs(cfg):
    """Map config entries to the keyword arguments of Engine.run()."""
    return {
        'save_dir': cfg.data.save_dir,
        'fixbase_epoch': cfg.train.fixbase_epoch,
        'open_layers': cfg.train.open_layers,
        'test_only': cfg.test.evaluate,
        'dist_metric': cfg.test.dist_metric,
        'normalize_feature': cfg.test.normalize_feature,
        'visrank': cfg.test.visrank,
        'visrank_topk': cfg.test.visrank_topk,
        'visrank_q_idx_list': cfg.test.visrank_q_idx_list,
        'visrank_count': cfg.test.visrank_count,
        'use_metric_cuhk03': cfg.cuhk03.use_metric_cuhk03,
        'ranks': cfg.test.ranks,
        'rerank': cfg.test.rerank,
        'save_features': cfg.test.save_features
    }


def display_config_diff(cfg, default_cfg_copy):
    """Print the entries of `cfg` that differ from the default config and store
    a (possibly truncated) string summary in `cfg.project.diff_config`.

    Keys listed in `keys_to_ignore_in_diff` (matched by dotted-path prefix) are
    excluded from the report.
    """

    def iterdict(d):
        # Recursively stringify list values in-place so DeepDiff compares them
        # as scalars (replacing an existing key's value while iterating a dict
        # is safe in Python since the dict size does not change).
        for k, v in d.items():
            if isinstance(v, dict):
                iterdict(v)
            else:
                if type(v) == list:  # NOTE(review): isinstance(v, list) would be the idiomatic check
                    v = str(v)
                d.update({k: v})
        return d

    ddiff = DeepDiff(iterdict(default_cfg_copy), iterdict(cfg.clone()), ignore_order=True)
    cfg_diff = {}
    if 'values_changed' in ddiff:
        for k, v in ddiff['values_changed'].items():
            # DeepDiff keys look like "root['train']['lr']"; rewrite to "cfg.train.lr".
            reformatted_key = "cfg." + k.replace("root['", "").replace("']['", ".").replace("']", "")
            if "[" in reformatted_key:
                reformatted_key = reformatted_key.split("[")[0]
            reformatted_key_split = reformatted_key.split(".")
            # Ignore the key if any dotted-path prefix of it is blacklisted.
            ignore_key = False
            for i in range(2, len(reformatted_key_split) + 1):
                prefix = ".".join(reformatted_key_split[0:i])
                if prefix in keys_to_ignore_in_diff:
                    ignore_key = True
                    break
            if not ignore_key:
                # Use only the leaf key name in the reported diff.
                key = re.findall(r"\['([A-Za-z0-9_]+)'\]", k)[-1]
                cfg_diff[key] = v['new_value']
    print("Diff from default config :")
    pprint.pprint(cfg_diff)
    # Keep the stored summary short (truncate to 127 chars with an ellipsis).
    if len(str(cfg_diff)) < 128:
        cfg.project.diff_config = str(cfg_diff)
    else:
        cfg.project.diff_config = str(cfg_diff)[0:124] + "..."
================================================ FILE: torchreid/scripts/get_labels.py ================================================ import argparse import glob import os from pathlib import Path from typing import List import cv2 import detectron2.data.transforms as T import numpy as np import openpifpaf import torch import tqdm from detectron2.checkpoint import DetectionCheckpointer from detectron2.config import CfgNode, get_cfg from detectron2.model_zoo import get_checkpoint_url, get_config_file from detectron2.modeling import build_model from detectron2.structures import Instances from torch.utils.data import DataLoader, Dataset def build_config_maskrcnn(model_config_name): cfg = get_cfg() cfg.merge_from_file(cfg_filename=get_config_file(model_config_name)) cfg.MODEL.WEIGHTS = get_checkpoint_url(model_config_name) cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' return cfg def compare_arrays(array1, array2): """ Compare two arrays and calculate Mean Absolute Error (MAE) and percentage difference. Args: array1 (np.ndarray): First array. array2 (np.ndarray): Second array. Returns: mae (float): Mean Absolute Error (MAE) between the arrays. mae_percentage (float): Percentage difference between the arrays. """ def calculate_mae(array1, array2): # Calculate Mean Absolute Error (MAE) mae = np.mean(np.abs(array1 - array2)) mae_percentage = (mae / np.max(array1)) * 100 return mae, mae_percentage print(f"Average percentage difference: {calculate_mae(array1, array2)[1]}%") def get_image_paths(source, path_format=False): """ Get the paths of all image files in a directory. Args: source (str): Directory path. path_format (bool, optional): Return paths as Path objects if True, otherwise as strings. Default is False. Returns: image_paths (List[str or Path]): List of image file paths. 
""" image_paths = glob.glob(f"{source}/**/*.[jJ][pP][gG]", recursive=True) + \ glob.glob(f"{source}/**/*.[pP][nN][gG]", recursive=True) + \ glob.glob(f"{source}/**/*.[jJ][pP][eE][gG]", recursive=True) + \ glob.glob(f"{source}/**/*.[tT][iI][fF]", recursive=True) + \ glob.glob(f"{source}/**/*.[tT][iI][fF][fF]", recursive=True) if path_format: image_paths = [Path(path_str) for path_str in image_paths] return image_paths def format_path(img_path, dataset_dir): """ Formats the given image path based on the dataset directory. Args: img_path (str): The path of the image file. dataset_dir (str): The directory path of the dataset. Returns: str: The formatted path of the image file. """ if "occluded_reid" in dataset_dir.lower() or "occluded-reid" in dataset_dir.lower(): return os.path.join(os.path.basename(os.path.dirname(os.path.dirname(img_path))), os.path.basename(img_path)) elif "p-dukemtmc_reid" in dataset_dir.lower() or "p-dukemtmc-reid" in dataset_dir.lower(): return os.path.join(os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(img_path)))), os.path.basename(os.path.dirname(os.path.dirname(img_path))), os.path.basename(img_path)) return os.path.relpath(img_path, dataset_dir) def get_label_paths(is_mask, img_paths, dataset_dir): """ Get the paths of label files corresponding to the image paths. Args: is_mask (bool): Indicates if the label is a mask or not. img_paths (List[str]): List of image file paths. dataset_dir (str): Directory path of the dataset. Returns: relative_paths (List[str]): List of relative paths of the image files. file_paths (List[str]): List of label file paths. 
""" relative_paths, file_paths = [], [] for img_name in img_paths: relative_path = format_path(img_name, dataset_dir) if not is_mask: file_path = os.path.join(dataset_dir, "masks", "pifpaf", relative_path + ".confidence_fields.npy") else: file_path = os.path.join(dataset_dir, "masks", "pifpaf_maskrcnn_filtering", relative_path + ".npy") relative_paths.append(relative_path) file_paths.append(file_path) return relative_paths, file_paths def skip_existing(is_mask, imagery, dataset_dir): """ Filter out image paths for which label files already exist. Args: is_mask (bool): Indicates if the label is a mask or not. imagery (List[str]): List of image file paths. dataset_dir (str): Directory path of the dataset. Returns: new_imagery (List[str]): List of image file paths for which label files do not exist. """ relative_paths, file_paths = get_label_paths(is_mask=is_mask, img_paths=imagery, dataset_dir=dataset_dir) new_imagery = [] for index, file_path in enumerate(file_paths): if not os.path.exists(file_path): new_imagery.append(imagery[index]) return new_imagery def save_files(files, files_path, verbose=True): """ Save files to specified paths. Args: files (List[object]): List of files to be saved. files_path (List[str]): List of paths where files will be saved. verbose (bool, optional): Print progress if True. Default is True. """ for file, file_path in zip(files, files_path): os.makedirs(os.path.dirname(file_path), exist_ok=True) np.save(file_path, file) if verbose: print(f"Processed {os.path.basename(file_path)}") class ImageDataset(Dataset): """ Custom dataset class for loading images. Args: imagery (List[Path]): List of image file paths. Returns: (str, np.ndarray): Tuple containing the image file path and the loaded image. 
""" def __init__(self, imagery: List[Path]): self.imagery = imagery def __getitem__(self, index): return self.imagery[index], cv2.imread(str(self.imagery[index])) def __len__(self): return len(self.imagery) class BatchPifPaf: def __init__(self, model_name: str = "shufflenetv2k16", batch_size: int = None, workers: int = None): """ Initializes a BatchPifPaf object. Args: model_name (str): Name of the OpenPifPaf model to use. batch_size (int): Batch size for inference. workers (int): Number of workers for data loading. """ models = [ 'resnet50', 'shufflenetv2k16', 'shufflenetv2k30', ] assert model_name in models, f"Model name must be one of {models}" print(f"* OpenPifPaf model -> {model_name}") # Define the OpenPifPaf model self.model = openpifpaf.Predictor(checkpoint=model_name, visualize_image=True, visualize_processed_image=True) self.batch_size = batch_size if batch_size else self.model.batch_size self.workers = workers if workers else self.model.loader_workers if self.model.loader_workers is not None else 0 self.__collate = openpifpaf.datasets.collate_images_anns_meta def __call__(self, imagery: List[Path] or List[str], dataset_dir: List[Path] or List[str], is_overwrite: bool = False, verbose: bool = False): """ Perform batch processing on the given imagery using the OpenPifPaf model. Args: imagery (List[Path] or List[str]): List of image paths or image file names. dataset_dir (List[Path] or List[str]): List of dataset directories. is_overwrite (bool, optional): Whether to overwrite existing files. Defaults to False. verbose (bool, optional): Whether to print verbose information. Defaults to False. Yields: torch.Tensor: Predictions for each image as a NumPy array. """ assert len(imagery) > 0, "No images found in imagery." 
if not is_overwrite: imagery = skip_existing(False, imagery, dataset_dir) dataset = openpifpaf.datasets.ImageList( imagery, preprocess=self.model.preprocess, with_raw_image=True ) loader = DataLoader( dataset, self.batch_size, shuffle=False, pin_memory=self.model.device.type != 'cpu', num_workers=self.workers, collate_fn=self.__collate, ) total_batches = len(loader) progress_bar = tqdm.tqdm(total=total_batches, desc="Processing", unit="batch") with torch.no_grad(): for batch in loader: if len(batch) == 3: processed_image_batch, gt_anns_batch, meta_batch = batch elif len(batch) == 4: image_batch, processed_image_batch, gt_anns_batch, meta_batch = batch # Specify the file path where you want to save the .npy file relative_paths, file_paths = get_label_paths(False, [d["file_name"] for d in meta_batch], dataset_dir) # Obtain the confidence values (pifpaf_conf) for the processed image batch pifpaf_conf: torch.Tensor = self.__get_pifpaf_conf(processed_image_batch) # Save the NumPy array to the .npy file save_files(pifpaf_conf.numpy(), file_paths, verbose) progress_bar.update(1) progress_bar.close() def __get_pifpaf_conf(self, processed_image_batch: Instances): """ Get the confidence scores from the processed image batch. Args: processed_image_batch (Instances): Processed image batch containing pose estimation fields. Returns: torch.Tensor: Confidence scores for keypoints and connections. 
""" # Retrieve the pose estimation fields from the model processor fields_batch = self.model.processor.fields_batch(self.model.model, processed_image_batch, device=self.model.device) # Extract the pif (keypoint) and paf (connection) fields from the batch pif, paf = zip(*fields_batch) # Extract the confidence scores for keypoints (index 1 in each field) pif_confidence_scores = torch.stack(pif)[:, :, 1] paf_confidence_scores = torch.stack(paf)[:, :, 1] # Concatenate the confidence scores for keypoints and connections pifpaf_confidence_scores = torch.cat((pif_confidence_scores, paf_confidence_scores), dim=1) # Return the concatenated confidence scores return pifpaf_confidence_scores class BatchMask: def __init__(self, cfg: CfgNode or str, batch_size: int = None, workers: int = None): """ Initialize the BatchMask class for performing batched instance segmentation using a Mask R-CNN model. Args: cfg (CfgNode or str): Configuration options for the Mask R-CNN model. batch_size (int, optional): Batch size for processing images. Defaults to None. workers (int, optional): Number of worker processes for data loading. Defaults to None. 
""" # Clone the provided configuration or get a default configuration self.cfg = build_config_maskrcnn(cfg) if isinstance(cfg, str) else cfg.clone() print(f"* MaskRCNN model -> {cfg if isinstance(cfg, str) else self.cfg.MODEL.WEIGHTS}") # Set the batch size for processing images, defaulting to 32 if not provided self.batch_size = batch_size if batch_size else 32 # Set the number of worker processes for data loading, defaulting to the number of CPU cores self.workers = workers if workers is not None else 0 # Build the Mask R-CNN model self.model = build_model(self.cfg) # Set the model to evaluation mode self.model.eval() # Load the pre-trained weights for the model checkpointer = DetectionCheckpointer(self.model) checkpointer.load(self.cfg.MODEL.WEIGHTS) # Define the augmentation transform for resizing images self.aug = T.ResizeShortestEdge( [self.cfg.INPUT.MIN_SIZE_TEST, self.cfg.INPUT.MIN_SIZE_TEST], self.cfg.INPUT.MAX_SIZE_TEST ) # Set the input image format to RGB or BGR based on the configuration self.input_format = self.cfg.INPUT.FORMAT assert self.input_format in ["RGB", "BGR"], self.input_format def __collate(self, batch): """ Collates a batch of images and their paths for use in data loading. Args: batch (list): A list of tuples containing image paths and corresponding images. Returns: tuple: A tuple containing two lists: the paths of the images and the processed data. 
""" paths, data = [], [] for path, image in batch: if self.input_format == "RGB": # Convert image format from RGB to BGR if required by the model image = image[:, :, ::-1] height, width = image.shape[:2] # Apply augmentation and transformation to the image image = self.aug.get_transform(image).apply_image(image) image = image.astype("float32").transpose(2, 0, 1) image = torch.as_tensor(image) data.append({"image": image, "height": height, "width": width}) paths.append(path) return paths, data def __call__(self, imagery: List[Path] or List[str], dataset_dir: List[Path] or List[str], is_overwrite: bool = False, verbose: bool = False): """ Perform the batch processing of imagery to generate and save mask files. Args: imagery (List[Path] or List[str]): A list of image paths or image filenames. dataset_dir (List[Path] or List[str]): A list of dataset directories. is_overwrite (bool, optional): Whether to overwrite existing mask files. Defaults to False. verbose (bool, optional): Whether to print verbose information. Defaults to False. """ assert len(imagery) > 0, "No images found in imagery." if not is_overwrite: # Skip existing images if overwrite is disabled imagery = skip_existing(True, imagery, dataset_dir) # Create an instance of the ImageDataset class dataset = ImageDataset(imagery) # Create a data loader for batch processing loader = DataLoader( dataset, self.batch_size, shuffle=False, num_workers=self.workers, collate_fn=self.__collate, pin_memory=True ) total_batches = len(loader) progress_bar = tqdm.tqdm(total=total_batches, desc="Processing", unit="batch") with torch.no_grad(): for paths, batch in loader: # Get the paths and file paths for saving the mask files relative_paths, pifpaf_file_paths = get_label_paths(is_mask=False, img_paths=paths, dataset_dir=dataset_dir) assert all(os.path.exists(path) for path in pifpaf_file_paths), "Some PiPaf Label File ('.confidence_fields.npy') does not exist!" 
# Filter the predictions using the mask files pifpaf_filtered: List[np.ndarray] = self.__filter_pifpaf_with_mask(batch, pifpaf_file_paths) # Get the file paths for saving the mask files _, mask_file_paths = get_label_paths(is_mask=True, img_paths=paths, dataset_dir=dataset_dir) # Save the filtered mask files save_files(pifpaf_filtered, mask_file_paths, verbose) progress_bar.update(1) progress_bar.close() def __filter_pifpaf_with_mask(self, batch, pifpaf_file_paths: List[Path] or List[str]): """ Filter PifPaf predictions using segmentation masks. Args: paths (List[Path] or List[str]): List of image paths or filenames. batch: Batch data containing images. pifpaf_file_paths (List[Path] or List[str]): List of PifPaf label file paths. Returns: List[np.ndarray]: Filtered PifPaf arrays. """ # Order the bounding boxes by distance from the center of the image(default) def order_bbox(image_size, bbox_list, only_horizontal=False, only_vertical=False): distances = [] image_height, image_width = image_size center_x = image_width // 2 center_y = image_height // 2 for i, bbox in enumerate(bbox_list): x1, y1, x2, y2 = bbox bbox_center_x = (x1 + x2) // 2 bbox_center_y = (y1 + y2) // 2 distance = bbox_center_x if only_horizontal else bbox_center_y if only_vertical else np.sqrt( (bbox_center_x - center_x) ** 2 + (bbox_center_y - center_y) ** 2) distances.append((i, distance)) distances = sorted(distances, key=lambda x: x[1]) return distances # Filter segmentations masks based on class and distance from the center of the image def filter_masks(results): image_size = results[0]["instances"].image_size pred_boxes, scores, pred_classes, pred_masks = results[0]["instances"].get_fields().values() if len(pred_masks) == 0: raise Exception("Error: Pifpaf model did not return any masks!") # Filter out all masks that are not person filtered_boxes, filtered_masks = zip( *[(box.cpu().numpy(), mask.cpu().numpy()) for box, mask, cls in zip(pred_boxes, pred_masks, pred_classes) if cls == 0]) # Order 
the masks by bbox distance to the center of the image distances = order_bbox(image_size, filtered_boxes) filtered_masks = [filtered_masks[i] for i, _ in distances] return filtered_masks # Filter PifPaf array using segmentation mask def filter_pifpaf_with_mask(pifpaf_array, mask, is_resize_pifpaf=False, interpolation=cv2.INTER_CUBIC): if is_resize_pifpaf: # Resize the PifPaf array to match the size of the mask pifpaf_resized = np.transpose(pifpaf_array, (1, 2, 0)) pifpaf_resized = cv2.resize(pifpaf_resized, dsize=(mask.shape[1], mask.shape[0]), interpolation=interpolation) pifpaf_resized = np.transpose(pifpaf_resized, (2, 0, 1)) # Filter the PifPaf array using the segmentation mask filtered_pifpaf = mask * pifpaf_resized filtered_pifpaf = np.array( [cv2.resize(slice, (9, 17), interpolation=cv2.INTER_CUBIC) for slice in filtered_pifpaf]) return filtered_pifpaf # Resize the mask to match the size of the PifPaf array mask_resized = cv2.resize(mask.astype(np.uint8), (pifpaf_array.shape[2], pifpaf_array.shape[1])) filtered_pifpaf = mask_resized * pifpaf_array return filtered_pifpaf # Get the masks from the PifPaf predictions masks = filter_masks(self.model(batch)) # Load the PifPaf label arrays pifpaf_labels = [np.load(pifpaf_file_path) for pifpaf_file_path in pifpaf_file_paths] # Filter the PifPaf arrays using the masks pifpaf_filtered = [filter_pifpaf_with_mask(pifpaf_label, mask) for pifpaf_label, mask in zip(pifpaf_labels, masks)] return pifpaf_filtered def main(): # Parse command line arguments parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument('-s', '--source', type=str, required=True, help='Source dataset containing image files') parser.add_argument('--maskrcnn-cfg-file', type=str, default="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", help='Configuration file for the Mask R-CNN model') parser.add_argument('--pifpaf-model-name', type=str, default="shufflenetv2k16", help='Name of the PifPaf 
model') parser.add_argument('-b', '--batch-size', type=int, help='Batch size for processing images') parser.add_argument('--num-workers', type=int, help='Number of worker processes for data loading') args = parser.parse_args() # Get image paths img_paths = get_image_paths(args.source) # Perform PifPaf processing pifpaf_model = BatchPifPaf(model_name=args.pifpaf_model_name, batch_size=args.batch_size, workers=args.num_workers) pifpaf_model(imagery=img_paths, dataset_dir=args.source, is_overwrite=False) # Perform Mask R-CNN processing mask_model = BatchMask(cfg=args.maskrcnn_cfg_file, batch_size=args.batch_size, workers=args.num_workers) mask_model(imagery=img_paths, dataset_dir=args.source, is_overwrite=False) if __name__ == '__main__': main() ================================================ FILE: torchreid/scripts/main.py ================================================ import os import argparse import torch import torch.nn as nn import torchreid from torchreid.tools.extract_part_based_features import extract_reid_features from torchreid.data.masks_transforms import compute_parts_num_and_names from torchreid.utils import ( Logger, check_isfile, set_random_seed, collect_env_info, resume_from_checkpoint, load_pretrained_weights, compute_model_complexity, Writer, load_checkpoint ) from torchreid.scripts.default_config import ( imagedata_kwargs, optimizer_kwargs, videodata_kwargs, engine_run_kwargs, get_default_config, lr_scheduler_kwargs, display_config_diff ) from torchreid.utils.engine_state import EngineState def build_datamanager(cfg): if cfg.data.type == 'image': return torchreid.data.ImageDataManager(**imagedata_kwargs(cfg)) else: return torchreid.data.VideoDataManager(**videodata_kwargs(cfg)) def build_engine(cfg, datamanager, model, optimizer, scheduler, writer, engine_state): if cfg.data.type == 'image': if cfg.loss.name == 'softmax': engine = torchreid.engine.ImageSoftmaxEngine( datamanager, model, optimizer=optimizer, scheduler=scheduler, 
use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer, engine_state=engine_state ) elif cfg.loss.name == 'triplet': engine = torchreid.engine.ImageTripletEngine( datamanager, model, optimizer=optimizer, margin=cfg.loss.triplet.margin, weight_t=cfg.loss.triplet.weight_t, weight_x=cfg.loss.triplet.weight_x, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer, engine_state=engine_state ) elif cfg.loss.name == 'part_based': engine = torchreid.engine.ImagePartBasedEngine( datamanager, model, optimizer=optimizer, loss_name=cfg.loss.part_based.name, config=cfg, margin=cfg.loss.triplet.margin, scheduler=scheduler, use_gpu=cfg.use_gpu, save_model_flag=cfg.model.save_model_flag, writer=writer, engine_state=engine_state, dist_combine_strat=cfg.test.part_based.dist_combine_strat, batch_size_pairwise_dist_matrix=cfg.test.batch_size_pairwise_dist_matrix, mask_filtering_training=cfg.model.bpbreid.mask_filtering_training, mask_filtering_testing=cfg.model.bpbreid.mask_filtering_testing ) else: if cfg.loss.name == 'softmax': engine = torchreid.engine.VideoSoftmaxEngine( datamanager, model, optimizer=optimizer, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, pooling_method=cfg.video.pooling_method, save_model_flag=cfg.model.save_model_flag, writer=writer, engine_state=engine_state ) else: engine = torchreid.engine.VideoTripletEngine( datamanager, model, optimizer=optimizer, margin=cfg.loss.triplet.margin, weight_t=cfg.loss.triplet.weight_t, weight_x=cfg.loss.triplet.weight_x, scheduler=scheduler, use_gpu=cfg.use_gpu, label_smooth=cfg.loss.softmax.label_smooth, save_model_flag=cfg.model.save_model_flag, writer=writer, engine_state=engine_state ) return engine def reset_config(cfg, args): if args.root: cfg.data.root = args.root if args.save_dir: cfg.data.save_dir = args.save_dir if 
args.inference_enabled: cfg.inference.enabled = args.inference_enabled if args.sources: cfg.data.sources = args.sources if args.targets: cfg.data.targets = args.targets if args.transforms: cfg.data.transforms = args.transforms if args.job_id: cfg.project.job_id = args.job_id def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( '--config-file', type=str, default='', help='path to config file' ) parser.add_argument( '-s', '--sources', type=str, nargs='+', help='source datasets (delimited by space)' ) parser.add_argument( '-t', '--targets', type=str, nargs='+', help='target datasets (delimited by space)' ) parser.add_argument( '--transforms', type=str, nargs='+', help='data augmentation' ) parser.add_argument( '--root', type=str, default='', help='path to data root' ) parser.add_argument( '--save_dir', type=str, default='', help='path to output root dir' ) parser.add_argument( 'opts', default=None, nargs=argparse.REMAINDER, help='Modify config options using the command-line' ) parser.add_argument( '--job-id', type=int, default=None, help='Slurm job id' ) parser.add_argument( '--inference-enabled', type=bool, default=False, ) args = parser.parse_args() cfg = build_config(args, args.config_file) engine, model = build_torchreid_model_engine(cfg) print('Starting experiment {} with job id {} and creation date {}'.format(cfg.project.experiment_id, cfg.project.job_id, cfg.project.start_time)) engine.run(**engine_run_kwargs(cfg)) print( 'End of experiment {} with job id {} and creation date {}'.format(cfg.project.experiment_id, cfg.project.job_id, cfg.project.start_time)) if cfg.inference.enabled: print("Starting inference on external data") extract_reid_features(cfg, cfg.inference.input_folder, cfg.data.save_dir, model) def build_config(args=None, config_file=None, config=None): cfg = get_default_config() default_cfg_copy = cfg.clone() cfg.use_gpu = torch.cuda.is_available() if config: 
    cfg.merge_from_other_cfg(config)
    if config_file:
        cfg.merge_from_file(config_file)
        cfg.project.config_file = os.path.basename(config_file)
    if args is not None:
        reset_config(cfg, args)
        cfg.merge_from_list(args.opts)
    # set parts information (number of parts K and each part name),
    # depending on the original loaded masks size or the transformation applied:
    compute_parts_num_and_names(cfg)
    # When evaluating a saved model, optionally re-use the bpbreid sub-config
    # stored inside its checkpoint so the architecture matches the weights.
    if cfg.model.load_weights and check_isfile(cfg.model.load_weights) and cfg.model.load_config:
        checkpoint = load_checkpoint(cfg.model.load_weights)
        if 'config' in checkpoint:
            print('Overwriting current config with config loaded from {}'.format(cfg.model.load_weights))
            bpbreid_config = checkpoint['config'].model.bpbreid
            if checkpoint['config'].data.sources[0] != cfg.data.targets[0]:
                print('WARNING: the train dataset of the loaded model is different from the target dataset in the '
                      'current config.')
            # Machine-specific paths stored in the checkpoint must not
            # override the local ones:
            bpbreid_config.pop('hrnet_pretrained_path', None)
            bpbreid_config.masks.pop('dir', None)
            cfg.model.bpbreid.merge_from_other_cfg(bpbreid_config)
        else:
            print('Could not load config from file {}'.format(cfg.model.load_weights))
    display_config_diff(cfg, default_cfg_copy)
    # Each run gets its own save dir keyed by job id.
    # NOTE(review): os.makedirs() without exist_ok raises if this job id was
    # already used — presumably intentional to avoid overwriting results;
    # confirm.
    cfg.data.save_dir = os.path.join(cfg.data.save_dir, str(cfg.project.job_id))
    os.makedirs(cfg.data.save_dir)
    return cfg


def build_torchreid_model_engine(cfg):
    # Build logger/writer, datamanager, model, optimizer and scheduler from
    # the config, and wrap everything into an engine ready to run.
    if cfg.project.debug_mode:
        torch.autograd.set_detect_anomaly(True)
    logger = Logger(cfg)
    writer = Writer(cfg)
    set_random_seed(cfg.train.seed)
    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True
    datamanager = build_datamanager(cfg)
    engine_state = EngineState(cfg.train.start_epoch, cfg.train.max_epoch)
    writer.init_engine_state(engine_state, cfg.model.bpbreid.masks.parts_num)
    print('Building model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        name=cfg.model.name,
        num_classes=datamanager.num_train_pids,
        loss=cfg.loss.name,
        pretrained=cfg.model.pretrained,
        use_gpu=cfg.use_gpu,
        config=cfg
    )
    logger.add_model(model)
    num_params, flops = compute_model_complexity(
        model, cfg
    )
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))
    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        load_pretrained_weights(model, cfg.model.load_weights)
    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer, **lr_scheduler_kwargs(cfg)
    )
    # Resuming restores model/optimizer/scheduler state and the start epoch.
    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer, scheduler=scheduler
        )
    print(
        'Building {}-engine for {}-reid'.format(cfg.loss.name, cfg.data.type)
    )
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler, writer, engine_state)
    return engine, model


if __name__ == '__main__':
    main()



================================================
FILE: torchreid/tools/__init__.py
================================================



================================================
FILE: torchreid/tools/compute_mean_std.py
================================================
"""
Compute channel-wise mean and standard deviation of a dataset.

Usage:
$ python compute_mean_std.py DATASET_ROOT DATASET_KEY

- The first argument points to the root path where you put the datasets.
- The second argument means the specific dataset key.
For instance, your datasets are put under $DATA and you wanna compute the statistics of Market1501, do $ python compute_mean_std.py $DATA market1501 """ import argparse import torchreid def main(): parser = argparse.ArgumentParser() parser.add_argument('root', type=str) parser.add_argument('sources', type=str) args = parser.parse_args() datamanager = torchreid.data.ImageDataManager( root=args.root, sources=args.sources, targets=None, height=256, width=128, batch_size_train=100, batch_size_test=100, transforms=None, norm_mean=[0., 0., 0.], norm_std=[1., 1., 1.], train_sampler='SequentialSampler' ) train_loader = datamanager.train_loader print('Computing mean and std ...') mean = 0. std = 0. n_samples = 0. for data in train_loader: data = data[0] batch_size = data.size(0) data = data.view(batch_size, data.size(1), -1) mean += data.mean(2).sum(0) std += data.std(2).sum(0) n_samples += batch_size mean /= n_samples std /= n_samples print('Mean: {}'.format(mean)) print('Std: {}'.format(std)) if __name__ == '__main__': main() ================================================ FILE: torchreid/tools/extract_part_based_features.py ================================================ import torch import tqdm import glob import os import numpy as np from torchreid.scripts.default_config import get_default_config, display_config_diff from torchreid.tools.feature_extractor import FeatureExtractor def extract_part_based_features(extractor, image_list, batch_size=400): def chunks(lst, n): """Yield successive n-sized chunks from lst.""" for i in range(0, len(lst), n): yield lst[i:i + n] all_embeddings = [] all_visibility_scores = [] all_masks = [] images_chunks = chunks(image_list, batch_size) for chunk in tqdm.tqdm(images_chunks): embeddings, visibility_scores, masks = extractor(chunk) embeddings = embeddings.cpu().detach() visibility_scores = visibility_scores.cpu().detach() masks = masks.cpu().detach() all_embeddings.append(embeddings) all_visibility_scores.append(visibility_scores) 
all_masks.append(masks) all_embeddings = torch.cat(all_embeddings, 0).numpy() all_visibility_scores = torch.cat(all_visibility_scores, 0).numpy() all_masks = torch.cat(all_masks, 0).numpy() return { "parts_embeddings": all_embeddings, "parts_visibility_scores": all_visibility_scores, "parts_masks": all_masks, } def extract_det_idx(img_path): return int(os.path.basename(img_path).split("_")[0]) def extract_reid_features(cfg, base_folder, out_path, model=None, model_path=None, num_classes=None): extractor = FeatureExtractor( cfg, model_path=model_path, device='cuda' if torch.cuda.is_available() else 'cpu', num_classes=num_classes, model=model ) print("Looking for video folders with images crops in {}".format(base_folder)) folder_list = glob.glob(base_folder + '/*') for folder in folder_list: image_list = glob.glob(os.path.join(folder, "*.png")) image_list.sort(key=extract_det_idx) print("{} images to process for folder {}".format(len(image_list), folder)) results = extract_part_based_features(extractor, image_list, batch_size=50) # dump to disk video_name = os.path.splitext(os.path.basename(folder))[0] parts_embeddings_filename = os.path.join(out_path, "embeddings_" + video_name + ".npy") parts_visibility_scores_filanme = os.path.join(out_path, "visibility_scores_" + video_name + ".npy") parts_masks_filename = os.path.join(out_path, "masks_" + video_name + ".npy") os.makedirs(os.path.dirname(parts_embeddings_filename), exist_ok=True) os.makedirs(os.path.dirname(parts_visibility_scores_filanme), exist_ok=True) os.makedirs(os.path.dirname(parts_masks_filename), exist_ok=True) np.save(parts_embeddings_filename, results['parts_embeddings']) np.save(parts_visibility_scores_filanme, results['parts_visibility_scores']) np.save(parts_masks_filename, results['parts_masks']) print("features saved to {}".format(out_path)) ================================================ FILE: torchreid/tools/feature_extractor.py ================================================ from __future__ 
import absolute_import

import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

from torchreid.utils import (
    check_isfile, load_pretrained_weights, compute_model_complexity
)
from torchreid.models import build_model
from torchreid.data.transforms import build_transforms


class FeatureExtractor(object):
    """A simple API for feature extraction.

    FeatureExtractor can be used like a python function, which
    accepts input of the following types:
        - a list of strings (image paths)
        - a list of numpy.ndarray each with shape (H, W, C)
        - a single string (image path)
        - a single numpy.ndarray with shape (H, W, C)
        - a torch.Tensor with shape (B, C, H, W) or (C, H, W)

    Returned is a torch tensor with shape (B, D) where D is the
    feature dimension.

    Args:
        model_name (str): model name.
        model_path (str): path to model weights.
        image_size (sequence or int): image height and width.
        pixel_mean (list): pixel mean for normalization.
        pixel_std (list): pixel std for normalization.
        pixel_norm (bool): whether to normalize pixels.
        device (str): 'cpu' or 'cuda' (could be specific gpu devices).
        verbose (bool): show model details.
    Examples::

        from torchreid.utils import FeatureExtractor

        extractor = FeatureExtractor(
            model_name='osnet_x1_0',
            model_path='a/b/c/model.pth.tar',
            device='cuda'
        )

        image_list = [
            'a/b/c/image001.jpg',
            'a/b/c/image002.jpg',
            'a/b/c/image003.jpg',
            'a/b/c/image004.jpg',
            'a/b/c/image005.jpg'
        ]

        features = extractor(image_list)
        print(features.shape) # output (5, 512)
    """

    def __init__(
        self,
        cfg,
        model_path='',
        image_size=(256, 128),
        pixel_mean=[0.485, 0.456, 0.406],
        pixel_std=[0.229, 0.224, 0.225],
        pixel_norm=True,
        device='cuda',
        num_classes=1,
        verbose=True,
        model=None
    ):
        # Build model (re-use the provided one when given, e.g. the model
        # that was just trained by the engine).
        if model is None:
            print("building model on device {}".format(device))
            model = build_model(
                name=cfg.model.name,
                loss=cfg.loss.name,
                pretrained=cfg.model.pretrained,
                num_classes=num_classes,
                use_gpu=device.startswith('cuda'),
                pooling=cfg.model.bpbreid.pooling,
                normalization=cfg.model.bpbreid.normalization,
                last_stride=cfg.model.bpbreid.last_stride,
                config=cfg
            )
        model.eval()

        if verbose:
            num_params, flops = compute_model_complexity(
                model, cfg
            )
            print('Model: {}'.format(cfg.model.name))
            print('- params: {:,}'.format(num_params))
            print('- flops: {:,}'.format(flops))

        if model_path and check_isfile(model_path):
            load_pretrained_weights(model, model_path)

        # Build transform functions
        _, preprocess = build_transforms(
            image_size[0],
            image_size[1],
            cfg,
            transforms=None,
            norm_mean=pixel_mean,
            norm_std=pixel_std,
            masks_preprocess=cfg.model.bpbreid.masks.preprocess,
            softmax_weight=cfg.model.bpbreid.masks.softmax_weight,
            background_computation_strategy=cfg.model.bpbreid.masks.background_computation_strategy,
            mask_filtering_threshold=cfg.model.bpbreid.masks.mask_filtering_threshold,
        )

        to_pil = T.ToPILImage()

        device = torch.device(device)
        model.to(device)

        # Class attributes
        self.model = model
        self.preprocess = preprocess
        self.to_pil = to_pil
        self.device = device

    def __call__(self, input, external_parts_masks=None):
        # Normalize the heterogeneous input types into a batched tensor on
        # self.device, optionally paired with external part masks.
        if isinstance(input, list):
            images = []
            masks = []
            for i, element in enumerate(input):
                if isinstance(element, str):
                    image = Image.open(element).convert('RGB')
                elif isinstance(element, np.ndarray):
                    image = self.to_pil(element)
                else:
                    raise TypeError(
                        'Type of each element must belong to [str | numpy.ndarray]'
                    )
                transf_args = {}
                if external_parts_masks is not None:
                    # The preprocess transform expects HWC masks.
                    transf_args['mask'] = external_parts_masks[i].transpose(1, 2, 0)
                transf_args['image'] = np.array(image)
                result = self.preprocess(**transf_args)
                images.append(result['image'])
                if external_parts_masks is not None:
                    masks.append(result['mask'])
            images = torch.stack(images, dim=0)
            images = images.to(self.device)
            if external_parts_masks is not None:
                masks = torch.stack(masks, dim=0)
                masks = masks.to(self.device)
        elif isinstance(input, str):
            image = Image.open(input).convert('RGB')
            transf_args = {}
            if external_parts_masks is not None:
                transf_args['mask'] = external_parts_masks.transpose(1, 2, 0)
            transf_args['image'] = np.array(image)
            result = self.preprocess(**transf_args)
            # NOTE(review): result['mask'] is read unconditionally here, so
            # this branch raises KeyError when external_parts_masks is None
            # — looks like single-input mode is only used with masks; confirm.
            images = result['image'].unsqueeze(0).to(self.device)
            masks = result['mask'].unsqueeze(0).to(self.device)
        elif isinstance(input, np.ndarray):
            image = input
            transf_args = {}
            if external_parts_masks is not None:
                transf_args['mask'] = external_parts_masks.transpose(1, 2, 0)
            transf_args['image'] = np.array(image)
            result = self.preprocess(**transf_args)
            # NOTE(review): same unconditional result['mask'] access as in
            # the str branch above — confirm masks are always provided here.
            images = result['image'].unsqueeze(0).to(self.device)
            masks = result['mask'].unsqueeze(0).to(self.device)
        elif isinstance(input, torch.Tensor):
            if input.dim() == 3:
                input = input.unsqueeze(0)
                external_parts_masks = external_parts_masks.unsqueeze(0)
            images = input.to(self.device)
            masks = external_parts_masks.to(self.device)
        else:
            raise NotImplementedError

        with torch.no_grad():
            features = self.model(images, external_parts_masks=masks)

        return features


================================================
FILE: torchreid/utils/__init__.py
================================================
from __future__ import absolute_import

from .tools import *
from .rerank import re_ranking
from
torchreid.utils.logging.deprecated_loggers import * from .avgmeter import * from .torchtools import * from torchreid.utils.logging.logger import * from .visualization import * from .reidtools import * from .writer import * from .model_complexity import compute_model_complexity ================================================ FILE: torchreid/utils/avgmeter.py ================================================ from __future__ import division, absolute_import import datetime import warnings from collections import defaultdict, OrderedDict import time import torch import numpy as np __all__ = ['AverageMeter', 'MetricMeter', 'TimeMeter', 'TorchTimeMeter', 'EpochMetricsMeter'] from torchreid.utils.engine_state import EngineStateListener class AverageMeter(object): """Computes and stores the average and current value. Examples:: >>> # Initialize a meter to record loss >>> losses = AverageMeter() >>> # Update meter after every minibatch update >>> losses.update(loss_value, batch_size) """ def __init__(self): self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count class BatchMeter(object): def __init__(self, epoch_count, batch_count): self.epoch_count = epoch_count self.batch_count = batch_count self.reset() def reset(self): self.last_val = None self.values = np.zeros((self.epoch_count, self.batch_count)) def update(self, epoch, batch, val): self.last_val = val self.values[epoch, batch] = val def total_for_epoch(self, epoch): self.values[epoch].sum() def avg_for_epoch(self, epoch): self.values[epoch].mean() def batch_avg(self): self.values.mean() def epoch_avg(self): self.values.mean(axis=1).mean() def total(self): self.values.sum() class SingleMeter(EngineStateListener): def __init__(self, engine_state): self.engine_state = engine_state self.engine_state.add_listener(self) self.reset() self.is_empty = True def reset(self): self.total = 0 
self.val = 0 def update(self, val, total): if torch.is_tensor(val): val = val.item() if torch.is_tensor(total): total = total.item() self.val = val self.total = total self.is_empty = False def ratio(self): if self.total == 0: return 0 return self.val / self.total class EpochMeter(EngineStateListener): # With RandomSample, number of batches might change from one epoch to another def __init__(self, engine_state): self.engine_state = engine_state self.engine_state.add_listener(self) self.min = np.zeros(self.engine_state.max_epoch) self.mean = np.zeros(self.engine_state.max_epoch) self.max = np.zeros(self.engine_state.max_epoch) self.batch_size = np.zeros(self.engine_state.max_epoch) self.sum = np.zeros(self.engine_state.max_epoch) self.total = np.zeros(self.engine_state.max_epoch) self.epoch_sum = [] self.epoch_total = [] self.is_empty = True def update(self, val, total=1.): self.is_empty = False if torch.is_tensor(val): val = val.item() if torch.is_tensor(total): total = total.item() self.epoch_sum.append(val) self.epoch_total.append(total) # Listeners def epoch_completed(self): if not self.is_empty: self.epoch_sum = np.array(self.epoch_sum) self.epoch_total = np.array(self.epoch_total) ratio = self.epoch_sum / self.epoch_total self.min[self.engine_state.epoch] = ratio.min() self.mean[self.engine_state.epoch] = ratio.mean() self.max[self.engine_state.epoch] = ratio.max() self.batch_size[self.engine_state.epoch] = len(self.epoch_sum) self.sum[self.engine_state.epoch] = self.epoch_sum.sum() self.total[self.engine_state.epoch] = self.epoch_total.sum() self.epoch_sum = [] self.epoch_total = [] self.is_empty = True # Utils def last_val(self): return self.epoch_sum[-1] def epoch_ratio(self, epoch): return self.mean[epoch] def total_ratio(self): return self.mean.mean() class EpochArrayMeter(EngineStateListener): # With RandomSample, number of batches might change from one epoch to another def __init__(self, engine_state, array_size): self.engine_state = engine_state 
self.engine_state.add_listener(self) self.min = np.zeros((self.engine_state.max_epoch, array_size)) self.mean = np.zeros((self.engine_state.max_epoch, array_size)) self.max = np.zeros((self.engine_state.max_epoch, array_size)) self.batch_size = np.zeros((self.engine_state.max_epoch, array_size)) self.sum = np.zeros((self.engine_state.max_epoch, array_size)) self.total = np.zeros((self.engine_state.max_epoch, array_size)) self.epoch_sum = [] self.epoch_total = [] self.is_empty = True def update(self, val, total): self.is_empty = False if torch.is_tensor(val): if val.is_cuda: val = val.cpu() val = val.numpy() if torch.is_tensor(total): if val.is_cuda: val = val.cpu() val = val.numpy() self.epoch_sum.append(val) self.epoch_total.append(total) # Listeners def epoch_completed(self): if not self.is_empty: self.epoch_sum = np.array(self.epoch_sum) self.epoch_total = np.array(self.epoch_total) ratio = self.epoch_sum / self.epoch_total self.min[self.engine_state.epoch] = ratio.min(axis=0) self.mean[self.engine_state.epoch] = ratio.mean(axis=0) self.max[self.engine_state.epoch] = ratio.max(axis=0) self.batch_size[self.engine_state.epoch] = self.epoch_sum.shape[0] self.sum[self.engine_state.epoch] = self.epoch_sum.sum(axis=0) self.total[self.engine_state.epoch] = self.epoch_total.sum(axis=0) self.epoch_sum = [] self.epoch_total = [] # Utils def epoch_ratio(self, epoch): return self.mean[epoch] def total_ratio(self): return self.mean.mean(axis=0) class TimeMeter(AverageMeter): """Computes and stores the average time and current time value. 
""" def __init__(self, name): super().__init__() self.name = name self.tic = None def _format_time(self, time): return str(datetime.timedelta(milliseconds=round(time))) def total_time(self): return self._format_time(self.sum) def average_time(self): return self._format_time(self.avg) def start(self): self.tic = self._current_time_ms() def stop(self): if self.tic is not None: self.update(self._current_time_ms() - self.tic) self.tic = None return self.val else: warnings.warn("{0}.start() should be called before {0}.stop()".format(self.__class__.__name__, RuntimeWarning)) return 0 @staticmethod def _current_time_ms(): return time.time() * 1000 class TorchTimeMeter(TimeMeter): """Computes and stores the average time and current time value. """ def __init__(self, name, plot=True): super().__init__(name) self.start_event = None self.end_event = None self.cuda = torch.cuda.is_available() self.plot = plot def start(self): if self.cuda: self._start_cuda() else: super().start() def _start_cuda(self): self.start_event = torch.cuda.Event(enable_timing=True) self.end_event = torch.cuda.Event(enable_timing=True) self.start_event.record() def stop(self): if self.cuda: return self._stop_cuda() else: return super().stop() def _stop_cuda(self): if self.start_event is not None: self.end_event.record() torch.cuda.synchronize() # TODO Check if slows down computation self.update(self.start_event.elapsed_time(self.end_event)) self.start_event = None self.end_event = None return self.val else: warnings.warn("{0}.start() should be called before {0}.stop()".format(self.__class__.__name__), RuntimeWarning) return 0 class EpochMetricsMeter(object): """A collection of metrics. Source: https://github.com/KaiyangZhou/Dassl.pytorch Examples:: >>> # 1. Create an instance of MetricMeter >>> metric = EpochMetricsMeter() >>> # 2. Update using a dictionary as input >>> input_dict = {'loss_1': value_1, 'loss_2': value_2} >>> metric.update(input_dict) >>> # 3. 
Convert to string and print
        >>> print(str(metric.summary(epoch)))
    """

    def __init__(self, engine_state, delimiter='\t'):
        self.engine_state = engine_state
        self.meters = {}
        self.delimiter = delimiter

    def update(self, input_dict):
        # Each key gets its own EpochMeter, created lazily on first sight.
        if input_dict is None:
            return

        if not isinstance(input_dict, dict):
            raise TypeError(
                'Input to MetricMeter.update() must be a dictionary'
            )

        for k, v in input_dict.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            if k not in self.meters.keys():
                self.meters[k] = EpochMeter(self.engine_state)
            self.meters[k].update(v)

    def summary(self, epoch):
        # 'name mean [min, max]' per metric for the given epoch, delimited.
        output_str = []
        for name, meter in self.meters.items():
            output_str.append(
                '{} {:.3f} [{:.2f}, {:.2f}]'.format(name, meter.mean[epoch], meter.min[epoch], meter.max[epoch])
            )
        return self.delimiter.join(output_str)


class LossEpochMetricsMeter(object):
    # Two-level variant of EpochMetricsMeter: {group: {metric: EpochMeter}}.
    def __init__(self, engine_state, delimiter='\t'):
        self.engine_state = engine_state
        self.meters = OrderedDict()
        self.delimiter = delimiter

    def update(self, input_dict):
        if input_dict is None:
            return

        if not isinstance(input_dict, dict):
            raise TypeError(
                'Input to MetricMeter.update() must be a dictionary'
            )

        for k1, v1 in input_dict.items():
            if k1 not in self.meters.keys():
                self.meters[k1] = OrderedDict()
            for k2, v2 in v1.items():
                if isinstance(v2, torch.Tensor):
                    v2 = v2.item()
                if k2 not in self.meters[k1].keys():
                    self.meters[k1][k2] = EpochMeter(self.engine_state)
                self.meters[k1][k2].update(v2)

    def summary(self, epoch):
        final_str = ""
        # NOTE(review): loop variable 'dict' shadows the builtin within
        # this method body.
        for name, dict in self.meters.items():
            if dict:
                output_str = ["\n\t" + name + ": "]
                for key, meter in dict.items():
                    output_str.append(
                        '{} {:.3f} [{:.2f}, {:.2f}]'.format(key, meter.mean[epoch], meter.min[epoch], meter.max[epoch])
                    )
                final_str += self.delimiter.join(output_str)
        return final_str


class MetricMeter(object):
    """A collection of metrics.

    Source: https://github.com/KaiyangZhou/Dassl.pytorch

    Examples::
        >>> # 1. Create an instance of MetricMeter
        >>> metric = MetricMeter()
        >>> # 2. Update using a dictionary as input
        >>> input_dict = {'loss_1': value_1, 'loss_2': value_2}
        >>> metric.update(input_dict)
        >>> # 3. Convert to string and print
        >>> print(str(metric))
    """

    def __init__(self, delimiter='\t'):
        self.meters = defaultdict(AverageMeter)
        self.delimiter = delimiter

    def update(self, input_dict):
        if input_dict is None:
            return

        if not isinstance(input_dict, dict):
            raise TypeError(
                'Input to MetricMeter.update() must be a dictionary'
            )

        for k, v in input_dict.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            self.meters[k].update(v)

    def __str__(self):
        # 'name current (running average)' per metric.
        output_str = []
        for name, meter in self.meters.items():
            output_str.append(
                '{} {:.4f} ({:.4f})'.format(name, meter.val, meter.avg)
            )
        return self.delimiter.join(output_str)


================================================
FILE: torchreid/utils/constants.py
================================================
# Short identifiers for the embedding types produced by the part-based model.
GLOBAL = 'globl'
FOREGROUND = 'foreg'
BACKGROUND = 'backg'
CONCAT_PARTS = 'conct'
PARTS = 'parts'
# Batch-normalized counterparts of the above.
BN_GLOBAL = 'bn_globl'
BN_FOREGROUND = 'bn_foreg'
BN_BACKGROUND = 'bn_backg'
BN_CONCAT_PARTS = 'bn_conct'
BN_PARTS = 'bn_parts'
PIXELS = 'pixls'

# Maps each batch-normalized embedding key to its raw counterpart.
bn_correspondants = {
    BN_BACKGROUND: BACKGROUND,
    BN_GLOBAL: GLOBAL,
    BN_FOREGROUND: FOREGROUND,
    BN_CONCAT_PARTS: CONCAT_PARTS,
    BN_PARTS: PARTS,
}


def get_test_embeddings_names(parts_names, test_embeddings):
    # Human-readable names for the embeddings enabled in ``test_embeddings``
    # (a BN key counts the same as its raw counterpart).
    test_embeddings_names = []
    if GLOBAL in test_embeddings or BN_GLOBAL in test_embeddings:
        test_embeddings_names.append('global')
        # test_embeddings_names.append(('global', 'gb'))
    if FOREGROUND in test_embeddings or BN_FOREGROUND in test_embeddings:
        test_embeddings_names.append('foreground')
        # test_embeddings_names.append(('foreground', 'fg'))
    if CONCAT_PARTS in test_embeddings or BN_CONCAT_PARTS in test_embeddings:
        test_embeddings_names.append('concatenated')
        # test_embeddings_names.append(('concatenated', 'cc'))
    if PARTS in test_embeddings or BN_PARTS in test_embeddings:
        test_embeddings_names = test_embeddings_names + parts_names
    return test_embeddings_names
================================================
FILE: torchreid/utils/distribution.py
================================================
import math

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

from torchreid.utils import AverageMeter, Logger
from torchreid.utils.engine_state import EngineState


def plot_body_parts_pairs_distance_distribution(body_part_pairwise_dist, q_pids, g_pids, tag):
    # One subplot per body part, arranged on a near-square grid; logs the
    # figure and returns the average SSMD over all parts.
    m = body_part_pairwise_dist.shape[0]
    cols = round(math.sqrt(m))
    rows = cols
    while rows * cols < m:
        rows += 1
    fig = plt.figure(figsize=(rows*5, cols*3))
    ssmd_meter = AverageMeter()
    for i in range(0, m):
        ax = fig.add_subplot(rows, cols, i+1)
        pos_p_mean, pos_p_std, neg_p_mean, neg_p_std, ssmd = compute_distance_distribution(
            ax, body_part_pairwise_dist[i], q_pids, g_pids,
            "Bp {} pairs distance distribution".format(i))
        ssmd_meter.update(ssmd)
    fig.tight_layout()
    Logger.current_logger().add_figure("{} body part pairs distance distribution".format(tag), fig,
                                       EngineState.current_engine_state().epoch)
    return ssmd_meter.avg


def plot_pairs_distance_distribution(distmat, q_pids, g_pids, tag):
    # Same as above but for a single query/gallery distance matrix.
    fig, ax = plt.subplots()
    result = compute_distance_distribution(ax, distmat, q_pids, g_pids,
                                           "{} pairs distance distribution".format(tag))
    Logger.current_logger().add_figure("{} pairs distance distribution".format(tag), fig,
                                       EngineState.current_engine_state().epoch)
    return result


def compute_distance_distribution(ax, distmat, q_pids, g_pids, title):
    # Split pairwise distances into same-identity (positive) and
    # different-identity (negative) pairs; the actual plotting is disabled
    # below, only the statistics are returned.
    pos_p = distmat[np.expand_dims(q_pids, axis=1) == np.expand_dims(g_pids, axis=0)]
    neg_p = distmat[np.expand_dims(q_pids, axis=1) != np.expand_dims(g_pids, axis=0)]
    pos_p_mean, pos_p_std, neg_p_mean, neg_p_std, ssmd = compute_ssmd(neg_p, pos_p)
    # plot_distributions(ax, neg_p, pos_p, pos_p_mean, pos_p_std, neg_p_mean, neg_p_std)
    # ax.set_title(title + " - SSMD = {:.4f} ".format(ssmd))
    return pos_p_mean, pos_p_std, neg_p_mean, neg_p_std, ssmd


def compute_ssmd(neg_p, pos_p):
    # Strictly-standardized-mean-difference-style separation score between
    # the positive and negative pair distance distributions.
    pos_p_mean = np.mean(pos_p)
    pos_p_std = np.std(pos_p)
    neg_p_mean = np.mean(neg_p)
    neg_p_std = np.std(neg_p)
    # NOTE(review): the textbook SSMD divides by sqrt(var1 + var2); here the
    # denominator is the plain sum of variances (no sqrt) — confirm whether
    # this deviation is intentional before comparing against literature values.
    ssmd = abs(pos_p_mean - neg_p_mean) / (pos_p_std ** 2 + neg_p_std ** 2)
    return pos_p_mean, pos_p_std, neg_p_mean, neg_p_std, ssmd


def plot_distributions(ax, neg_p, pos_p, pos_p_mean, pos_p_std, neg_p_mean, neg_p_std):
    # Histograms of positive/negative pair distances with mean markers.
    bins = 100
    ax.hist(pos_p, weights=np.ones_like(pos_p)/len(pos_p), bins=bins,
            label='{} positive pairs : $\mu={:10.3f}$, $\sigma={:10.3f}$'.format(len(pos_p), pos_p_mean, pos_p_std),
            density=False,
            alpha=0.4,
            color='green')
    ax.hist(neg_p, weights=np.ones_like(neg_p)/len(neg_p), bins=bins,
            label='{} negative pairs : $\mu={:10.3f}$, $\sigma={:10.3f}$'.format(len(neg_p), neg_p_mean, neg_p_std),
            density=False,
            alpha=0.4,
            color='red')
    ax.axvline(x=pos_p_mean, linestyle='--', color='darkgreen')
    ax.axvline(x=neg_p_mean, linestyle='--', color='darkred')
    ax.set_xlabel("pairs distance")
    ax.set_ylabel("pairs count")
    ax.legend()
    ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))


================================================
FILE: torchreid/utils/engine_state.py
================================================
# NOTE(review): 'queue' and 'heappush' appear unused in this module.
import queue
from heapq import heappush


class EngineStateListener:
    # Callback interface; subclasses override only the hooks they need.
    def batch_completed(self):
        pass

    def epoch_started(self):
        pass

    def epoch_completed(self):
        pass

    def training_started(self):
        pass

    def training_completed(self):
        pass

    def test_completed(self):
        pass

    def run_completed(self):
        pass


class EngineState(EngineStateListener):
    # Global training-progress state (epoch/batch counters, lr) broadcast to
    # registered listeners; the last-created instance is reachable through
    # current_engine_state().
    __main_engine_state = None # type: Optional[EngineState]

    @classmethod
    def current_engine_state(cls):
        # type: () -> EngineState
        return cls.__main_engine_state

    def __init__(self, start_epoch, max_epoch):
        self.start_epoch = start_epoch
        self.max_epoch = max_epoch
        self.epoch = start_epoch
        self.batch = 0
        self.global_step = 0
        self.estimated_num_batches = 0
        self.lr = 0
        self.is_training = False
        self.listeners = []
        self.last_listeners = []
        EngineState.__main_engine_state = self

    def add_listener(self, listener, last=False):
        # FIXME ugly
        if last:
self.last_listeners.append(listener) else: self.listeners.append(listener) def batch_completed(self): for listener in self.listeners + self.last_listeners: listener.batch_completed() self.batch += 1 self.global_step += 1 def epoch_started(self): for listener in self.listeners + self.last_listeners: listener.epoch_started() self.batch = 0 def epoch_completed(self): for listener in self.listeners + self.last_listeners: listener.epoch_completed() if self.epoch != self.max_epoch - 1: self.epoch += 1 def training_started(self): for listener in self.listeners + self.last_listeners: listener.training_started() self.is_training = True def training_completed(self): for listener in self.listeners + self.last_listeners: listener.training_completed() self.is_training = False def test_completed(self): for listener in self.listeners + self.last_listeners: listener.test_completed() def run_completed(self): for listener in self.listeners + self.last_listeners: listener.run_completed() def update_lr(self, lr): self.lr = lr ================================================ FILE: torchreid/utils/imagetools.py ================================================ import numpy as np from scipy import signal def gkern(kernlen=21, std=None): """Returns a 2D Gaussian kernel array.""" if std is None: std = kernlen / 4 gkern1d = signal.gaussian(kernlen, std=std).reshape(kernlen, 1) gkern2d = np.outer(gkern1d, gkern1d) return gkern2d def build_gaussian_heatmaps(kp_xyc, w, h, gaussian=None): gaussian_heatmaps = np.zeros((len(kp_xyc), h, w)) for i, kp in enumerate(kp_xyc): # do not use invisible keypoints if kp[2] == 0: continue kpx, kpy = kp[:2].astype(int) if gaussian is None: g_scale = 6 g_radius = int(w / g_scale) gaussian = gkern(g_radius * 2 + 1) else: g_radius = gaussian.shape[0] // 2 rt, rb = min(g_radius, kpy), min(g_radius, h - 1 - kpy) rl, rr = min(g_radius, kpx), min(g_radius, w - 1 - kpx) gaussian_heatmaps[i, kpy - rt:kpy + rb + 1, kpx - rl:kpx + rr + 1] = gaussian[ g_radius - 
rt:g_radius + rb + 1, g_radius - rl:g_radius + rr + 1] return gaussian_heatmaps ================================================ FILE: torchreid/utils/logging/__init__.py ================================================ from .logger import Logger ================================================ FILE: torchreid/utils/logging/deprecated_loggers.py ================================================ from __future__ import absolute_import import os import sys import os.path as osp from torchreid.utils.tools import mkdir_if_missing __all__ = ['StdoutLogger', 'RankLogger'] class StdoutLogger(object): """Writes console output to external text file. Imported from ``_ Args: fpath (str): directory to save logging file. Examples:: >>> import sys >>> import os >>> import os.path as osp >>> from torchreid.utils import Logger >>> save_dir = 'log/resnet50-softmax-market1501' >>> log_name = 'train.log' >>> sys.stdout = Logger(osp.join(args.save_dir, log_name)) """ def __init__(self, fpath=None): self.console = sys.stdout self.file = None if fpath is not None: mkdir_if_missing(osp.dirname(fpath)) self.file = open(fpath, 'w') def __del__(self): self.close() def __enter__(self): pass def __exit__(self, *args): self.close() def write(self, msg): self.console.write(msg) if self.file is not None: self.file.write(msg) def flush(self): self.console.flush() if self.file is not None: self.file.flush() os.fsync(self.file.fileno()) def close(self): self.console.close() if self.file is not None: self.file.close() class RankLogger(object): """Records the rank1 matching accuracy obtained for each test dataset at specified evaluation steps and provides a function to show the summarized results, which are convenient for analysis. Args: sources (str or list): source dataset name(s). targets (str or list): target dataset name(s). 
Examples::
        >>> from torchreid.utils import RankLogger
        >>> s = 'market1501'
        >>> t = 'market1501'
        >>> ranklogger = RankLogger(s, t)
        >>> ranklogger.write(t, 10, 0.5)
        >>> ranklogger.write(t, 20, 0.7)
        >>> ranklogger.write(t, 30, 0.9)
        >>> ranklogger.show_summary()
        >>> # You will see:
        >>> # => Show performance summary
        >>> # market1501 (source)
        >>> # - epoch 10 rank1 50.0%
        >>> # - epoch 20 rank1 70.0%
        >>> # - epoch 30 rank1 90.0%
        >>> # If there are multiple test datasets
        >>> t = ['market1501', 'dukemtmcreid']
        >>> ranklogger = RankLogger(s, t)
        >>> ranklogger.write(t[0], 10, 0.5)
        >>> ranklogger.write(t[0], 20, 0.7)
        >>> ranklogger.write(t[0], 30, 0.9)
        >>> ranklogger.write(t[1], 10, 0.1)
        >>> ranklogger.write(t[1], 20, 0.2)
        >>> ranklogger.write(t[1], 30, 0.3)
        >>> ranklogger.show_summary()
        >>> # You can see:
        >>> # => Show performance summary
        >>> # market1501 (source)
        >>> # - epoch 10 rank1 50.0%
        >>> # - epoch 20 rank1 70.0%
        >>> # - epoch 30 rank1 90.0%
        >>> # dukemtmcreid (target)
        >>> # - epoch 10 rank1 10.0%
        >>> # - epoch 20 rank1 20.0%
        >>> # - epoch 30 rank1 30.0%
    """

    def __init__(self, sources, targets):
        self.sources = sources
        self.targets = targets

        if isinstance(self.sources, str):
            self.sources = [self.sources]

        if isinstance(self.targets, str):
            self.targets = [self.targets]

        # One (epoch, rank1) history per target dataset.
        self.logger = {
            name: {
                'epoch': [],
                'rank1': []
            }
            for name in self.targets
        }

    def write(self, name, epoch, rank1):
        """Writes result.

        Args:
            name (str): dataset name.
            epoch (int): current epoch.
            rank1 (float): rank1 result.
        """
        self.logger[name]['epoch'].append(epoch)
        self.logger[name]['rank1'].append(rank1)

    def show_summary(self):
        """Shows saved results."""
        print('=> Show performance summary')
        for name in self.targets:
            from_where = 'source' if name in self.sources else 'target'
            print('{} ({})'.format(name, from_where))
            for epoch, rank1 in zip(
                self.logger[name]['epoch'], self.logger[name]['rank1']
            ):
                print('- epoch {}\t rank1 {:.1%}'.format(epoch, rank1))


================================================
FILE: torchreid/utils/logging/logger.py
================================================
import os

import cv2
import wandb
import matplotlib.pyplot as plt
from typing import Optional
from pandas.io.json._normalize import nested_to_record
from torch.utils.tensorboard import SummaryWriter


class Logger:
    """
    A class to encapsulate external loggers such as Tensorboard, Allegro ClearML, Neptune, Weight and Biases, Comet, ...
    """
    __main_logger = None # type: Optional[Logger]

    @classmethod
    def current_logger(cls):
        # type: () -> Logger
        return cls.__main_logger

    def __init__(self, cfg):
        # self.cfg = cfg
        # self.model_name = cfg.project.start_time + cfg.project.experiment_id
        # configs
        self.save_disk = cfg.project.logger.save_disk
        self.save_dir = cfg.data.save_dir
        self.matplotlib_show = cfg.project.logger.matplotlib_show
        # init external loggers
        self.tensorboard_logger = None
        if cfg.project.logger.use_tensorboard:
            self.tensorboard_folder = os.path.join(cfg.data.save_dir, 'tensorboard')
            self.tensorboard_logger = SummaryWriter(self.tensorboard_folder)
        self.use_wandb = cfg.project.logger.use_wandb
        if self.use_wandb:
            # os.environ["WANDB_SILENT"] = "true"
            # wandb.init(config=cfg, sync_tensorboard=True, project=cfg.project.name, dir=cfg.data.save_dir, reinit=False)
            if cfg.project.logger.use_tensorboard:
                # Forward TensorBoard events to W&B as well.
                wandb.tensorboard.patch(pytorch=True, save=True, root_logdir=self.tensorboard_folder)
            wandb.init(config=cfg,
                       project=cfg.project.name,
                       dir=cfg.data.save_dir,
                       reinit=False,
                       name=str(cfg.project.job_id),
                       notes=cfg.project.notes,
                       tags=cfg.project.tags
                       )
            # wandb.tensorboard.patch(save=True, tensorboardX=False)
        # The most recently created Logger acts as the process-wide logger.
        Logger.__main_logger = self

    def add_model(self, model):
        if self.use_wandb and wandb.run is not None:
            wandb.watch(model)

    def add_text(self, tag, value):
        if self.use_wandb and wandb.run is not None:
            wandb.log({tag: value})

    def add_scalar(self, tag, scalar_value, step):
        if self.tensorboard_logger is not None:
            self.tensorboard_logger.add_scalar(tag, scalar_value, step)
        if self.use_wandb and wandb.run is not None:
            wandb.log({tag: scalar_value})

    def add_figure(self, tag, figure, step):
        if self.matplotlib_show:
            figure.show()
            plt.waitforbuttonpress()
        if self.tensorboard_logger is not None:
            self.tensorboard_logger.add_figure(tag, figure, step)
        if self.use_wandb and wandb.run is not None:
            wandb.log({tag: wandb.Image(
                figure)}) # FIXME cannot give "figure" directly: Invalid value of type 'builtins.str' received for the 'color' property of scatter.marker Received value: 'none'
        if self.save_disk:
            figure_path = os.path.join(self.save_dir, 'figures', tag + '.png')
            os.makedirs(os.path.dirname(figure_path), exist_ok=True)
            plt.savefig(figure_path)
        plt.close(figure)

    def add_image(self, group, name, image, step):
        """Input image must be in RGB format"""
        # if self.tensorboard_logger is not None:
        #     self.tensorboard_logger.add_figure(tag, figure, self.global_step())
        if self.use_wandb and wandb.run is not None:
            wandb.log({group + name: wandb.Image(image)})
        if self.save_disk:
            image_path = os.path.join(self.save_dir, 'images', f"{group}_{name}.jpg")
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            # cv2 expects BGR channel order when writing to disk.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imwrite(image_path, image)

    def add_embeddings(self, tag, embeddings, labels, imgs, step):
        if self.tensorboard_logger is not None:
            self.tensorboard_logger.add_embedding(embeddings, metadata=labels, label_img=imgs, global_step=step,
                                                  tag=tag, metadata_header=None)

    def close(self):
        if self.tensorboard_logger is not None:
            self.tensorboard_logger.close()
        if self.use_wandb and wandb.run is not None:
            wandb.finish()


================================================
FILE: torchreid/utils/model_complexity.py
================================================
from __future__ import division, print_function, absolute_import
import math
import numpy as np
from itertools import repeat
from collections import namedtuple, defaultdict

import torch

__all__ = ['compute_model_complexity']

from torchreid.models.bpbreid import BPBreID

"""
Utility
"""


def _ntuple(n):
    # Mirror torch.nn.modules.utils._ntuple: broadcast an int to an n-tuple.
    def parse(x):
        if isinstance(x, int):
            return tuple(repeat(x, n))
        return x

    return parse


_single = _ntuple(1)
_pair = _ntuple(2)
_triple = _ntuple(3)

"""
Convolution
"""


def hook_convNd(m, x, y):
    # FLOPs per output element: one multiply-accumulate per kernel weight
    # over all input channels (divided by groups), plus one add for bias.
    k = torch.prod(torch.Tensor(m.kernel_size)).item()
    cin = m.in_channels
    flops_per_ele = k * cin # + (k*cin-1)
    if m.bias is not None:
        flops_per_ele += 1
    flops = flops_per_ele * y.numel() / m.groups
    return int(flops)


"""
Pooling
"""


def hook_maxpool1d(m, x, y):
    # k-1 comparisons per output element.
    flops_per_ele = m.kernel_size - 1
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_maxpool2d(m, x, y):
    k = _pair(m.kernel_size)
    k = torch.prod(torch.Tensor(k)).item()
    # ops: compare
    flops_per_ele = k - 1
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_maxpool3d(m, x, y):
    k = _triple(m.kernel_size)
    k = torch.prod(torch.Tensor(k)).item()
    flops_per_ele = k - 1
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_avgpool1d(m, x, y):
    # k additions (the division is folded in) per output element.
    flops_per_ele = m.kernel_size
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_avgpool2d(m, x, y):
    k = _pair(m.kernel_size)
    k = torch.prod(torch.Tensor(k)).item()
    flops_per_ele = k
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_avgpool3d(m, x, y):
    k = _triple(m.kernel_size)
    k = torch.prod(torch.Tensor(k)).item()
    flops_per_ele = k
    flops = flops_per_ele * y.numel()
    return int(flops)


def hook_adapmaxpool1d(m, x, y):
    x = x[0]
    out_size = m.output_size
    k = math.ceil(x.size(2) / out_size)
    flops_per_ele = k - 1
    flops =
flops_per_ele * y.numel() return int(flops) def hook_adapmaxpool2d(m, x, y): x = x[0] out_size = _pair(m.output_size) k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) k = torch.prod(torch.ceil(k)).item() flops_per_ele = k - 1 flops = flops_per_ele * y.numel() return int(flops) def hook_adapmaxpool3d(m, x, y): x = x[0] out_size = _triple(m.output_size) k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) k = torch.prod(torch.ceil(k)).item() flops_per_ele = k - 1 flops = flops_per_ele * y.numel() return int(flops) def hook_adapavgpool1d(m, x, y): x = x[0] out_size = m.output_size k = math.ceil(x.size(2) / out_size) flops_per_ele = k flops = flops_per_ele * y.numel() return int(flops) def hook_adapavgpool2d(m, x, y): x = x[0] out_size = _pair(m.output_size) k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) k = torch.prod(torch.ceil(k)).item() flops_per_ele = k flops = flops_per_ele * y.numel() return int(flops) def hook_adapavgpool3d(m, x, y): x = x[0] out_size = _triple(m.output_size) k = torch.Tensor(list(x.size()[2:])) / torch.Tensor(out_size) k = torch.prod(torch.ceil(k)).item() flops_per_ele = k flops = flops_per_ele * y.numel() return int(flops) """ Non-linear activations """ def hook_relu(m, x, y): # eq: max(0, x) num_ele = y.numel() return int(num_ele) def hook_leakyrelu(m, x, y): # eq: max(0, x) + negative_slope*min(0, x) num_ele = y.numel() flops = 3 * num_ele return int(flops) """ Normalization """ def hook_batchnormNd(m, x, y): num_ele = y.numel() flops = 2 * num_ele # mean and std if m.affine: flops += 2 * num_ele # gamma and beta return int(flops) def hook_instancenormNd(m, x, y): return hook_batchnormNd(m, x, y) def hook_groupnorm(m, x, y): return hook_batchnormNd(m, x, y) def hook_layernorm(m, x, y): num_ele = y.numel() flops = 2 * num_ele # mean and std if m.elementwise_affine: flops += 2 * num_ele # gamma and beta return int(flops) """ Linear """ def hook_linear(m, x, y): flops_per_ele = m.in_features # + 
(m.in_features-1) if m.bias is not None: flops_per_ele += 1 flops = flops_per_ele * y.numel() return int(flops) __generic_flops_counter = { # Convolution 'Conv1d': hook_convNd, 'Conv2d': hook_convNd, 'Conv3d': hook_convNd, # Pooling 'MaxPool1d': hook_maxpool1d, 'MaxPool2d': hook_maxpool2d, 'MaxPool3d': hook_maxpool3d, 'AvgPool1d': hook_avgpool1d, 'AvgPool2d': hook_avgpool2d, 'AvgPool3d': hook_avgpool3d, 'AdaptiveMaxPool1d': hook_adapmaxpool1d, 'AdaptiveMaxPool2d': hook_adapmaxpool2d, 'AdaptiveMaxPool3d': hook_adapmaxpool3d, 'AdaptiveAvgPool1d': hook_adapavgpool1d, 'AdaptiveAvgPool2d': hook_adapavgpool2d, 'AdaptiveAvgPool3d': hook_adapavgpool3d, # Non-linear activations 'ReLU': hook_relu, 'ReLU6': hook_relu, 'LeakyReLU': hook_leakyrelu, # Normalization 'BatchNorm1d': hook_batchnormNd, 'BatchNorm2d': hook_batchnormNd, 'BatchNorm3d': hook_batchnormNd, 'InstanceNorm1d': hook_instancenormNd, 'InstanceNorm2d': hook_instancenormNd, 'InstanceNorm3d': hook_instancenormNd, 'GroupNorm': hook_groupnorm, 'LayerNorm': hook_layernorm, # Linear 'Linear': hook_linear, } __conv_linear_flops_counter = { # Convolution 'Conv1d': hook_convNd, 'Conv2d': hook_convNd, 'Conv3d': hook_convNd, # Linear 'Linear': hook_linear, } def _get_flops_counter(only_conv_linear): if only_conv_linear: return __conv_linear_flops_counter return __generic_flops_counter def compute_model_complexity( model, cfg, verbose=False, only_conv_linear=True ): """Returns number of parameters and FLOPs. .. note:: (1) this function only provides an estimate of the theoretical time complexity rather than the actual running time which depends on implementations and hardware, and (2) the FLOPs is only counted for layers that are used at test time. This means that redundant layers such as person ID classification layer will be ignored as it is discarded when doing feature extraction. Note that the inference graph depends on how you construct the computations in ``forward()``. Args: model (nn.Module): network model. 
verbose (bool, optional): shows detailed complexity of each module. Default is False. only_conv_linear (bool, optional): only considers convolution and linear layers when counting flops. Default is True. If set to False, flops of all layers will be counted. Examples:: >>> from torchreid import models, utils >>> model = models.build_model(name='resnet50', num_classes=1000) >>> num_params, flops = utils.compute_model_complexity(model, (1, 3, 256, 128), verbose=True) """ registered_handles = [] layer_list = [] layer = namedtuple('layer', ['class_name', 'params', 'flops']) def _add_hooks(m): def _has_submodule(m): return len(list(m.children())) > 0 def _hook(m, x, y): params = sum(p.numel() for p in m.parameters()) class_name = str(m.__class__.__name__) flops_counter = _get_flops_counter(only_conv_linear) if class_name in flops_counter: flops = flops_counter[class_name](m, x, y) else: flops = 0 layer_list.append( layer(class_name=class_name, params=params, flops=flops) ) # only consider the very basic nn layer if _has_submodule(m): return handle = m.register_forward_hook(_hook) registered_handles.append(handle) default_train_mode = model.training model.eval().apply(_add_hooks) input_size = (1, 3, cfg.data.height, cfg.data.width) input_img = torch.rand(input_size) if next(model.parameters()).is_cuda: input_img = input_img.cuda() if isinstance(model, BPBreID): model(input_img, torch.ones(1, cfg.model.bpbreid.masks.parts_num+1, 16, 8)) # forward else: model(input_img) # forward for handle in registered_handles: handle.remove() model.train(default_train_mode) if verbose: per_module_params = defaultdict(list) per_module_flops = defaultdict(list) total_params, total_flops = 0, 0 for layer in layer_list: total_params += layer.params total_flops += layer.flops if verbose: per_module_params[layer.class_name].append(layer.params) per_module_flops[layer.class_name].append(layer.flops) if verbose: num_udscore = 55 print(' {}'.format('-' * num_udscore)) print(' Model complexity 
with input size {}'.format(input_size)) print(' {}'.format('-' * num_udscore)) for class_name in per_module_params: params = int(np.sum(per_module_params[class_name])) flops = int(np.sum(per_module_flops[class_name])) print( ' {} (params={:,}, flops={:,})'.format( class_name, params, flops ) ) print(' {}'.format('-' * num_udscore)) print( ' Total (params={:,}, flops={:,})'.format( total_params, total_flops ) ) print(' {}'.format('-' * num_udscore)) return total_params, total_flops ================================================ FILE: torchreid/utils/reidtools.py ================================================ from __future__ import print_function, absolute_import import numpy as np import shutil import os.path as osp import cv2 from torchreid.utils import Logger from .engine_state import EngineState from .tools import mkdir_if_missing __all__ = ['visualize_ranked_results'] GRID_SPACING = 10 QUERY_EXTRA_SPACING = 90 BW = 5 # border width GREEN = (0, 255, 0) RED = (0, 0, 255) def visualize_ranked_results( distmat, dataset, data_type, width=128, height=256, save_dir='', topk=10 ): """Visualizes ranked results. Supports both image-reid and video-reid. For image-reid, ranks will be plotted in a single figure. For video-reid, ranks will be saved in folders each containing a tracklet. Args: distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery). dataset (tuple): a 2-tuple containing (query, gallery), each of which contains tuples of (img_path(s), pid, camid). data_type (str): "image" or "video". width (int, optional): resized image width. Default is 128. height (int, optional): resized image height. Default is 256. save_dir (str): directory to save output images. topk (int, optional): denoting top-k images in the rank list to be visualized. Default is 10. 
""" num_q, num_g = distmat.shape mkdir_if_missing(save_dir) print('# query: {}\n# gallery {}'.format(num_q, num_g)) print('Visualizing top-{} ranks ...'.format(topk)) query, gallery = dataset assert num_q == len(query) assert num_g == len(gallery) indices = np.argsort(distmat, axis=1) def _cp_img_to(src, dst, rank, prefix, matched=False): """ Args: src: image path or tuple (for vidreid) dst: target directory rank: int, denoting ranked position, starting from 1 prefix: string matched: bool """ if isinstance(src, (tuple, list)): if prefix == 'gallery': suffix = 'TRUE' if matched else 'FALSE' dst = osp.join( dst, prefix + '_top' + str(rank).zfill(3) ) + '_' + suffix else: dst = osp.join(dst, prefix + '_top' + str(rank).zfill(3)) mkdir_if_missing(dst) for img_path in src: shutil.copy(img_path, dst) else: dst = osp.join( dst, prefix + '_top' + str(rank).zfill(3) + '_name_' + osp.basename(src) ) shutil.copy(src, dst) for q_idx in range(num_q): qpid, qcamid, qimg_path = query[q_idx]['pid'], query[q_idx]['camid'], query[q_idx]['img_path'] qimg_path_name = qimg_path[0] if isinstance( qimg_path, (tuple, list) ) else qimg_path if data_type == 'image': qimg = cv2.imread(qimg_path) qimg = cv2.resize(qimg, (width, height)) qimg = cv2.copyMakeBorder( qimg, BW, BW, BW, BW, cv2.BORDER_CONSTANT, value=(0, 0, 0) ) # resize twice to ensure that the border width is consistent across images qimg = cv2.resize(qimg, (width, height)) num_cols = topk + 1 grid_img = 255 * np.ones( ( height, num_cols*width + topk*GRID_SPACING + QUERY_EXTRA_SPACING, 3 ), dtype=np.uint8 ) grid_img[:, :width, :] = qimg else: qdir = osp.join( save_dir, osp.basename(osp.splitext(qimg_path_name)[0]) ) mkdir_if_missing(qdir) _cp_img_to(qimg_path, qdir, rank=0, prefix='query') rank_idx = 1 for g_idx in indices[q_idx, :]: gpid, gcamid, gimg_path = gallery[g_idx]['pid'], gallery[g_idx]['camid'], gallery[g_idx]['img_path'] invalid = (qpid == gpid) & (qcamid == gcamid) if not invalid: matched = gpid == qpid if data_type 
== 'image': border_color = GREEN if matched else RED gimg = cv2.imread(gimg_path) gimg = cv2.resize(gimg, (width, height)) gimg = cv2.copyMakeBorder( gimg, BW, BW, BW, BW, cv2.BORDER_CONSTANT, value=border_color ) gimg = cv2.resize(gimg, (width, height)) start = rank_idx*width + rank_idx*GRID_SPACING + QUERY_EXTRA_SPACING end = ( rank_idx+1 ) * width + rank_idx*GRID_SPACING + QUERY_EXTRA_SPACING grid_img[:, start:end, :] = gimg else: _cp_img_to( gimg_path, qdir, rank=rank_idx, prefix='gallery', matched=matched ) rank_idx += 1 if rank_idx > topk: break if data_type == 'image': imname = osp.basename(osp.splitext(qimg_path_name)[0]) # cv2.imwrite(osp.join(save_dir, imname + '.jpg'), grid_img) Logger.current_logger().add_image("Ranking grid", imname + '.jpg', cv2.cvtColor(grid_img, cv2.COLOR_BGR2RGB), EngineState.current_engine_state().epoch) if (q_idx+1) % 100 == 0: print('- done {}/{}'.format(q_idx + 1, num_q)) print('Done. Images have been saved to "{}" ...'.format(save_dir)) ================================================ FILE: torchreid/utils/rerank.py ================================================ #!/usr/bin/env python2/python3 # -*- coding: utf-8 -*- """ Source: https://github.com/zhunzhong07/person-re-ranking Created on Mon Jun 26 14:46:56 2017 @author: luohao Modified by Houjing Huang, 2017-12-22. - This version accepts distance matrix instead of raw features. - The difference of `/` division between python 2 and 3 is handled. - numpy.float16 is replaced by numpy.float32 for numerical precision. CVPR2017 paper:Zhong Z, Zheng L, Cao D, et al. Re-ranking Person Re-identification with k-reciprocal Encoding[J]. 2017. 
url:http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf Matlab version: https://github.com/zhunzhong07/person-re-ranking API q_g_dist: query-gallery distance matrix, numpy array, shape [num_query, num_gallery] q_q_dist: query-query distance matrix, numpy array, shape [num_query, num_query] g_g_dist: gallery-gallery distance matrix, numpy array, shape [num_gallery, num_gallery] k1, k2, lambda_value: parameters, the original paper is (k1=20, k2=6, lambda_value=0.3) Returns: final_dist: re-ranked distance, numpy array, shape [num_query, num_gallery] """ from __future__ import division, print_function, absolute_import import numpy as np __all__ = ['re_ranking'] def re_ranking(q_g_dist, q_q_dist, g_g_dist, k1=20, k2=6, lambda_value=0.3): # The following naming, e.g. gallery_num, is different from outer scope. # Don't care about it. original_dist = np.concatenate( [ np.concatenate([q_q_dist, q_g_dist], axis=1), np.concatenate([q_g_dist.T, g_g_dist], axis=1) ], axis=0 ) original_dist = np.power(original_dist, 2).astype(np.float32) original_dist = np.transpose( 1. 
* original_dist / np.max(original_dist, axis=0) ) V = np.zeros_like(original_dist).astype(np.float32) initial_rank = np.argsort(original_dist).astype(np.int32) query_num = q_g_dist.shape[0] gallery_num = q_g_dist.shape[0] + q_g_dist.shape[1] all_num = gallery_num for i in range(all_num): # k-reciprocal neighbors forward_k_neigh_index = initial_rank[i, :k1 + 1] backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] fi = np.where(backward_k_neigh_index == i)[0] k_reciprocal_index = forward_k_neigh_index[fi] k_reciprocal_expansion_index = k_reciprocal_index for j in range(len(k_reciprocal_index)): candidate = k_reciprocal_index[j] candidate_forward_k_neigh_index = initial_rank[ candidate, :int(np.around(k1 / 2.)) + 1] candidate_backward_k_neigh_index = initial_rank[ candidate_forward_k_neigh_index, :int(np.around(k1 / 2.)) + 1] fi_candidate = np.where( candidate_backward_k_neigh_index == candidate )[0] candidate_k_reciprocal_index = candidate_forward_k_neigh_index[ fi_candidate] if len( np. intersect1d(candidate_k_reciprocal_index, k_reciprocal_index) ) > 2. / 3 * len(candidate_k_reciprocal_index): k_reciprocal_expansion_index = np.append( k_reciprocal_expansion_index, candidate_k_reciprocal_index ) k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) weight = np.exp(-original_dist[i, k_reciprocal_expansion_index]) V[i, k_reciprocal_expansion_index] = 1. 
* weight / np.sum(weight) original_dist = original_dist[:query_num, ] if k2 != 1: V_qe = np.zeros_like(V, dtype=np.float32) for i in range(all_num): V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0) V = V_qe del V_qe del initial_rank invIndex = [] for i in range(gallery_num): invIndex.append(np.where(V[:, i] != 0)[0]) jaccard_dist = np.zeros_like(original_dist, dtype=np.float32) for i in range(query_num): temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float32) indNonZero = np.where(V[i, :] != 0)[0] indImages = [] indImages = [invIndex[ind] for ind in indNonZero] for j in range(len(indNonZero)): temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum( V[i, indNonZero[j]], V[indImages[j], indNonZero[j]] ) jaccard_dist[i] = 1 - temp_min / (2.-temp_min) final_dist = jaccard_dist * (1-lambda_value) + original_dist*lambda_value del original_dist del V del jaccard_dist final_dist = final_dist[:query_num, query_num:] return final_dist ================================================ FILE: torchreid/utils/tensortools.py ================================================ def replace_values(input, mask, value): # TODO test perfs output = input * (~mask) + mask * value # input[mask] = value # output = input # output = torch.where(mask, input, torch.tensor(value, dtype=input.dtype, device=(input.get_device() if input.is_cuda else None))) return output def masked_mean(input, mask): # TODO CHECK ON RANKING GRID IF IT WORK WITH CONTINUOUS VISIBILITY """ output -1 where mean couldn't be computed """ valid_input = input * mask mean_weights = mask.sum(0) mean_weights = mean_weights + (mean_weights == 0) # to avoid division by 0 pairwise_dist = valid_input.sum(0) / mean_weights invalid_pairs = (mask.sum(dim=0) == 0) valid_pairwise_dist = replace_values(pairwise_dist, invalid_pairs, -1) return valid_pairwise_dist ================================================ FILE: torchreid/utils/tools.py ================================================ from __future__ import 
division, print_function, absolute_import import os import sys import json import time import errno import cv2 import numpy as np import random import os.path as osp import warnings import PIL import torch from PIL import Image from torchreid.utils.constants import bn_correspondants __all__ = [ 'mkdir_if_missing', 'check_isfile', 'read_json', 'write_json', 'set_random_seed', 'download_url', 'read_image', 'read_masks', 'collect_env_info', 'perc' ] def mkdir_if_missing(dirname): """Creates dirname if it is missing.""" if not osp.exists(dirname): try: os.makedirs(dirname) except OSError as e: if e.errno != errno.EEXIST: raise def check_isfile(fpath): """Checks if the given path is a file. Args: fpath (str): file path. Returns: bool """ isfile = osp.isfile(fpath) if not isfile: warnings.warn('No file found at "{}"'.format(fpath)) return isfile def read_json(fpath): """Reads json file from a path.""" with open(fpath, 'r') as f: obj = json.load(f) return obj def write_json(obj, fpath): """Writes to a json file.""" mkdir_if_missing(osp.dirname(fpath)) with open(fpath, 'w') as f: json.dump(obj, f, indent=4, separators=(',', ': ')) def set_random_seed(seed): random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) def download_url(url, dst): """Downloads file from a url to a destination. Args: url (str): url to download file. dst (str): destination path. 
""" from six.moves import urllib print('* url="{}"'.format(url)) print('* destination="{}"'.format(dst)) def _reporthook(count, block_size, total_size): global start_time if count == 0: start_time = time.time() return duration = time.time() - start_time progress_size = int(count * block_size) speed = int(progress_size / (1024*duration)) percent = int(count * block_size * 100 / total_size) sys.stdout.write( '\r...%d%%, %d MB, %d KB/s, %d seconds passed' % (percent, progress_size / (1024*1024), speed, duration) ) sys.stdout.flush() urllib.request.urlretrieve(url, dst, _reporthook) sys.stdout.write('\n') def read_image(path): """Reads image from path using ``PIL.Image``. Args: path (str): path to an image. Returns: PIL image """ got_img = False if not osp.exists(path): raise IOError('"{}" does not exist'.format(path)) while not got_img: try: img = cv2.imread(path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) got_img = True except IOError: print( 'IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.' .format(path) ) return img def read_masks(masks_path): """Reads part-based masks information from path. Args: path (str): path to an image. Part-based masks information is stored in a .npy file with image name as prefix Returns: Numpy array of size N x H x W where N is the number of part-based masks """ got_masks = False if not osp.exists(masks_path): raise IOError('Masks file"{}" does not exist'.format(masks_path)) while not got_masks: try: masks = np.load(masks_path) masks = np.transpose(masks, (1, 2, 0)) got_masks = True except IOError: print( 'IOError incurred when reading "{}". Will redo. Don\'t worry. Just chill.' .format(masks_path) ) return masks def collect_env_info(): """Returns env info as a string. 
Code source: github.com/facebookresearch/maskrcnn-benchmark """ from torch.utils.collect_env import get_pretty_env_info env_str = get_pretty_env_info() env_str += '\n Pillow ({})'.format(PIL.__version__) return env_str def perc(val, decimals=2): return np.around(val*100, decimals) def extract_test_embeddings(model_output, test_embeddings): embeddings, visibility_scores, id_cls_scores, pixels_cls_scores, spatial_features, parts_masks = model_output embeddings_list = [] visibility_scores_list = [] embeddings_masks_list = [] for test_emb in test_embeddings: embds = embeddings[test_emb] embeddings_list.append(embds if len(embds.shape) == 3 else embds.unsqueeze(1)) if test_emb in bn_correspondants: test_emb = bn_correspondants[test_emb] vis_scores = visibility_scores[test_emb] visibility_scores_list.append(vis_scores if len(vis_scores.shape) == 2 else vis_scores.unsqueeze(1)) pt_masks = parts_masks[test_emb] embeddings_masks_list.append(pt_masks if len(pt_masks.shape) == 4 else pt_masks.unsqueeze(1)) assert len(embeddings) != 0 embeddings = torch.cat(embeddings_list, dim=1) # [N, P+2, D] visibility_scores = torch.cat(visibility_scores_list, dim=1) # [N, P+2] embeddings_masks = torch.cat(embeddings_masks_list, dim=1) # [N, P+2, Hf, Wf] return embeddings, visibility_scores, embeddings_masks, pixels_cls_scores ================================================ FILE: torchreid/utils/torch_receptive_field/__init__.py ================================================ from .receptive_field import receptive_field from .receptive_field import receptive_field_for_unit ================================================ FILE: torchreid/utils/torch_receptive_field/receptive_field.py ================================================ import torch import torch.nn as nn from torch.autograd import Variable from collections import OrderedDict import numpy as np # Source: https://github.com/Fangyh09/pytorch-receptive-field def check_same(stride): if isinstance(stride, (list, tuple)): assert 
len(stride) == 2 and stride[0] == stride[1]
        stride = stride[0]
    return stride


def receptive_field(model, input_size, batch_size=-1, device="cuda"):
    '''
    :parameter
    'input_size': tuple of (Channel, Height, Width)
    :return  OrderedDict of `Layername`->OrderedDict of receptive field stats {'j':,'r':,'start':,'conv_stage':,'output_shape':,}
    'j' for "jump" denotes how many pixels do the receptive fields of spatially neighboring units in the feature tensor
        do not overlap in one direction.
        i.e. shift one unit in this feature map == how many pixels shift in the input image in one direction.
    'r' for "receptive_field" is the spatial range of the receptive field in one direction.
    'start' denotes the center of the receptive field for the first unit (start) in on direction of the feature tensor.
        Convention is to use half a pixel as the center for a range. center for `slice(0,5)` is 2.5.
    '''

    def register_hook(module):

        def hook(module, input, output):
            # Layers are keyed by visit order; each layer's stats are derived
            # from its predecessor's ("p_key") stats.
            class_name = str(module.__class__).split(".")[-1].split("'")[0]
            module_idx = len(receptive_field)
            m_key = "%i" % module_idx
            p_key = "%i" % (module_idx - 1)
            receptive_field[m_key] = OrderedDict()

            # Once a ConvTranspose2d is seen, "conv_stage" is switched off and
            # receptive-field stats are no longer meaningful (set to 0).
            if not receptive_field["0"]["conv_stage"]:
                print("Enter in deconv_stage")
                receptive_field[m_key]["j"] = 0
                receptive_field[m_key]["r"] = 0
                receptive_field[m_key]["start"] = 0
            else:
                p_j = receptive_field[p_key]["j"]
                p_r = receptive_field[p_key]["r"]
                p_start = receptive_field[p_key]["start"]

                if class_name == "Conv2d" or class_name == "MaxPool2d":
                    kernel_size = module.kernel_size
                    stride = module.stride
                    padding = module.padding
                    dilation = module.dilation

                    # check_same collapses symmetric (h, w) pairs to a scalar.
                    kernel_size, stride, padding, dilation = map(check_same, [kernel_size, stride, padding, dilation])
                    # Standard receptive-field recurrences:
                    #   j_out = j_in * stride
                    #   r_out = r_in + (k - 1) * dilation * j_in
                    #   start_out = start_in + ((k - 1)/2 - padding) * j_in
                    receptive_field[m_key]["j"] = p_j * stride
                    receptive_field[m_key]["r"] = p_r + ((kernel_size - 1) * dilation) * p_j
                    receptive_field[m_key]["start"] = p_start + ((kernel_size - 1) / 2 - padding) * p_j
                elif class_name == "BatchNorm2d" or class_name == "ReLU" or class_name == "Bottleneck":
                    # Element-wise layers leave the receptive field unchanged.
                    receptive_field[m_key]["j"] = p_j
                    receptive_field[m_key]["r"] = p_r
                    receptive_field[m_key]["start"] = p_start
                elif class_name == "ConvTranspose2d":
                    receptive_field["0"]["conv_stage"] = False
                    receptive_field[m_key]["j"] = 0
                    receptive_field[m_key]["r"] = 0
                    receptive_field[m_key]["start"] = 0
                else:
                    raise ValueError("module not ok")
                    pass

            receptive_field[m_key]["input_shape"] = list(input[0].size())  # only one
            receptive_field[m_key]["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                # list/tuple
                receptive_field[m_key]["output_shape"] = [
                    [-1] + list(o.size())[1:] for o in output
                ]
            else:
                # tensor
                receptive_field[m_key]["output_shape"] = list(output.size())
                receptive_field[m_key]["output_shape"][0] = batch_size

        # Only hook leaf-like modules: containers and the root model are skipped.
        if (
            not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)
        ):
            hooks.append(module.register_forward_hook(hook))

    device = device.lower()
    assert device in [
        "cuda",
        "cpu",
    ], "Input device is not valid, please specify 'cuda' or 'cpu'"

    if device == "cuda" and torch.cuda.is_available():
        dtype = torch.cuda.FloatTensor
    else:
        dtype = torch.FloatTensor

    # check if there are multiple inputs to the network
    if isinstance(input_size[0], (list, tuple)):
        x = [Variable(torch.rand(2, *in_size)).type(dtype) for in_size in input_size]
    else:
        x = Variable(torch.rand(2, *input_size)).type(dtype)

    # create properties
    # Entry "0" is the (virtual) input layer: jump 1, receptive field 1 pixel,
    # centered at half a pixel.
    receptive_field = OrderedDict()
    receptive_field["0"] = OrderedDict()
    receptive_field["0"]["j"] = 1.0
    receptive_field["0"]["r"] = 1.0
    receptive_field["0"]["start"] = 0.5
    receptive_field["0"]["conv_stage"] = True
    receptive_field["0"]["output_shape"] = list(x.size())
    receptive_field["0"]["output_shape"][0] = batch_size
    hooks = []

    # register hook
    model.apply(register_hook)

    # make a forward pass
    model(x)

    # remove these hooks
    for h in hooks:
        h.remove()

    print("------------------------------------------------------------------------------")
    line_new = "{:>20} {:>10} {:>10} {:>10} {:>15} ".format("Layer (type)", "map size", "start", "jump", "receptive_field")
    print(line_new)
    print("==============================================================================")
    total_params = 0
    total_output = 0
    trainable_params = 0
    for layer in receptive_field:
        # input_shape, output_shape, trainable, nb_params
        assert "start" in receptive_field[layer], layer
        assert len(receptive_field[layer]["output_shape"]) == 4
        line_new = "{:7} {:12} {:>10} {:>10} {:>10} {:>15} ".format(
            "",
            layer,
            str(receptive_field[layer]["output_shape"][2:]),
            str(receptive_field[layer]["start"]),
            str(receptive_field[layer]["j"]),
            format(str(receptive_field[layer]["r"]))
        )
        print(line_new)

    print("==============================================================================")
    # add input_shape
    receptive_field["input_size"] = input_size
    return receptive_field


def receptive_field_for_unit(receptive_field_dict, layer, unit_position):
    """Utility function to calculate the receptive field for a specific unit in a layer
    using the dictionary calculated above
    :parameter
        'layer': layer name, should be a key in the result dictionary
        'unit_position': spatial coordinate of the unit (H, W)
    ```
    alexnet = models.alexnet()
    model = alexnet.features.to('cuda')
    receptive_field_dict = receptive_field(model, (3, 224, 224))
    receptive_field_for_unit(receptive_field_dict, "8", (6,6))
    ```
    Out: [(62.0, 161.0), (62.0, 161.0)]
    """
    input_shape = receptive_field_dict["input_size"]
    if layer in receptive_field_dict:
        rf_stats = receptive_field_dict[layer]
        assert len(unit_position) == 2
        feat_map_lim = rf_stats['output_shape'][2:]
        if np.any([unit_position[idx] < 0 or unit_position[idx] >= feat_map_lim[idx] for idx in range(2)]):
            raise Exception("Unit position outside spatial extent of the feature tensor ((H, W) = (%d, %d)) " % tuple(feat_map_lim))
        # X, Y = tuple(unit_position)
        # Receptive-field interval per axis: start + idx*j +/- r/2.
        rf_range = [(rf_stats['start'] + idx * rf_stats['j'] - rf_stats['r'] / 2,
                     rf_stats['start'] + idx * rf_stats['j'] + rf_stats['r'] / 2) for idx in unit_position]
        if len(input_shape) == 2:
            limit = input_shape
        else:  # input shape is (channel, H, W)
            limit = input_shape[1:3]
        # Clip the interval to the input image extent.
        rf_range = [(max(0, rf_range[axis][0]),
                     min(limit[axis], rf_range[axis][1])) for axis in range(2)]

        print("Receptive field size for layer %s, unit_position %s,  is \n %s" % (layer, unit_position, rf_range))
        return rf_range
    else:
        raise KeyError("Layer name incorrect, or not included in the model.")


================================================
FILE: torchreid/utils/torchtools.py
================================================
from __future__ import division, print_function, absolute_import
import itertools
import pickle
import shutil
import os.path as osp
import warnings
from functools import partial
from collections import OrderedDict
import torch
import torch.nn as nn

from .tools import mkdir_if_missing

import collections
import re

__all__ = [
    'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint',
    'open_all_layers', 'open_specified_layers', 'count_num_param',
    'load_pretrained_weights'
]


def save_checkpoint(
    state, save_dir, job_id=None, is_best=False, remove_module_from_keys=False
):
    r"""Saves checkpoint.

    Args:
        state (dict): dictionary.
        save_dir (str): directory to save checkpoint.
        job_id (optional): job identifier embedded in the checkpoint file name.
        is_best (bool, optional): if True, this checkpoint will be copied and named
            ``model-best.pth.tar``. Default is False.
        remove_module_from_keys (bool, optional): whether to remove "module."
            from layer names. Default is False.

    Examples::
        >>> state = {
        >>>     'state_dict': model.state_dict(),
        >>>     'epoch': 10,
        >>>     'rank1': 0.5,
        >>>     'optimizer': optimizer.state_dict()
        >>> }
        >>> save_checkpoint(state, 'log/my_model')
    """
    mkdir_if_missing(save_dir)
    if remove_module_from_keys:
        # remove 'module.'
        # in state_dict's keys (prefix added by nn.DataParallel wrappers)
        state_dict = state['state_dict']
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if k.startswith('module.'):
                k = k[7:]
            new_state_dict[k] = v
        state['state_dict'] = new_state_dict
    # save
    epoch = state['epoch']
    fpath = osp.join(save_dir, 'job-{}_{}_model.pth.tar'.format(job_id, str(epoch)))
    torch.save(state, fpath)
    print('Checkpoint saved to "{}"'.format(fpath))
    if is_best:
        # keep an extra copy under a fixed name so the best model is easy to locate
        shutil.copy(fpath, osp.join(osp.dirname(fpath), 'model-best.pth.tar'))


def load_checkpoint(fpath):
    r"""Loads checkpoint.

    ``UnicodeDecodeError`` can be well handled, which means
    python2-saved files can be read from python3.

    Args:
        fpath (str): path to checkpoint.

    Returns:
        dict

    Examples::
        >>> from torchreid.utils import load_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> checkpoint = load_checkpoint(fpath)
    """
    if fpath is None:
        raise ValueError('File path is None')
    if not osp.exists(fpath):
        raise FileNotFoundError('File is not found at "{}"'.format(fpath))
    # load onto CPU when no GPU is available so GPU-saved checkpoints still open
    map_location = None if torch.cuda.is_available() else 'cpu'
    try:
        checkpoint = torch.load(fpath, map_location=map_location)
    except UnicodeDecodeError:
        # python2-pickled checkpoint: re-decode pickled bytes as latin1.
        # NOTE(review): this monkey-patches the global pickle module for the
        # rest of the process — a local Unpickler would be safer.
        pickle.load = partial(pickle.load, encoding="latin1")
        pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
        checkpoint = torch.load(
            fpath, pickle_module=pickle, map_location=map_location
        )
    except Exception:
        print('Unable to load checkpoint from "{}"'.format(fpath))
        raise
    return checkpoint


def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None):
    r"""Resumes training from a checkpoint.

    This will load (1) model weights and (2) ``state_dict``
    of optimizer if ``optimizer`` is not None.

    Args:
        fpath (str): path to checkpoint.
        model (nn.Module): model.
        optimizer (Optimizer, optional): an Optimizer.
        scheduler (LRScheduler, optional): an LRScheduler.

    Returns:
        int: start_epoch.
    Examples::
        >>> from torchreid.utils import resume_from_checkpoint
        >>> fpath = 'log/my_model/model.pth.tar-10'
        >>> start_epoch = resume_from_checkpoint(
        >>>     fpath, model, optimizer, scheduler
        >>> )
    """
    print('Loading checkpoint from "{}"'.format(fpath))
    checkpoint = load_checkpoint(fpath)
    model.load_state_dict(checkpoint['state_dict'])
    print('Loaded model weights')
    # optimizer/scheduler state is restored only when both the object and the
    # matching checkpoint entry are present
    if optimizer is not None and 'optimizer' in checkpoint.keys():
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('Loaded optimizer')
    if scheduler is not None and 'scheduler' in checkpoint.keys():
        scheduler.load_state_dict(checkpoint['scheduler'])
        print('Loaded scheduler')
    start_epoch = checkpoint['epoch']
    print('Last epoch = {}'.format(start_epoch))
    if 'rank1' in checkpoint.keys():
        print('Last rank1 = {:.1%}'.format(checkpoint['rank1']))
    return start_epoch


def adjust_learning_rate(
    optimizer,
    base_lr,
    epoch,
    stepsize=20,
    gamma=0.1,
    linear_decay=False,
    final_lr=0,
    max_epoch=100
):
    r"""Adjusts learning rate.

    Deprecated.
    """
    if linear_decay:
        # linearly decay learning rate from base_lr to final_lr
        frac_done = epoch / max_epoch
        lr = frac_done*final_lr + (1.-frac_done) * base_lr
    else:
        # decay learning rate by gamma for every stepsize
        lr = base_lr * (gamma**(epoch // stepsize))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def set_bn_to_eval(m):
    r"""Sets BatchNorm layers to eval mode."""
    # 1. no update for running mean and var
    # 2. scale and shift parameters are still trainable
    classname = m.__class__.__name__
    if classname.find('BatchNorm') != -1:
        m.eval()


def open_all_layers(model):
    r"""Opens all layers in model for training.

    Examples::
        >>> from torchreid.utils import open_all_layers
        >>> open_all_layers(model)
    """
    model.train()
    for p in model.parameters():
        p.requires_grad = True


def open_specified_layers(model, open_layers):
    r"""Opens specified layers in model for training while keeping
    other layers frozen.

    Args:
        model (nn.Module): neural net model.
        open_layers (str or list): layers open for training.

    Examples::
        >>> from torchreid.utils import open_specified_layers
        >>> # Only model.classifier will be updated.
        >>> open_layers = 'classifier'
        >>> open_specified_layers(model, open_layers)
        >>> # Only model.fc and model.classifier will be updated.
        >>> open_layers = ['fc', 'classifier']
        >>> open_specified_layers(model, open_layers)
    """
    if isinstance(model, nn.DataParallel):
        model = model.module
    if isinstance(open_layers, str):
        open_layers = [open_layers]
    # fail fast on typos in the requested layer names
    for layer in open_layers:
        assert hasattr(
            model, layer
        ), '"{}" is not an attribute of the model, please provide the correct name'.format(
            layer
        )
    # listed layers: train mode + gradients on; all other children: eval + frozen
    for name, module in model.named_children():
        if name in open_layers:
            module.train()
            for p in module.parameters():
                p.requires_grad = True
        else:
            module.eval()
            for p in module.parameters():
                p.requires_grad = False


def count_num_param(model):
    r"""Counts number of parameters in a model while ignoring ``self.classifier``.

    Args:
        model (nn.Module): network model.

    Examples::
        >>> from torchreid.utils import count_num_param
        >>> model_size = count_num_param(model)

    .. warning::
        This method is deprecated in favor of
        ``torchreid.utils.compute_model_complexity``.
    """
    warnings.warn(
        'This method is deprecated and will be removed in the future.'
    )
    num_param = sum(p.numel() for p in model.parameters())
    if isinstance(model, nn.DataParallel):
        model = model.module
    if hasattr(model, 'classifier') and isinstance(model.classifier, nn.Module):
        # we ignore the classifier because it is unused at test time
        num_param -= sum(p.numel() for p in model.classifier.parameters())
    return num_param


def load_pretrained_weights(model, weight_path):
    r"""Loads pretrained weights to model.

    Features::
        - Incompatible layers (unmatched in name or size) will be ignored.
        - Can automatically deal with keys containing "module.".

    Args:
        model (nn.Module): network model.
        weight_path (str): path to pretrained weights.
    Examples::
        >>> from torchreid.utils import load_pretrained_weights
        >>> weight_path = 'log/my_model/model-best.pth.tar'
        >>> load_pretrained_weights(model, weight_path)
    """
    checkpoint = load_checkpoint(weight_path)
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        # the file holds a bare state_dict rather than a full checkpoint dict
        state_dict = checkpoint
    model_dict = model.state_dict()
    new_state_dict = OrderedDict()
    matched_layers, discarded_layers = [], []
    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  # discard module.
        # keep only keys present in the model with an identical tensor size
        if k in model_dict and model_dict[k].size() == v.size():
            new_state_dict[k] = v
            matched_layers.append(k)
        else:
            discarded_layers.append(k)
    model_dict.update(new_state_dict)
    model.load_state_dict(model_dict)
    if len(matched_layers) == 0:
        warnings.warn(
            'The pretrained weights "{}" cannot be loaded, '
            'please check the key names manually '
            '(** ignored and continue **)'.format(weight_path)
        )
    else:
        print(
            'Successfully loaded pretrained weights from "{}"'.
            format(weight_path)
        )
        if len(discarded_layers) > 0:
            print(
                '** The following layers are discarded '
                'due to unmatched keys or layer size: {}'.
                format(discarded_layers)
            )


# Copied from torch.utils.data._utils.collate.default_collate
np_str_obj_array_pattern = re.compile(r'[SaUO]')
default_collate_err_msg_format = (
    "default_collate: batch must contain tensors, numpy arrays, numbers, "
    "dicts or lists; found {}")


def collate(batch):
    r"""
    Function that takes in a batch of data and puts the elements within the batch
    into a tensor with an additional outer dimension - batch size. The exact output type can be
    a :class:`torch.Tensor`, a `Sequence` of :class:`torch.Tensor`, a
    Collection of :class:`torch.Tensor`, or left unchanged, depending on the input type.
    This is used as the default function for collation when
    `batch_size` or `batch_sampler` is defined in :class:`~torch.utils.data.DataLoader`.
    Here is the general input type (based on the type of the element within the batch)
    to output type mapping:

        * :class:`torch.Tensor` -> :class:`torch.Tensor` (with an added outer dimension batch size)
        * NumPy Arrays -> :class:`torch.Tensor`
        * `float` -> :class:`torch.Tensor`
        * `int` -> :class:`torch.Tensor`
        * `str` -> `str` (unchanged)
        * `bytes` -> `bytes` (unchanged)
        * `Mapping[K, V_i]` -> `Mapping[K, default_collate([V_1, V_2, ...])]`
        * `NamedTuple[V1_i, V2_i, ...]` -> `NamedTuple[default_collate([V1_1, V1_2, ...]), default_collate([V2_1, V2_2, ...]), ...]`
        * `Sequence[V1_i, V2_i, ...]` -> `Sequence[default_collate([V1_1, V1_2, ...]), default_collate([V2_1, V2_2, ...]), ...]`

    NOTE(review): unlike upstream ``default_collate``, this copy concatenates
    tensors with ``torch.cat`` and converts numeric results back to numpy
    arrays — the doctest examples below describe the upstream behavior, not
    this variant.

    Args:
        batch: a single batch to be collated

    Examples:
        >>> # Example with a batch of `int`s:
        >>> default_collate([0, 1, 2, 3])
        tensor([0, 1, 2, 3])
        >>> # Example with a batch of `str`s:
        >>> default_collate(['a', 'b', 'c'])
        ['a', 'b', 'c']
        >>> # Example with `Map` inside the batch:
        >>> default_collate([{'A': 0, 'B': 1}, {'A': 100, 'B': 100}])
        {'A': tensor([ 0, 100]), 'B': tensor([ 1, 100])}
        >>> # Example with `NamedTuple` inside the batch:
        >>> Point = namedtuple('Point', ['x', 'y'])
        >>> default_collate([Point(0, 0), Point(1, 1)])
        Point(x=tensor([0, 1]), y=tensor([0, 1]))
        >>> # Example with `Tuple` inside the batch:
        >>> default_collate([(0, 1), (2, 3)])
        [tensor([0, 2]), tensor([1, 3])]
        >>> # Example with `List` inside the batch:
        >>> default_collate([[0, 1], [2, 3]])
        [tensor([0, 2]), tensor([1, 3])]
    """
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum(x.numel() for x in batch)
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage).resize_(len(batch), *list(elem.size()))
        return torch.cat(batch, dim=0, out=out).numpy()  # changed to cat from original code
    elif elem_type.__module__ ==
'numpy' and elem_type.__name__ != 'str_' \ and elem_type.__name__ != 'string_': if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap': # array of string classes and object if np_str_obj_array_pattern.search(elem.dtype.str) is not None: raise TypeError(default_collate_err_msg_format.format(elem.dtype)) return collate([b for b in batch]) elif elem.shape == (): # scalars return torch.as_tensor(batch).numpy() elif isinstance(elem, float): return torch.tensor(batch, dtype=torch.float64).numpy() elif isinstance(elem, int): return torch.tensor(batch).numpy() elif isinstance(elem, str): return batch elif isinstance(elem, collections.abc.Mapping): try: return elem_type({key: collate([d[key] for d in batch]) for key in elem}) except TypeError: # The mapping type may not support `__init__(iterable)`. return {key: collate([d[key] for d in batch]) for key in elem} elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple return elem_type(*(collate(samples) for samples in zip(*batch))) elif isinstance(elem, collections.abc.Sequence): # check to make sure that the elements in batch have consistent size return list(itertools.chain.from_iterable(batch)) # it = iter(batch) # elem_size = len(next(it)) # # if not all(len(elem) == elem_size for elem in it): # # raise RuntimeError('each element in list of batch should be of equal size') # transposed = list(zip(*batch)) # It may be accessed twice, so we use a list. # # if isinstance(elem, tuple): # return [collate(samples) for samples in transposed] # Backwards compatibility. # else: # try: # return elem_type([collate(samples) for samples in transposed]) # except TypeError: # # The sequence type may not support `__init__(iterable)` (e.g., `range`). 
# return [collate(samples) for samples in transposed] raise TypeError(default_collate_err_msg_format.format(elem_type)) ================================================ FILE: torchreid/utils/visualization/__init__.py ================================================ from .display_batch_triplets import show_triplet_grid from .visualize_query_gallery_rankings import visualize_ranking_grid ================================================ FILE: torchreid/utils/visualization/display_batch_triplets.py ================================================ import cv2 import matplotlib.pyplot as plt __all__ = ['show_triplet'] # try: # import matplotlib.cm # CMAP_JET = copy.copy(matplotlib.cm.get_cmap('jet')) # CMAP_JET.set_bad('white', alpha=0.5) # except ImportError: # CMAP_JET = None from torchreid.utils import Logger from torchreid.utils.engine_state import EngineState red = [1, 0, 0] green = [0, 1, 0] black = [0, 0, 0] img_size = (128, 256) def show_triplet_grid(triplets): fig11 = plt.figure(figsize=(40, 50), constrained_layout=False) outer_grid = fig11.add_gridspec(4, 5) count = 0 for a in range(4): for b in range(5): print("grid {}-{}".format(a, b)) # gridspec inside gridspec inner_grid = outer_grid[a, b].subgridspec(1, 3) axs = inner_grid.subplots() # Create all subplots for the inner grid. 
triplet = triplets[count] pos, anc, neg, pos_dist, neg_dist = triplet ax1, ax2, ax3 = axs[0], axs[1], axs[2] show_instance(ax1, pos, pos_dist, green) show_instance(ax2, anc, 0, black) show_instance(ax3, neg, neg_dist, red) count += 1 # show only the outside spines # for ax in fig11.get_axes(): # ax.spines['top'].set_visible(ax.is_first_row()) # ax.spines['bottom'].set_visible(ax.is_last_row()) # ax.spines['left'].set_visible(ax.is_first_col()) # ax.spines['right'].set_visible(ax.is_last_col()) Logger.current_logger().add_figure("Batch triplets", fig11, EngineState.current_engine_state().epoch) # plt.show() # plt.waitforbuttonpress() def show_triplet(anc, pos, neg, pos_dist, neg_dist): # instance = (image, masks, id, body_part_id, body_part_name) f, axarr = plt.subplots(1, 3) ax1, ax2, ax3 = axarr[0], axarr[1], axarr[2] show_instance(ax1, pos, pos_dist, green) show_instance(ax2, anc, 0, black) show_instance(ax3, neg, neg_dist, red) f.matplotlib_show() plt.waitforbuttonpress() def add_border(img, color): # border widths top, bottom, left, right = [5] * 4 return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) def show_instance(ax, instance, dist, color): mask_idx = instance[2] body_part = instance[3] img = instance[0] mask = instance[1] # img = overlay_mask_1(img, mask) img = cv2.resize(img, img_size) # img = add_border(img, color) ax.imshow(img) mask = cv2.resize(mask, dsize=(img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) # mask = add_border(mask, color) ax.imshow(mask, cmap='jet', vmin=0, vmax=1, alpha=0.5) ax.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, right=False, labelleft=False) ax.set_title("Id = {}\n{}".format(mask_idx, body_part)) ax.set_xlabel('Dist = {}'.format(dist)) for axis in ['top', 'bottom', 'left', 'right']: ax.spines[axis].set_color(color) ax.spines[axis].set_linewidth(4) ================================================ FILE: 
torchreid/utils/visualization/embeddings_projection.py ================================================ import cv2 import torch import numpy as np from torchreid.utils import Logger from torchreid.utils.engine_state import EngineState def visualize_embeddings(qf, gf, q_pids, g_pids, test_loader, dataset_name, qf_parts_visibility, gf_parts_visibility, mAP, rank1): query_dataset = test_loader['query'].dataset gallery_dataset = test_loader['gallery'].dataset # TODO 1000 identities and 5 samples per identity sample_size = 1000 q_embeddings, q_imgs, q_meta, q_idx_list = extract_samples(qf, query_dataset, sample_size) g_embeddings, g_imgs, g_meta, g_idx_list = extract_samples(gf, gallery_dataset, sample_size) embeddings = torch.cat([q_embeddings, g_embeddings], 0) imgs = torch.cat([q_imgs, g_imgs], 0) meta = q_meta + g_meta logger = Logger.current_logger() for body_part_idx in range(0, embeddings.shape[1]): logger.add_embeddings("{} query-gallery embeddings projection for {} with mAP {} and rank-1 {}".format(dataset_name, body_part_idx, mAP, rank1), embeddings[:, body_part_idx], meta, imgs, EngineState.current_engine_state().epoch) def extract_samples(features, dataset, sample_size): sample_size = min(sample_size, len(dataset)) remaining_idx = np.arange(0, len(dataset)) idx_list = np.random.choice(remaining_idx, replace=False, size=sample_size) embeddings = [] meta = [] imgs = [] for idx in idx_list: _, pid, camid, img_path, masks = dataset[idx] embeddings.append(features[idx, :, :]) img = cv2.imread(img_path) img = cv2.resize(img, (64, 64)) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = img / 255 img = torch.from_numpy(img) imgs.append(img) meta.append(str(pid)) embeddings = torch.stack(embeddings) imgs = torch.stack(imgs) imgs = imgs.permute(0, 3, 1, 2) return embeddings, imgs, meta, idx_list ================================================ FILE: torchreid/utils/visualization/feature_map_visualization.py ================================================ import math 
import os.path as osp
import json
import cv2
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
import time
import matplotlib.pyplot as plt
import torch

# from reid import datasets, models
# from reid.utils.data import transforms as T
# from reid.utils.data.preprocessor import Preprocessor
# from reid.utils.osutils import set_paths
# from reid.utils.serialization import load_checkpoint
# from reid.models.CompactBilinearPooling_dsybaik import CompactBilinearPooling
# Source: https://github.com/yuminsuh/part_bilinear_reid/blob/master/vis_featmap.ipynb
from torchreid.utils import Writer, Logger
from torchreid.utils.constants import PARTS


def flatten(maps):
    """Reshape [N, C, H, W] maps to [C, N*H*W] (one row per channel)."""
    flattened = np.transpose(maps, (1, 0, 2, 3)).reshape(maps.shape[1], int(maps.size / maps.shape[1]))
    return flattened


def organize(flattened, num_map, feat_dim, h, w):
    """Inverse of :func:`flatten`: rebuild [num_map, feat_dim, h, w] maps."""
    maps = flattened.reshape(feat_dim, num_map, h, w)
    maps = np.transpose(maps, (1, 0, 2, 3))
    return maps


def feat_to_color(maps):
    """Project [N, C, H, W] feature maps to 3 channels via PCA for RGB display."""
    num_img, dim_feat, h, w = maps.shape
    maps_flatten = flatten(maps)
    # PCA is fit over all pixels of all images at once (one sample per pixel)
    maps_flatten_reduced = PCA(n_components=3).fit_transform(maps_flatten.transpose())
    maps_reduced = organize(maps_flatten_reduced.transpose(), num_img, 3, h, w)
    return maps_reduced


def normalize_01(m):
    """Min-max normalize an array into [0, 1] (division by zero if m is constant)."""
    max_value = m.max()
    min_value = m.min()
    r = max_value - min_value
    n = (m - min_value) / r
    return n


def mapwise_normalize(map1, feats):
    """Scale each sample's map by the square root of its feature-vector norm."""
    num_sample, num_channel, h, w = map1.shape
    # FIXME TOO MANY VALUES TO UNPACK WITH INDEITITy MASK
    # mode 1
    # norms = np.linalg.norm(np.sqrt(np.sum(map1**2, axis=(2,3))), axis=1)
    # normalized = map1
    # mode 2
    norms = np.sqrt(np.linalg.norm(feats, axis=1))
    normalized = map1 / np.reshape(norms, (num_sample, 1, 1, 1))
    return normalized


# TODO output big grid of n identities horizontally and m samples per identity vertically
# TODO test without norm stuff
# TODO refactor into beautiful code
# TODO cannot apply PCA batch by batch, should be global
def visualize_pca_multi(maps_all, feats, pids, tag):
    """Plot, for each sample, the input image next to PCA-colorized versions of
    every 4D map in ``maps_all[1:]``, then log and close the figure."""
    maps_all_reduced = []
    for maps in maps_all[1:]:
        if len(maps.shape) == 4:
            # per-sample normalization, then PCA projection of channels to RGB
            maps_all_reduced.append(feat_to_color(mapwise_normalize(maps, feats)))
    num_person = maps_all_reduced[0].shape[0]
    num_samples = len(pids)
    # pids are assumed grouped so the first id's samples come first — TODO confirm
    num_samples_per_id = 0
    for i, pid in enumerate(pids):
        if pid != pids[0]:
            num_samples_per_id = i
            break
    n_rows = num_samples_per_id
    n_cols = math.ceil(num_samples / num_samples_per_id)
    fig = plt.figure(figsize=(n_cols*4, n_rows*2), constrained_layout=False)
    outer_grid = fig.add_gridspec(n_rows, n_cols)
    # count = 0
    # for row in range(4):
    #     for col in range(5):
    #         print("grid {}-{}".format(row, col))
    #         # gridspec inside gridspec
    #         inner_grid = outer_grid[row, col].subgridspec(1, 3)
    #         axs = inner_grid.subplots()  # Create all subplots for the inner grid.
    #         triplet = triplets[count]
    #         pos, anc, neg, pos_dist, neg_dist = triplet
    #         ax1, ax2, ax3 = axs[0], axs[1], axs[2]
    #         show_instance(ax1, pos, pos_dist, green)
    #         show_instance(ax2, anc, 0, black)
    #         show_instance(ax3, neg, neg_dist, red)
    #         count += 1
    # for person_idx in range(min(num_person, num_vis)):
    person_idx = 0
    for row in range(n_rows):
        for col in range(n_cols):
            if person_idx < num_person:
                # print(person_idx)
                # fig, ax = plt.subplots(1, len(maps_all))
                # fig.set_size_inches((10,10))
                inner_grid = outer_grid[row, col].subgridspec(1, 3)
                axs = inner_grid.subplots()  # Create all subplots for the inner grid.
                for idx, maps_reduced in enumerate([maps_all[0]] + maps_all_reduced):
                    # [person_idx, ::-1, :, :] reverses the channel order before display
                    axs[idx].imshow(normalize_01(np.transpose(maps_reduced[person_idx, ::-1, :, :], (1, 2, 0))))
                    axs[idx].set_axis_off()
                person_idx += 1
    # plt.show()
    # plt.tight_layout()
    plt.tight_layout(pad=1.30, h_pad=1.6, w_pad=1.6)
    Logger.current_logger().add_figure("features_part_maps_{}".format(tag), fig, False)
    plt.close(fig)
    # fig.savefig(savepath.format(save_id, person_idx))


def display_feature_maps(embeddings_dict, spatial_features, body_part_masks, imgs_path, pids):
    """Visualize spatial feature maps and body-part masks for the current batch.
    Only runs during the last epoch, for the first 10 batches, when the
    ``test.vis_feature_maps`` config flag is enabled."""
    # TODO call at test time: display top 10 ranking for 5 queries? Take 10 random pids with 1 samples in query and
    # find 9 corresponding samples with same pid in gallery
    # TODO put config
    # TODO fix with new model output
    writer = Writer.current_writer()

    def extract_images(imgs_path):
        # load + resize images; return a [N, 3, H, W] float array scaled to [0, 1]
        imgs = []
        for img_path in imgs_path:
            img = cv2.imread(img_path)
            img = cv2.resize(img, (128, 256))  # TODO size = config
            img = img / 255
            imgs.append(img)
        imgs = np.asarray(imgs)
        imgs = np.transpose(imgs, axes=[0, 3, 1, 2])
        return imgs

    if writer.engine_state.epoch == writer.engine_state.max_epoch-1 and writer.engine_state.batch < 10 and writer.cfg.test.vis_feature_maps:  # TODO move away
        # TODO
        body_parts_features = embeddings_dict[PARTS]
        body_part_masks = torch.unsqueeze(body_part_masks, 2)  # [N, M, 1, Hf, Wf]
        spatial_features = torch.unsqueeze(spatial_features, 1)  # [N, 1, D, Hf, Wf]
        images_np = extract_images(imgs_path)
        if len(body_parts_features.shape) == 3:
            body_parts_features = body_parts_features.flatten(1, 2)
        body_parts_features_np = body_parts_features.squeeze().detach().cpu().numpy().copy()
        body_part_masks_np = body_part_masks.squeeze().detach().cpu().numpy().copy()
        spatial_features_np = spatial_features.squeeze().detach().cpu().numpy().copy()
        visualize_pca_multi([images_np, spatial_features_np, body_part_masks_np], body_parts_features_np, pids,
                            "e_{}_b_{}".format(writer.engine_state.epoch, writer.engine_state.batch))


# def sel_random_target(target):
#     labels = np.unique([t[1] for t in target])
#     labels = np.delete(labels, labels == 0)
#     sel_labels = np.random.choice(labels, 200, replace=False)
#     sel_target = [t for t in target if t[1] in sel_labels]
#     _, sel_target = map(list, zip(*sorted(zip([t[1] for t in sel_target], sel_target))))
#     return sel_target
#
# """ Settings """
# exp_dir = "logs/market1501/d2_b250"
# batch_size = 60
# args = json.load(open(osp.join(exp_dir, "args.json"), "r"))
# set_paths('paths')
# np.random.seed(0)
# # np.random.seed(int(time.time()))
# # savepath = 'vis_examples/{}_{}.png'
#
# """ Load dataset """
# dataset =
datasets.create(args['dataset'], "data/{}".format(args['dataset'])) # test_transformer = T.Compose([ # T.RectScale(args['height'], args['width']), # T.CenterCrop((args['crop_height'], args['crop_width'])), # T.ToTensor(), # T.RGB_to_BGR(), # T.NormalizeBy(255), # ]) # target = sel_random_target(list(set(dataset.query) | set(dataset.gallery))) # test_loader = DataLoader( # Preprocessor(target, # root=dataset.images_dir, transform=test_transformer), # batch_size=batch_size, num_workers=args['workers'], # shuffle=False, pin_memory=True) # # """ Load model """ # model = models.create(args['arch'], features=args['features'], # dilation=args['dilation'], initialize=False).cuda() # model_weight = osp.join(exp_dir, 'epoch_750.pth.tar') # checkpoint = load_checkpoint(model_weight) # model.app_feat_extractor.load_state_dict(checkpoint['app_state_dict']) # model.part_feat_extractor.load_state_dict(checkpoint['part_state_dict']) # model.eval() # # """ Extract feature maps """ # num_test = len(target) # feat1, feat2, feat_out, h, w = 512, 128, 512, 20, 10 # pool = CompactBilinearPooling(feat1, feat2, feat_out, sum_pool=True) # # app_feats = np.zeros((num_test, feat1, h, w)) # part_feats = np.zeros((num_test, feat2, h, w)) # bilinear_feats = np.zeros((num_test, feat_out)) # target_imgs = np.zeros((num_test, 3, args['crop_height'], args['crop_width'])) # for i, (imgs, fnames, pids, _) in enumerate(test_loader): # app_feat = model.app_feat_extractor(imgs.cuda()) # part_feat = model.part_feat_extractor(imgs.cuda()) # bilinear_feat = pool(app_feat, part_feat) # # i_start = i * batch_size # i_end = min((i + 1) * batch_size, num_test) # app_feats[i_start:i_end, :, :, :] = app_feat.detach().cpu().numpy().copy() # part_feats[i_start:i_end, :, :, :] = part_feat.detach().cpu().numpy().copy() # bilinear_feats[i_start:i_end, :] = bilinear_feat.detach().cpu().numpy().copy() # target_imgs[i_start:i_end, :, :, :] = imgs.detach().cpu().numpy().copy() # # """ Visualize maps """ # num_vis = 200 # 
# visualize_pca_multi([target_imgs[:num_vis], app_feats[:num_vis], part_feats[:num_vis]], bilinear_feats[:num_vis],
#                     num_vis=num_vis, save_id="")


================================================
FILE: torchreid/utils/visualization/visualize_query_gallery_rankings.py
================================================
import ntpath
import random

import cv2
import matplotlib
import numpy as np

from torchreid.utils import Logger, perc
from torchreid.utils.engine_state import EngineState

# Layout constants for the ranking grid, in pixels.
GRID_SPACING_V = 100
GRID_SPACING_H = 100
QUERY_EXTRA_SPACING = 30
TOP_MARGIN = 350
LEFT_MARGIN = 150
RIGHT_MARGIN = 500
BOTTOM_MARGIN = 300
ROW_BACKGROUND_LEFT_MARGIN = 75
ROW_BACKGROUND_RIGHT_MARGIN = 75
LEFT_TEXT_OFFSET = 10
BW = 12  # border width
# Color tuples — presumably (B, G, R) since the grid is drawn with OpenCV.
GREEN = (0, 255, 0)
RED = (0, 0, 255)
BLUE = (255, 0, 0)
YELLOW = (255, 255, 0)
TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX
TEXT_COLOR = (0, 0, 0)
TEXT_LINE_TYPE = cv2.LINE_AA
# Size at which every query/gallery thumbnail is drawn.
WIDTH = 128
HEIGHT = 256

# NOTE(review): matplotlib.cm.get_cmap is deprecated in recent matplotlib;
# consider matplotlib.colormaps['hsv'] once the minimum version allows it.
cmap = matplotlib.cm.get_cmap('hsv')

# TODO document and make code easier to read and adapt, i.e.
# less intricate
def visualize_ranking_grid(distmat, body_parts_distmat, test_loader, dataset_name, qf_parts_visibility, gf_parts_visibility,
                           q_parts_masks, g_parts_masks, mAP, rank1, save_dir, topk, visrank_q_idx_list, visrank_count,
                           config=None, bp_idx=None):
    """For each selected query, build and log a ranking grid showing its top-k
    gallery matches together with per-body-part masks and distances."""
    num_q, num_g = distmat.shape
    query_dataset = test_loader['query'].dataset
    gallery_dataset = test_loader['gallery'].dataset
    assert num_q == len(query_dataset)
    assert num_g == len(gallery_dataset)
    # gallery indices sorted by increasing distance for every query
    indices = np.argsort(distmat, axis=1)
    mask_filtering_flag = qf_parts_visibility is not None or gf_parts_visibility is not None
    # when no visibility scores are provided, treat every part as visible
    if qf_parts_visibility is None:
        qf_parts_visibility = np.ones((num_q, body_parts_distmat.shape[0]), dtype=bool)
    if gf_parts_visibility is None:
        gf_parts_visibility = np.ones((num_g, body_parts_distmat.shape[0]), dtype=bool)
    # pad (or trim) the requested query list to exactly visrank_count entries
    n_missing = visrank_count - len(visrank_q_idx_list)
    if n_missing > 0:
        q_idx_list = visrank_q_idx_list
        remaining_idx = np.arange(0, num_q)
        q_idx_list = np.append(q_idx_list, np.random.choice(remaining_idx, replace=False, size=n_missing))
    elif n_missing < 0:
        q_idx_list = np.array(visrank_q_idx_list[:visrank_count])
    else:
        q_idx_list = np.array(visrank_q_idx_list)
    q_idx_list = q_idx_list.astype(int)
    print("visualize_ranking_grid for dataset {}, bp {} and ids {}".format(dataset_name, bp_idx, q_idx_list))
    for q_idx in q_idx_list:
        if q_idx >= len(query_dataset):
            # FIXME this happen when using multiple target dataset with 'visrank_q_idx_list' provided for another dataset
            new_q_idx = random.randint(0, len(query_dataset)-1)
            print("Invalid query index {}, using random index {} instead".format(q_idx, new_q_idx))
            q_idx = new_q_idx
        query = query_dataset[q_idx]
        qpid, qcamid, qimg_path = query['pid'], query['camid'], query['img_path']
        qmasks = q_parts_masks[q_idx]
        if bp_idx is not None:
            # restrict the visualization to a single body part
            qmasks = qmasks[bp_idx:bp_idx+1]
        query_sample = (q_idx, qpid, qcamid, qimg_path, qmasks, qf_parts_visibility[q_idx, :])
        gallery_topk_samples = []
        rank_idx = 1
        for g_idx in indices[q_idx, :]:
            gallery = gallery_dataset[g_idx]
            gpid, gcamid, gimg_path = gallery['pid'], gallery['camid'], gallery['img_path']
            # skip gallery entries filtered out by the dataset's standard
            # gallery filter or flagged with a negative distance
            invalid = test_loader['query'].dataset.gallery_filter(np.array(qpid), np.array(qcamid), None,
                                                                  np.array(gpid), np.array(gcamid), None).item()
            invalid = invalid or distmat[q_idx, g_idx] < 0
            if not invalid:
                # matched = gpid == qpid
                gmasks = g_parts_masks[g_idx]
                if bp_idx is not None:
                    gmasks = gmasks[bp_idx:bp_idx+1]
                gallery_sample = (g_idx, gpid, gcamid, gimg_path, gmasks, gf_parts_visibility[g_idx, :], qpid == gpid,
                                  distmat[q_idx, g_idx], body_parts_distmat[:, q_idx, g_idx])
                gallery_topk_samples.append(gallery_sample)
                rank_idx += 1
                if rank_idx > topk:
                    break
        if len(gallery_topk_samples) > 0:
            show_ranking_grid(query_sample, gallery_topk_samples, mAP, rank1, dataset_name, config, mask_filtering_flag, bp_idx)
        else:
            print("Skip ranking plot of query id {} ({}), no valid gallery available".format(q_idx, qimg_path))


def show_ranking_grid(query_sample, gallery_topk_samples, mAP, rank1, dataset_name, config, mask_filtering_flag, bp_idx=None):
    """Compose the ranking grid image for one query (query row + one row per
    top-k gallery sample, one column per body part) and log it."""
    qidx, qpid, qcamid, qimg_path, qmasks, qf_parts_visibility = query_sample
    topk = len(gallery_topk_samples)
    bp_num = len(qf_parts_visibility)
    num_cols = bp_num + 1
    num_rows = topk + 1
    # white canvas sized to hold every thumbnail plus spacing and margins
    grid_img = 255 * np.ones(
        (
            num_rows * HEIGHT + (num_rows + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN + BOTTOM_MARGIN,
            num_cols * WIDTH + (num_cols + 1) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN + RIGHT_MARGIN,
            3
        ),
        dtype=np.uint8
    )
    samples = [query_sample] + gallery_topk_samples
    insert_background_line(grid_img, BLUE, 0, HEIGHT, 120, 0)
    insert_background_line(grid_img, BLUE, len(samples), HEIGHT, 0, -75)
    pos = (int(grid_img.shape[1]/2), 0)
    filtering_str = "body part filtering with threshold {}".format(config.model.bpbreid.masks.mask_filtering_threshold) if config.model.bpbreid.mask_filtering_testing else "no body part filtering"
    align_top_text(grid_img, "Ranking for dataset {}, {}, pid {}, mAP {:.2f}%, rank1 {:.2f}%, loss {}, {}".format(dataset_name, config.project.job_id, qpid, mAP * 100, rank1 * 100, config.loss.part_based.name, filtering_str),
                   pos, 3.5, 7, 120)
    for row, sample in enumerate(samples):
        display_sample_on_row(grid_img, sample, row, (WIDTH, HEIGHT), mask_filtering_flag, qf_parts_visibility)
    # per-body-part summary statistics printed under the last row
    for col in range(1, num_cols):
        parts_visibility_count = 0
        row = topk+1
        bp_idx = col - 1
        distances = []
        for i, sample in enumerate(samples):
            if i == 0:
                idx, pid, camid, img_path, masks, parts_visibility = sample
            else:
                idx, pid, camid, img_path, masks, parts_visibility, matched, dist_to_query, body_parts_dist_to_query = sample
                distances.append(body_parts_dist_to_query[bp_idx])
            parts_visibility_count += parts_visibility[bp_idx]
        distances = np.asarray(distances)
        # NOTE(review): 'min' and 'max' shadow the builtins — consider renaming.
        min = distances.min()
        max = distances.max()
        mean = distances.mean()
        pos = (col * WIDTH + int(WIDTH / 2) + (col + 1) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
               (row) * HEIGHT + int(HEIGHT / 2) + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN)
        align_top_multi_text(grid_img, "Bp={}/{}\nMin={:.1f}\nMean={:.1f}\nMax={:.1f}".format(
            parts_visibility_count, topk + 1, min, mean, max), pos, 1, 2, 60)
    if bp_idx is not None:
        filename = "_{}_{}_qidx_{}_qpid_{}_{}_part_{}.jpg".format(config.project.job_id, dataset_name, qidx, qpid, ntpath.basename(qimg_path), bp_idx)
    else:
        filename = "_{}_{}_qidx_{}_qpid_{}_{}.jpg".format(config.project.job_id, dataset_name, qidx, qpid, ntpath.basename(qimg_path))
    # path = os.path.join(save_dir, filename)
    # Path(os.path.dirname(path)).mkdir(parents=True, exist_ok=True)
    # cv2.imwrite(path, grid_img)
    Logger.current_logger().add_image("Ranking grid", filename, cv2.cvtColor(grid_img, cv2.COLOR_BGR2RGB), EngineState.current_engine_state().epoch)


def insert_background_line(grid_img, match_color, row, height, padding_top=0, padding_bottom=0):
    """Paint a faint horizontal band behind grid row ``row``."""
    alpha = 0.1
    # blend the match color into white at 10% opacity
    color = (255 * (1-alpha) + match_color[0] * alpha, 255 * (1-alpha) + match_color[1] * alpha, 255 * (1-alpha) + match_color[2] * alpha)
    hs =
row * height + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN - int(GRID_SPACING_V/2) + 15 - padding_top
    he = (row + 1) * height + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN + int(GRID_SPACING_V/2) + 15 + padding_bottom
    ws = ROW_BACKGROUND_LEFT_MARGIN
    we = grid_img.shape[1] - ROW_BACKGROUND_RIGHT_MARGIN
    grid_img[hs:he, ws:we, :] = color


def display_sample_on_row(grid_img, sample, row, img_shape, mask_filtering_flag, q_parts_visibility):
    """Draw one sample on grid row ``row``: the raw image in column 0 followed
    by one mask-overlaid copy per body part, with ids/distances as text.
    Row 0 is the query (6-tuple sample); other rows are gallery samples
    (9-tuples that also carry match flag and distances)."""
    if row == 0:
        idx, pid, camid, img_path, masks, parts_visibility = sample
        matched, dist_to_query, body_parts_dist_to_query = None, None, None
    else:
        idx, pid, camid, img_path, masks, parts_visibility, matched, dist_to_query, body_parts_dist_to_query = sample
    masks = masks.numpy()
    width, height = img_shape
    bp_num = masks.shape[0]
    img = cv2.imread(img_path)
    img = cv2.resize(img, (width, height))
    for col in range(0, bp_num + 1):
        bp_idx = col - 1
        if row == 0 and col == 0:
            # top-left cell: the query image with a blue frame
            img_to_insert = img
            img_to_insert = make_border(img_to_insert, BLUE, BW)
            pos = ((bp_num + 1) * width + (bp_num + 2) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
                   row * height + int(height / 2) + (row + 1) * GRID_SPACING_V + TOP_MARGIN)
            align_left_multitext(grid_img, "*Id = {}*\n"
                                           "Visible = {}/{}".format(
                pid, parts_visibility.sum(), bp_num), pos, 1.1, 2, 15)
        elif col == 0:
            # first column of a gallery row: frame green when pids match, red otherwise
            match_color = GREEN if matched else RED
            insert_background_line(grid_img, match_color, row, height)
            img_to_insert = make_border(img, match_color, BW)
            pos = (LEFT_MARGIN + GRID_SPACING_H,
                   row * height + int(height / 2) + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN)
            align_right_text(grid_img, str(row), pos, 3, 6, 30)
            pos = (LEFT_MARGIN + GRID_SPACING_H + int(width / 2),
                   (row + 1) * height + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN)
            # combined query/gallery visibility score (geometric mean per part)
            g_to_q_vis_score = np.sqrt(q_parts_visibility * parts_visibility).sum() / bp_num
            align_top_text(grid_img, "{}% | {:.2f}".format(int(perc(g_to_q_vis_score, 0)), dist_to_query), pos, 1.2, 2, 10)
            pos = ((bp_num + 1) * width + (bp_num + 2) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
                   row * height + int(height / 2) + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN)
            # distance statistics over visible parts only (fallback: all parts)
            if len(parts_visibility) == 1 or parts_visibility.sum() == 0:
                valid_body_parts_dist = body_parts_dist_to_query
            else:
                valid_body_parts_dist = body_parts_dist_to_query[parts_visibility > 0]
            align_left_multitext(grid_img, "*Id = {}*\n"
                                           "Idx = {}\n"
                                           "Cam id = {}\n"
                                           "Name = {}\n"
                                           "Bp Visibles = {}/{}\n"
                                           "[{:.2f}; {:.2f}; {:.2f}]\n"
                                           "[{:.2f}; {:.2f}; {:.2f}]".format(
                pid, idx, camid, ntpath.basename(img_path), (parts_visibility > 0).sum(), bp_num,
                body_parts_dist_to_query.min(), body_parts_dist_to_query.mean(), body_parts_dist_to_query.max(),
                valid_body_parts_dist.min(), valid_body_parts_dist.mean(), valid_body_parts_dist.max()),
                pos, 1, 2, 15, match_color)
        else:
            # body-part columns: image with the part mask overlaid
            if row == 0:
                pos = (col * width + int(width / 2) + (col + 1) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
                       TOP_MARGIN + GRID_SPACING_V)
                align_bottom_text(grid_img, str(bp_idx), pos, 2, 5, 35)
                pos = (col * width + int(width / 2) + (col + 1) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
                       (row + 1) * height + (row + 1) * GRID_SPACING_V + TOP_MARGIN)
                align_top_text(grid_img, "{}%".format(int(perc(parts_visibility[bp_idx], 0))), pos, 0.9, 2, 10)
            if row != 0:
                pos = (col * width + int(width / 2) + (col + 1) * GRID_SPACING_H + QUERY_EXTRA_SPACING + LEFT_MARGIN,
                       (row + 1) * height + (row + 1) * GRID_SPACING_V + QUERY_EXTRA_SPACING + TOP_MARGIN)
                # draw the extreme (min/max) part distances in bold
                thickness = 3 if body_parts_dist_to_query.argmax() == bp_idx or body_parts_dist_to_query.argmin() == bp_idx else 2
                align_top_text(grid_img, "{}% | {:.2f}".format(int(perc(parts_visibility[bp_idx], 0)), body_parts_dist_to_query[bp_idx]), pos, 0.9, thickness, 10)
            mask = masks[bp_idx, :, :]
            img_with_mask_overlay = mask_overlay(img, mask, interpolation=cv2.INTER_CUBIC)
            if mask_filtering_flag:
                # match_color = GREEN if parts_visibility[bp_idx] else RED
                match_color =
cmap(parts_visibility[bp_idx].item()/3, bytes=True)[0:-1] # divided by three because hsv colormap goes from red to green inside [0, 0.333] img_to_insert = make_border(img_with_mask_overlay, (int(match_color[2]), int(match_color[1]), int(match_color[0])), BW) else: img_to_insert = img_with_mask_overlay insert_img_into_grid(grid_img, img_to_insert, row, col) def mask_overlay(img, mask, clip=True, interpolation=cv2.INTER_NEAREST): width, height = img.shape[1], img.shape[0] mask = cv2.resize(mask, dsize=(width, height), interpolation=interpolation) if clip: mask = np.clip(mask, 0, 1) mask = (mask * 255).astype(np.uint8) else: mask = np.interp(mask, (mask.min(), mask.max()), (0, 255)).astype(np.uint8) mask_color = cv2.applyColorMap(mask, cv2.COLORMAP_JET) masked_img = cv2.addWeighted(img, 0.5, mask_color.astype(img.dtype), 0.5, 0) return masked_img def align_top_text(img, text, pos, fontScale=1.0, thickness=1, padding=4): textsize = cv2.getTextSize(text, TEXT_FONT, fontScale, thickness)[0] textX = int(pos[0] - (textsize[0] / 2)) textY = pos[1] + textsize[1] + padding cv2.putText(img, text, (textX, textY), TEXT_FONT, fontScale=fontScale, color=TEXT_COLOR, thickness=thickness, lineType=TEXT_LINE_TYPE) def align_top_multi_text(img, text, pos, fontScale=1.0, thickness=1, padding=4, text_color=(0, 0, 0)): v_padding = 20 text_lines = text.split('\n') text_line_height = cv2.getTextSize(text_lines[0], TEXT_FONT, fontScale, thickness)[0][1] text_height = len(text_lines) * text_line_height + (len(text_lines)-1) * v_padding textY = int(pos[1] - text_height + text_line_height) + padding for i, text_line in enumerate(text_lines): bold_marker = "*" bold = text_line.startswith(bold_marker) and text_line.endswith(bold_marker) line_thickness = thickness+1 if bold else thickness if bold: text_line = text_line[len(bold_marker):len(text_line)-len(bold_marker)] textsize = cv2.getTextSize(text_line, TEXT_FONT, fontScale, thickness)[0] text_line_pos = (int(pos[0] - (textsize[0] / 2)), textY + 
(text_line_height + v_padding) * i) text_color = text_color if i == 0 else TEXT_COLOR cv2.putText(img, text_line, text_line_pos, TEXT_FONT, fontScale=fontScale, color=TEXT_COLOR, thickness=line_thickness, lineType=TEXT_LINE_TYPE) def align_bottom_text(img, text, pos, fontScale=1.0, thickness=1, padding=4): textsize = cv2.getTextSize(text, TEXT_FONT, fontScale, thickness)[0] textX = int(pos[0] - (textsize[0] / 2)) textY = pos[1] - padding cv2.putText(img, text, (textX, textY), TEXT_FONT, fontScale=fontScale, color=TEXT_COLOR, thickness=thickness, lineType=TEXT_LINE_TYPE) def align_right_text(img, text, pos, fontScale=1.0, thickness=1, padding=4): textsize = cv2.getTextSize(text, TEXT_FONT, fontScale, thickness)[0] textX = pos[0] - textsize[0] - padding textY = int(pos[1] + (textsize[1] / 2)) cv2.putText(img, text, (textX, textY), TEXT_FONT, fontScale=fontScale, color=TEXT_COLOR, thickness=thickness, lineType=TEXT_LINE_TYPE) def align_left_multitext(img, text, pos, fontScale=1.0, thickness=1, padding=4, text_color=(0, 0, 0)): v_padding = 20 text_lines = text.split('\n') text_line_height = cv2.getTextSize(text_lines[0], TEXT_FONT, fontScale, thickness)[0][1] text_height = len(text_lines) * text_line_height + (len(text_lines)-1) * v_padding textX = pos[0] + padding textY = int(pos[1] - (text_height / 2) + text_line_height) for i, text_line in enumerate(text_lines): bold_marker = "*" bold = text_line.startswith(bold_marker) and text_line.endswith(bold_marker) line_thickness = thickness+1 if bold else thickness if bold: text_line = text_line[len(bold_marker):len(text_line)-len(bold_marker)] pos = (textX, textY + (text_line_height + v_padding) * i) text_color = text_color if i == 0 else TEXT_COLOR cv2.putText(img, text_line, pos, TEXT_FONT, fontScale=fontScale, color=text_color, thickness=line_thickness, lineType=TEXT_LINE_TYPE) def centered_text(img, text, pos, fontScale=1, thickness=1): textsize = cv2.getTextSize(text, TEXT_FONT, fontScale, thickness)[0] textX = 
int(pos[0] - (textsize[0] / 2)) textY = int(pos[1] + (textsize[1] / 2)) cv2.putText(img, text, (textX, textY), TEXT_FONT, fontScale=fontScale, color=TEXT_COLOR, thickness=thickness, lineType=TEXT_LINE_TYPE) def insert_img_into_grid(grid_img, img, row, col): extra_spacing_h = QUERY_EXTRA_SPACING if row > 0 else 0 extra_spacing_w = QUERY_EXTRA_SPACING if col > 0 else 0 width, height = img.shape[1], img.shape[0] hs = row * height + (row + 1) * GRID_SPACING_V + extra_spacing_h + TOP_MARGIN he = (row + 1) * height + (row + 1) * GRID_SPACING_V + extra_spacing_h + TOP_MARGIN ws = col * width + (col + 1) * GRID_SPACING_H + extra_spacing_w + LEFT_MARGIN we = (col + 1) * width + (col + 1) * GRID_SPACING_H + extra_spacing_w + LEFT_MARGIN grid_img[hs:he, ws:we, :] = img def make_border(img, border_color, bw): img_b = cv2.copyMakeBorder( img, bw, bw, bw, bw, cv2.BORDER_CONSTANT, value=border_color ) img_b = cv2.resize(img_b, (img.shape[1], img.shape[0])) return img_b ##################################### # Matplotlib version - too slow # ##################################### # GRID_SPACING = 20 # QUERY_EXTRA_SPACING = 60 # BW = 12 # border width # GREEN = (0, 255, 0) # RED = (0, 0, 255) # BLUE = (255, 0, 0) # YELLOW = (255,255,0) # FONT = cv2.FONT_HERSHEY_SIMPLEX # TEXT_COLOR = (0, 0, 0) # # width = 128 # # height = 256 # # # def mask_overlay(img, mask, clip=True): # width, height = img.shape[1], img.shape[0] # mask = cv2.resize(mask, dsize=(width, height), interpolation=cv2.INTER_CUBIC) # if clip: # mask = np.clip(mask, 0, 1) # mask = (mask*255).astype(np.uint8) # else: # mask = np.interp(mask, (mask.min(), mask.max()), (0, 255)).astype(np.uint8) # mask_color = cv2.applyColorMap(mask, cv2.COLORMAP_JET) # mask_color = cv2.cvtColor(mask_color, cv2.COLOR_BGR2RGB) # masked_img = cv2.addWeighted(img, 0.5, mask_color, 0.5, 0) # return masked_img # # # def show_ranking_grid(query_sample, gallery_topk_samples, config, osp=None): # width = 128 # height = 256 # samples = [query_sample] 
+ gallery_topk_samples # # print('start {}'.format(time.time())) # # plt.close('all') # fig = plt.figure(figsize=(100, 66), constrained_layout=True) # outer_grid = fig.add_gridspec(len(samples), 1) # # outer_grid = plt.GridSpec(len(samples), 1, wspace=1, hspace=1) # # for row, sample in enumerate(samples): # print('row {} {}'.format(row, time.time())) # display_sample_on_row(outer_grid[row, 0], sample, row, (width, height)) # # # plt.savefig('/Users/vladimirsomers/Downloads/test_ranking_viz_matplotlib/test_grid_viz_plt_{}.pdf'.format(int(time.time())), format='pdf') # print('savefig {}'.format(time.time())) # plt.savefig('/Users/vladimirsomers/Downloads/test_ranking_viz_matplotlib/test_grid_viz_plt_{}.jpg'.format(int(time.time())), format='jpg') # print('end {}'.format(time.time())) # plt.close('all') # # plt.show() # # plt.waitforbuttonpress() # # # def display_sample_on_row(subplot, sample, row, img_shape): # if row == 0: # pid, camid, img_path, masks_path, parts_visibility = sample # matched, dist_to_query, body_parts_dist_to_query = None, None, None # else: # pid, camid, img_path, masks_path, parts_visibility, matched, dist_to_query, body_parts_dist_to_query = sample # # width, height = img_shape # masks = read_masks(masks_path) # bp_num = masks.shape[0] # img = cv2.imread(img_path) # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # img = cv2.resize(img, (width, height)) # cols = bp_num+1 # # inner_grid = subplot.subgridspec(1, cols) # axs = inner_grid.subplots() # # plt.subplots_adjust(right=0.8) # # for col in range(0, cols): # if row == 0 and col == 0: # img_to_insert = img # elif col == 0: # border_color = GREEN if matched else RED # img_to_insert = make_border(img, border_color, BW) # else: # bp_idx = col - 1 # border_color = GREEN if parts_visibility[bp_idx] else RED # mask = masks[bp_idx, :, :] # img_with_mask_overlay = mask_overlay(img, mask) # img_to_insert = make_border(img_with_mask_overlay, border_color, BW) # # ax = axs[col] # ax.imshow(img_to_insert) 
# ax.tick_params(axis='both', which='both', bottom=False, top=False, labelbottom=False, left=False, right=False, # labelleft=False) # # ax.set_title("Id = {}\n{}".format(mask_idx, body_part)) # # ax.set_xlabel('Dist = {}'.format(dist)) # # for axis in ['top', 'bottom', 'left', 'right']: # # ax.spines[axis].set_color(color) # # ax.spines[axis].set_linewidth(4) # # # def insert_img_into_grid(grid_img, img, row, col): # extra_spacing_h = QUERY_EXTRA_SPACING if row > 0 else GRID_SPACING # extra_spacing_w = QUERY_EXTRA_SPACING if col > 0 else GRID_SPACING # width, height = img.shape[1], img.shape[0] # hs = (row) * height + row * GRID_SPACING + extra_spacing_h # he = (row + 1) * height + row * GRID_SPACING + extra_spacing_h # ws = (col) * width + col * GRID_SPACING + extra_spacing_w # we = (col + 1) * width + col * GRID_SPACING + extra_spacing_w # grid_img[hs:he, ws:we, :] = img # # # def make_border(img, border_color, bw): # img_b = cv2.copyMakeBorder( # img, # bw, bw, bw, bw, # cv2.BORDER_CONSTANT, # value=border_color # ) # img_b = cv2.resize(img_b, (img.shape[1], img.shape[0])) # return img_b ================================================ FILE: torchreid/utils/writer.py ================================================ import datetime import os from typing import Optional import matplotlib.pyplot as plt import numpy as np import torch from tabulate import tabulate from . 
import Logger, visualize_ranking_grid
from .avgmeter import TorchTimeMeter, SingleMeter, \
    EpochMeter, EpochArrayMeter, LossEpochMetricsMeter
from .distribution import plot_body_parts_pairs_distance_distribution, plot_pairs_distance_distribution
from .engine_state import EngineStateListener
from .tools import perc
from .visualization.embeddings_projection import visualize_embeddings


class Writer(EngineStateListener):  # TODO Integrate this with Pytorch Lightning
    """
    A class to encapsulate external loggers and writers such as Tensorboard and Allegro ClearML
    """
    # Process-wide singleton instance, set by __init__ and read via current_writer().
    __main_writer = None  # type: Optional[Writer]

    @classmethod
    def current_writer(cls):
        # type: () -> Writer
        # Return the last Writer that was constructed (singleton accessor).
        return cls.__main_writer

    def __init__(self, cfg):
        # cfg is the experiment config node; only the fields read below are required.
        self.cfg = cfg
        self.model_name = cfg.project.start_time + cfg.project.experiment_id
        self.logger = Logger.current_logger()
        # running state
        self.is_training = True
        self.batch_debug_freq = cfg.train.batch_debug_freq
        # configs
        self.start_eval = cfg.test.start_eval
        self.eval_freq = cfg.train.eval_freq
        self.max_epoch = cfg.train.max_epoch
        # init time meters
        # Second TorchTimeMeter argument presumably toggles per-update logging — TODO confirm in avgmeter.py
        self.total_run_timer = TorchTimeMeter("total run time", False)
        self.test_timer = TorchTimeMeter("multi_target_test", False)
        self.epoch_timer = TorchTimeMeter("epoch", False)
        self.batch_timer = TorchTimeMeter("batch")
        self.data_loading_timer = TorchTimeMeter("data_loading", False)
        self.test_batch_timer = TorchTimeMeter("test_batch")
        self.performance_evaluation_timer = TorchTimeMeter("performance_evaluation", False)
        self.feature_extraction_timer = TorchTimeMeter("feature_extraction")
        self.loss_timer = TorchTimeMeter("loss_computation")
        self.optimizer_timer = TorchTimeMeter("optimizer_step")
        # Register this instance as the global writer.
        Writer.__main_writer = self

    def init_engine_state(self, engine_state, parts_num):
        """Attach the engine state, create the per-epoch meters (one array slot
        per body part for `used_body_parts_in_max`) and register this writer as
        an engine-state listener."""
        self.engine_state = engine_state
        # init meters
        self.invalid_pairwise_distances_count = EpochMeter(self.engine_state)
        self.uncomparable_body_parts_pairs_count = EpochMeter(self.engine_state)
        self.invalid_pairs_count_at_test_time = SingleMeter(self.engine_state)
        self.uncomparable_queries_at_test_time = SingleMeter(self.engine_state)
        self.used_body_parts_in_max = EpochArrayMeter(self.engine_state, parts_num)
        self.losses = LossEpochMetricsMeter(engine_state)
        self.loss = EpochMeter(engine_state)
        # writer should be the last listener to be called
        self.engine_state.add_listener(self, True)

    ########################
    #    TRAINING STATS    #
    ########################

    def report_performance(self, cmc, mAP, ssmd, pxl_acc_avg, name=""):
        """Log rank-1/5/10/20, mAP, SSMD and pixel accuracy scalars for the
        current epoch; `name` suffixes the scalar tags (e.g. the dataset name)."""
        self.logger.add_scalar('r1 {}'.format(name), perc(cmc[0]), self.engine_state.epoch)
        self.logger.add_scalar('r5 {}'.format(name), perc(cmc[1]), self.engine_state.epoch)
        self.logger.add_scalar('r10 {}'.format(name), perc(cmc[2]), self.engine_state.epoch)
        self.logger.add_scalar('r20 {}'.format(name), perc(cmc[3]), self.engine_state.epoch)
        self.logger.add_scalar('mAP {}'.format(name), perc(mAP), self.engine_state.epoch)
        self.logger.add_scalar('ssmd {}'.format(name), ssmd, self.engine_state.epoch)
        self.logger.add_scalar('pxl_acc {}'.format(name), pxl_acc_avg, self.engine_state.epoch)

    def report_global_performance(self, cmc_per_dataset, mAP_per_dataset, ssmd_per_dataset, pxl_acc_per_dataset):
        """Log cross-dataset summary metrics as text entries (one per metric)."""
        self.logger.add_text('r1_global', str(cmc_per_dataset[0]))
        self.logger.add_text('r5_global', str(cmc_per_dataset[1]))
        self.logger.add_text('r10_global', str(cmc_per_dataset[2]))
        self.logger.add_text('r20_global', str(cmc_per_dataset[3]))
        self.logger.add_text('mAP_global', str(mAP_per_dataset))
        self.logger.add_text('ssmd_global', str(ssmd_per_dataset))
        self.logger.add_text('pxl_acc_global', str(pxl_acc_per_dataset))

    def intermediate_evaluate(self):
        """Return True when an intermediate evaluation is due: past start_eval,
        on an eval_freq boundary, and not on the final epoch (which is always
        evaluated separately)."""
        return (self.engine_state.epoch + 1) >= self.start_eval and self.eval_freq > 0 and (
            self.engine_state.epoch + 1) % self.eval_freq == 0 and (self.engine_state.epoch + 1) != self.max_epoch

    def update_invalid_pairwise_distances_count(self, batch_pairwise_dist):
        """Count distances flagged invalid (sentinel value -1) in this batch."""
        self.invalid_pairwise_distances_count.update((batch_pairwise_dist == float(-1)).sum(),
                                                     batch_pairwise_dist.nelement())

    def update_invalid_part_based_pairwise_distances_count(self, valid_body_part_pairwise_dist_mask):
        """Count body-part distance pairs that could not be compared (mask == 0)
        against the total number of part pairs in the batch."""
        self.uncomparable_body_parts_pairs_count.update((valid_body_part_pairwise_dist_mask.nelement() - valid_body_part_pairwise_dist_mask.sum()),
                                                        valid_body_part_pairwise_dist_mask.nelement())

    def used_parts_statistics(self, M, body_part_id):
        """Record how often each of the M body parts was selected by the max
        operator, from the (symmetric) matrix `body_part_id` of selected ids."""
        # count apparition of each body part id, remove diagonal as we don't consider pairs with same id
        used_body_parts_count = torch.bincount(body_part_id.flatten(), minlength=M) - torch.bincount(
            body_part_id.diag(), minlength=M)
        used_body_parts_count = used_body_parts_count / 2  # body parts are counted two times since matrix is symmetric
        self.used_body_parts_in_max.update(used_body_parts_count,
                                           np.ones(len(used_body_parts_count))*used_body_parts_count.sum().item())

    # def plot_batch_distance_distribution(self, batch_pairwise_dist, labels):  # TODO report each epoch and not each batch for wandb performance issue
    #     if self.batch_debug_freq > 0 and (self.engine_state.global_step + 1) % self.batch_debug_freq == 0:
    #         batch_pairwise_dist = batch_pairwise_dist.detach().cpu().numpy()
    #         labels = labels.detach().cpu().numpy()
    #         if len(batch_pairwise_dist.shape) == 3:
    #             ssmd = plot_body_parts_pairs_distance_distribution(
    #                 batch_pairwise_dist, labels, labels, "Training batch")
    #             self.logger.add_scalar("SSMD/training batch ssmd", ssmd, self.engine_state.global_step)
    #         else:
    #             pos_p_mean, pos_p_std, neg_p_mean, neg_p_std, ssmd = plot_pairs_distance_distribution(
    #                 batch_pairwise_dist, labels, labels, "Training batch")
    #             self.logger.add_scalar("SSMD/training batch ssmd", ssmd, self.engine_state.global_step)

    # TODO batch plot
    # def report_body_parts_mean_distances(self, distances):  # TODO report each epoch and not each batch for wandb performance issue
    #     mean_distance_per_body_part = distances.mean(dim=(1, 2))
    #     for bp_id, count in enumerate(mean_distance_per_body_part):
    #         self.logger.add_scalar("Body parts mean distances/bp_{}".format(bp_id), count, self.engine_state.global_step)

    def visualize_triplets(self, images, masks, mask, dist):
        """Placeholder hook for visualizing the hardest triplets of a batch;
        the actual rendering below is still TODO."""
        if self.batch_debug_freq > 0 and (self.engine_state.global_step + 1) % self.batch_debug_freq == 0:
            pass
            # TODO
            # np_mask = mask.clone().detach().cpu().numpy()
            # np_dist = dist.clone().detach().cpu().numpy()
            # dist_ap, dist_an = [], []
            # triplets = []
            # for i in range(20):
            #     print("Computing triplet {}".format(i))
            #     pos_d = np_dist[i]*(np_mask[i])
            #     neg_d = np_dist[i]*(np_mask[i] == 0)
            #     pos_idx = pos_d.argmax()
            #     neg_idx = neg_d.argmax()
            #
            #     # instance = (image, masks, id, body_part_id, body_part_name)
            #     pos_img = cv2.imread(images[pos_idx])
            #     pos_img = cv2.cvtColor(pos_img, cv2.COLOR_BGR2RGB)
            #     anc_img = cv2.imread(images[i])
            #     anc_img = cv2.cvtColor(anc_img, cv2.COLOR_BGR2RGB)
            #     neg_img = cv2.imread(images[neg_idx])
            #     neg_img = cv2.cvtColor(neg_img, cv2.COLOR_BGR2RGB)
            #
            #     pos = (pos_img, masks[pos_idx][bp], pos_idx, bp)
            #     anc = (anc_img, masks[i][bp], i, bp)
            #     neg = (neg_img, masks[neg_idx][bp], neg_idx, bp)
            #
            #     # pos, anc, neg, pos_dist, neg_dist = triplet
            #     triplet = [pos, anc, neg, pos_d[pos_idx], neg_d[neg_idx]]
            #
            #     triplets.append(triplet)
            #
            # triplets = np.repeat(np.array(triplets), 5, axis=0)
            # show_triplet_grid(triplets, self, bp)

    ########################
    #    TESTING STATS     #
    ########################

    def qg_pairwise_dist_statistics(self, pairwise_dist, body_part_pairwise_dist, qf_parts_visibility, gf_parts_visibility):
        """Log test-time statistics on the query-gallery distance matrices:
        invalid pair counts (-1 sentinel), queries with no comparable gallery
        sample, and the per-body-part distance/availability plots."""
        valid_pairwise_dist_mask = (pairwise_dist != float(-1))
        invalid_pairs_count = (~valid_pairwise_dist_mask).sum()
        self.invalid_pairs_count_at_test_time.update(invalid_pairs_count, valid_pairwise_dist_mask.nelement())
        # A query is uncomparable when no gallery entry yields a valid distance.
        uncomparable_queries_count = (~valid_pairwise_dist_mask.max(dim=1)[0]).sum()
        self.uncomparable_queries_at_test_time.update(uncomparable_queries_count, valid_pairwise_dist_mask.shape[0])
        part_pairwise_dist_numpy = body_part_pairwise_dist.numpy()
        self.qg_body_part_distances_boxplot(part_pairwise_dist_numpy)
        self.qg_body_part_pairs_availability_barplot(part_pairwise_dist_numpy)
        if qf_parts_visibility is not None and gf_parts_visibility is not None:
            qf_parts_visibility = qf_parts_visibility.numpy()
            gf_parts_visibility = gf_parts_visibility.numpy()
            self.qg_body_part_availability_barplot(qf_parts_visibility, gf_parts_visibility)
            self.qg_distribution_of_body_part_availability_histogram(qf_parts_visibility, gf_parts_visibility)

    def qg_body_part_distances_boxplot(self, body_part_pairwise_dist):
        """Log a boxplot of the query-gallery distance distribution per body part.

        Distances are subsampled to at most 2000 pairs per part (plus each
        part's min/max so the whisker extremes stay exact); -1 sentinel entries
        are filtered out before plotting.
        """
        histogram = body_part_pairwise_dist.reshape(body_part_pairwise_dist.shape[:-2] + (-1,)).transpose()
        idx_to_keep = np.random.choice(histogram.shape[0], min(2000, histogram.shape[0]), replace=False)
        idx_to_keep = np.concatenate([idx_to_keep, np.argmax(histogram, axis=0), np.argmin(histogram, axis=0)])
        sampled_histogram = histogram[idx_to_keep]
        valid_distances_histogram = [sampled_histogram[sampled_histogram[:, i] != -1, i] for i in range(0, sampled_histogram.shape[1])]
        fig, ax = plt.subplots(figsize=(24, 4))
        ax.boxplot(valid_distances_histogram, notch=True, widths=0.35, labels=range(0, body_part_pairwise_dist.shape[0]))
        ax.set_ylabel('Distance')
        ax.set_xlabel('Body part index')
        ax.set_title('Distance distribution of query-gallery body part pairs')
        fig.tight_layout()
        self.logger.add_figure("Query-gallery body part distances boxplot", fig, self.engine_state.global_step)

    def qg_body_part_pairs_availability_barplot(self, body_part_pairwise_dist):
        """Log a bar plot of the fraction of comparable (distance != -1)
        query-gallery pairs for each body part."""
        body_part_pairs_availability = (body_part_pairwise_dist != -1).mean(axis=(1, 2))
        x_labels = range(0, len(body_part_pairs_availability))
        x = np.arange(len(x_labels))  # the label locations
        width = 0.7  # the width of the bars
        fig, ax = plt.subplots(figsize=(24, 4))
        rects = ax.bar(x, body_part_pairs_availability, width)
        # Add some text for x_labels, title and custom x-axis tick x_labels, etc.
        ax.set_ylabel('Availability')
        ax.set_xlabel('Body part index')
        ax.set_title('Query-gallery body parts pairs availability')
        ax.set_yticks(np.arange(0, 1.2, 0.1))
        ax.yaxis.get_major_ticks()[-1].set_visible(False)
        ax.set_xticks(x)
        ax.set_xticklabels(x_labels)

        def autolabel(rects):
            """Attach a text label above each bar in *rects*, displaying its height."""
            for rect in rects:
                height = rect.get_height()
                ax.annotate('{}%'.format(int(height*100)),
                            xy=(rect.get_x() + rect.get_width() / 2, height),
                            xytext=(0, 3),  # 3 points vertical offset
                            textcoords="offset points",
                            ha='center', va='bottom')

        autolabel(rects)
        fig.tight_layout()
        self.logger.add_figure("Query-gallery body part pairs availability barplot", fig, self.engine_state.global_step)

    def qg_body_part_availability_barplot(self, qf_parts_visibility, gf_parts_visibility):
        """Log a grouped bar plot of the mean visibility score of each body
        part, query set vs gallery set side by side."""
        qf_mask_availability = qf_parts_visibility.mean(axis=0)
        gf_mask_availability = gf_parts_visibility.mean(axis=0)
        x_labels = range(0, len(qf_mask_availability))
        x = np.arange(len(x_labels))  # the label locations
        width = 0.35  # the width of the bars
        fig, ax = plt.subplots(figsize=(24, 4))
        rects1 = ax.bar(x - width / 2, qf_mask_availability, width, label='Query')
        rects2 = ax.bar(x + width / 2, gf_mask_availability, width, label='Gallery')
        # Add some text for x_labels, title and custom x-axis tick x_labels, etc.
        ax.set_ylabel('Availability')
        ax.set_xlabel('Body part index')
        ax.set_title('Body parts availability for {} query and {} gallery samples'.format(qf_parts_visibility.shape[0], gf_parts_visibility.shape[0]))
        ax.set_yticks(np.arange(0, 1.2, 0.1))
        ax.yaxis.get_major_ticks()[-1].set_visible(False)
        ax.set_xticks(x)
        ax.set_xticklabels(x_labels)
        ax.legend()

        def autolabel(rects):
            """Attach a text label above each bar in *rects*, displaying its height."""
            for rect in rects:
                height = rect.get_height()
                ax.annotate('{}%'.format(int(height*100)),
                            xy=(rect.get_x() + rect.get_width() / 2, height),
                            xytext=(0, 3),  # 3 points vertical offset
                            textcoords="offset points",
                            ha='center', va='bottom')

        # autolabel(rects1)
        # autolabel(rects2)
        fig.tight_layout()
        self.logger.add_figure("Query-gallery body part availability barplot", fig, self.engine_state.global_step)

    def qg_distribution_of_body_part_availability_histogram(self, qf_parts_visibility, gf_parts_visibility):
        """Log a histogram of how many samples have 0, 1, ..., N body parts
        visible, for the query and gallery sets."""
        qf_mask_availability = qf_parts_visibility.sum(axis=1)
        gf_mask_availability = gf_parts_visibility.sum(axis=1)
        # N+2 bin edges -> N+1 bins, one per possible visible-part count 0..N.
        x = np.arange(gf_parts_visibility.shape[1]+2)
        qf_mask_availability_distribution = np.histogram(qf_mask_availability, bins=x)[0]/len(qf_parts_visibility)
        gf_mask_availability_distribution = np.histogram(gf_mask_availability, bins=x)[0]/len(gf_parts_visibility)
        x_labels = np.arange(gf_parts_visibility.shape[1]+1)
        width = 0.35  # the width of the bars
        fig, ax = plt.subplots(figsize=(24, 4))
        ax.bar(x_labels - width / 2, qf_mask_availability_distribution, width, label='Query')
        ax.bar(x_labels + width / 2, gf_mask_availability_distribution, width, label='Gallery')
        # Add some text for x_labels, title and custom x-axis tick x_labels, etc.
        ax.set_ylabel('Samples count')
        ax.set_xlabel('Amount of body parts available')
        ax.set_title('Body parts availability distribution for {} query and {} gallery samples'.format(qf_parts_visibility.shape[0], gf_parts_visibility.shape[0]))
        # ax.set_yticks(np.arange(0, 1.2, 0.1))
        ax.yaxis.get_major_ticks()[-1].set_visible(False)
        ax.set_xticks(x_labels)
        ax.set_xticklabels(x_labels)
        ax.legend()
        fig.tight_layout()
        self.logger.add_figure("Query-gallery distribution of body part availability histogram", fig, self.engine_state.global_step)

    def visualize_embeddings(self, qf, gf, q_pids, g_pids, test_loader, dataset_name, qf_parts_visibility, gf_parts_visibility, mAP, rank1):
        """Project and display the query/gallery embeddings when enabled in the
        test config. NOTE: the bare `visualize_embeddings(...)` call below
        resolves to the module-level function imported at the top of the file,
        not to this method (which would require `self.`)."""
        if self.cfg.test.vis_embedding_projection and not self.engine_state.is_training:
            print("Visualizing embeddings projection")
            visualize_embeddings(qf, gf, q_pids, g_pids, test_loader, dataset_name, qf_parts_visibility, gf_parts_visibility, mAP, rank1)

    def visualize_rank(self, test_loader, dataset_name, distmat, save_dir, visrank_topk, visrank_q_idx_list, visrank_count,
                       body_parts_distmat, qf_parts_visibility, gf_parts_visibility, q_parts_masks, g_parts_masks, mAP, rank1):
        """Render ranking-grid visualizations: one grid for the global distance
        matrix (cfg.test.visrank) and, optionally, one grid per body part using
        that part's distance matrix and visibility column
        (cfg.test.visrank_per_body_part)."""
        if self.cfg.test.visrank:
            save_dir = os.path.join(save_dir, 'vis_bp_rank_' + dataset_name)
            visualize_ranking_grid(distmat, body_parts_distmat, test_loader, dataset_name, qf_parts_visibility,
                                   gf_parts_visibility, q_parts_masks, g_parts_masks, mAP, rank1, save_dir,
                                   visrank_topk, visrank_q_idx_list, visrank_count, config=self.cfg)
        if self.cfg.test.visrank_per_body_part:
            for bp in range(0, body_parts_distmat.shape[0]):
                qf_part_visibility = None
                if qf_parts_visibility is not None:
                    qf_part_visibility = qf_parts_visibility[:, bp:bp+1]
                gf_part_visibility = None
                if gf_parts_visibility is not None:
                    gf_part_visibility = gf_parts_visibility[:, bp:bp+1]
                visualize_ranking_grid(body_parts_distmat[bp], body_parts_distmat[bp:bp+1], test_loader, dataset_name,
                                       qf_part_visibility, gf_part_visibility, q_parts_masks, g_parts_masks,
                                       mAP, rank1, save_dir,
visrank_topk, visrank_q_idx_list, visrank_count, config=self.cfg, bp_idx=bp) ######################## # RUNNING EVENTS # ######################## def training_started(self): self.report_performance([0, 0, 0, 0], 0, 0, 0) def epoch_started(self): self.logger.add_scalar('Other/epoch', self.engine_state.epoch, self.engine_state.global_step) self.logger.add_scalar('Other/batch', self.engine_state.batch, self.engine_state.global_step) self.logger.add_scalar('Other/iteration', self.engine_state.global_step, self.engine_state.global_step) def epoch_completed(self): eta_seconds = (self.max_epoch - (self.engine_state.epoch + 1)) * self.epoch_timer.avg / 1000 eta_str = str(datetime.timedelta(seconds=int(eta_seconds))) print( 'epoch: [{0}/{1} e][{2} b]\t' 'eta {eta}\t' 'lr {lr:.8f}\t' 'loss {loss:.3f}\t' '{losses}'.format( self.engine_state.epoch + 1, self.max_epoch, self.engine_state.batch, eta=eta_str, lr=self.engine_state.lr, loss=self.loss.epoch_ratio(self.engine_state.epoch), losses=self.losses.summary(self.engine_state.epoch) ) ) for name, dict in self.losses.meters.items(): for key, meter in dict.items(): self.logger.add_scalar('Loss/' + name + "_" + key + '_avg', meter.mean[self.engine_state.epoch], self.engine_state.epoch) # self.logger.add_scalar("Training/Trivial triplets in batch", self.zero_losses_count.epoch_ratio(self.engine_state.epoch), self.engine_state.epoch) if not self.used_body_parts_in_max.is_empty: for bp_id, bp_ratio in enumerate(self.used_body_parts_in_max.epoch_ratio(self.engine_state.epoch)): self.logger.add_scalar("Used body parts in training/bp {}".format(bp_id), bp_ratio, self.engine_state.epoch) def training_completed(self): print("Training completed") # TODO fix metrics affected by loss refactor print("Average image pairs that couldn't be compared within one batch: {}%".format(perc(self.invalid_pairwise_distances_count.total_ratio()))) print("Average body part pairs that couldn't be compared within one batch: 
{}%".format(perc(self.uncomparable_body_parts_pairs_count.total_ratio()))) # print("Average valid triplets during one epoch: {}%".format(perc(self.valid_triplets_mask_count.total_ratio()))) self.display_used_body_parts() def test_completed(self): print("Test completed") if not self.invalid_pairs_count_at_test_time.is_empty: print("Amount of pairs query-gallery that couldn't be compared: {}%".format(perc(self.invalid_pairs_count_at_test_time.ratio(), 3))) if not self.uncomparable_queries_at_test_time.is_empty: print("Amount of queries that couldn't be compared to any gallery sample: {}%".format(perc(self.uncomparable_queries_at_test_time.ratio(), 3))) def run_completed(self): timer_meters = [ self.total_run_timer, self.epoch_timer, self.batch_timer, self.data_loading_timer, self.feature_extraction_timer, self.loss_timer, self.optimizer_timer, self.performance_evaluation_timer, self.test_timer, self.test_batch_timer, ] table = [] for time_meter in timer_meters: table.append([time_meter.name, time_meter.average_time(), time_meter.total_time(), time_meter.count]) headers = ["Time metric name", "Average", "Total", "Count"] print(tabulate(table, headers, tablefmt="fancy_grid")) ######################## # UTILS # ######################## def display_used_body_parts(self): if self.used_body_parts_in_max.is_empty: return # plot histogram body_parts_used_for_training = self.used_body_parts_in_max.total_ratio() x_labels = range(0, len(body_parts_used_for_training)) x = np.arange(len(x_labels)) # the label locations width = 0.7 # the width of the bars fig, ax = plt.subplots(figsize=(24, 4)) rects = ax.bar(x, body_parts_used_for_training, width) # Add some text for x_labels, title and custom x-axis tick x_labels, etc. 
ax.set_ylabel('Selection percentage') ax.set_xlabel('Body part index') ax.set_title('Body parts used for training') ax.set_yticks(np.arange(0, 1.2, 0.1)) ax.yaxis.get_major_ticks()[-1].set_visible(False) ax.set_xticks(x) ax.set_xticklabels(x_labels) def autolabel(rects): """Attach a text label above each bar in *rects*, displaying its height.""" for rect in rects: height = rect.get_height() ax.annotate('{}%'.format(np.around(height*100, 2)), xy=(rect.get_x() + rect.get_width() / 2, height), xytext=(0, 3), # 3 points vertical offset textcoords="offset points", ha='center', va='bottom') autolabel(rects) fig.tight_layout() self.logger.add_figure("Body parts used for training", fig, self.engine_state.global_step)